[python-escript] 01/01: Bring up to date with upstream 5.0

Joel Fenwick jfenwick-guest at moszumanska.debian.org
Wed Sep 21 02:17:33 UTC 2016


This is an automated email from the git hooks/post-receive script.

jfenwick-guest pushed a commit to branch debian
in repository python-escript.

commit f4e5403c526c696f19dfad874fbc4b8059979468
Author: Joel Fenwick <joelfenwick at uq.edu.au>
Date:   Mon Sep 19 14:27:30 2016 +1000

    Bring up to date with upstream 5.0
---
 CREDITS.txt => CREDITS                             |    5 +-
 README_LICENSE => LICENSE                          |    0
 README                                             |   11 +
 SConstruct                                         |  477 +-
 debian/changelog                                   |    6 +
 debian/patches/10_use_python35.patch               |   35 -
 debian/patches/11_use_c++03                        |   11 -
 debian/patches/13_cpp_overlord                     |   84 -
 debian/patches/series                              |    3 -
 doc/SConscript                                     |    2 -
 doc/docguide.tex                                   |    3 +-
 doc/doxygen/doxygen_esys                           |    3 +-
 doc/examples/SConscript                            |   47 +-
 doc/examples/cookbook/example01a.py                |    1 +
 doc/examples/cookbook/example03b.py                |    1 +
 doc/examples/cookbook/example08a.py                |    2 +-
 doc/examples/cookbook/example08b.py                |    2 +-
 doc/examples/cookbook/example09a.py                |    2 +-
 doc/examples/cookbook/example10m.py                |   23 +-
 doc/examples/inversion/grav_ermapper.py            |   15 +-
 doc/examples/inversion/grav_netcdf.py              |   21 +-
 doc/examples/inversion/gravmag_netcdf.py           |   19 +-
 doc/examples/inversion/gravmag_nodriver.py         |    3 -
 doc/examples/inversion/mag_netcdf.py               |   20 +-
 doc/examples/inversion/synthetic_HTI.py            |  347 +-
 doc/examples/inversion/synthetic_TTI.py            |  263 +-
 doc/examples/inversion/synthetic_VTI.py            |  293 +-
 doc/examples/inversion/synthetic_sonic.py          |  228 +-
 doc/examples/inversion/synthetic_sonicHTI.py       |  305 +-
 doc/examples/inversion/test_commemi1.py            |  196 +-
 doc/examples/inversion/test_commemi4.py            |  245 +-
 doc/examples/usersguide/voxet_reader.py            |   50 +-
 doc/examples/usersguide/wave.py                    |    1 +
 doc/install/cxx11.tex                              |   14 +
 doc/install/install.tex                            |    2 +-
 doc/install/intro.tex                              |   21 +-
 doc/install/source.tex                             |   97 +-
 doc/user/changes.tex                               |   22 +-
 doc/user/esys.bib                                  |   11 -
 doc/user/linearPDE.tex                             |   11 +-
 doc/user/subworlds.tex                             |    2 +-
 doc/verinfo.tex                                    |    4 +-
 downunder/py_src/coordinates.py                    |   22 +-
 downunder/py_src/forwardmodels/acoustic.py         |    6 +-
 downunder/py_src/magtel2d.py                       |    2 +-
 downunder/py_src/seismic.py                        |    6 +-
 downunder/test/python/SConscript                   |    5 +-
 downunder/test/python/run_comm1.py                 |   35 +-
 downunder/test/python/run_comm4.py                 |   12 +-
 downunder/test/python/run_datasources.py           |    7 +-
 downunder/test/python/run_dcforward.py             |    2 +-
 downunder/test/python/run_forward.py               |   76 +-
 downunder/test/python/run_inversion_gravmag_2d.py  |    4 +-
 {paso/profiling => dudley}/SConscript              |   22 +-
 dudley/py_src/SConscript                           |    8 +-
 dudley/py_src/__init__.py                          |    1 -
 dudley/py_src/factorywrappers.py                   |   73 +-
 dudley/py_src/readers.py                           |    7 +-
 dudley/src/Assemble.h                              |  235 +-
 dudley/src/Assemble_AverageElementData.cpp         |  174 +-
 dudley/src/Assemble_CopyElementData.cpp            |  125 +-
 dudley/src/Assemble_CopyNodalData.cpp              |  505 +-
 dudley/src/Assemble_LumpedSystem.cpp               |  615 +-
 dudley/src/Assemble_NodeCoordinates.cpp            |   74 +-
 dudley/src/Assemble_PDE.cpp                        |  561 +-
 dudley/src/Assemble_PDE_Points.cpp                 |  117 +-
 dudley/src/Assemble_PDE_Single2_1D.cpp             |  356 -
 dudley/src/Assemble_PDE_Single2_2D.cpp             |  393 -
 dudley/src/Assemble_PDE_Single2_3D.cpp             |  428 -
 dudley/src/Assemble_PDE_Single_2D.cpp              |  321 +
 dudley/src/Assemble_PDE_Single_3D.cpp              |  365 +
 dudley/src/Assemble_PDE_System2_1D.cpp             |  423 -
 dudley/src/Assemble_PDE_System2_2D.cpp             |  461 --
 dudley/src/Assemble_PDE_System2_3D.cpp             |  504 --
 dudley/src/Assemble_PDE_System_2D.cpp              |  374 +
 dudley/src/Assemble_PDE_System_3D.cpp              |  427 +
 dudley/src/Assemble_addToSystemMatrix.cpp          |  545 +-
 dudley/src/Assemble_getAssembleParameters.cpp      |  249 +-
 dudley/src/Assemble_getNormal.cpp                  |   83 +
 dudley/src/Assemble_getSize.cpp                    |  157 +-
 dudley/src/Assemble_gradient.cpp                   |  640 +-
 dudley/src/Assemble_integrate.cpp                  |  146 +-
 dudley/src/Assemble_interpolate.cpp                |  188 +-
 dudley/src/Assemble_jacobeans.cpp                  |  363 -
 dudley/src/Assemble_jacobians.cpp                  |  317 +
 dudley/src/Assemble_setNormal.cpp                  |  123 -
 dudley/src/CPPAdapter/DudleyAdapterException.cpp   |   34 -
 dudley/src/CPPAdapter/DudleyAdapterException.h     |  106 -
 dudley/src/CPPAdapter/DudleyError.cpp              |   54 -
 dudley/src/CPPAdapter/DudleyError.h                |   51 -
 dudley/src/CPPAdapter/MeshAdapter.cpp              | 2043 -----
 dudley/src/CPPAdapter/MeshAdapter.h                |  671 --
 dudley/src/CPPAdapter/MeshAdapterFactory.cpp       |  714 --
 dudley/src/CPPAdapter/MeshAdapterFactory.h         |  168 -
 dudley/src/DomainFactory.cpp                       |  473 ++
 dudley/src/DomainFactory.h                         |  125 +
 dudley/src/Dudley.cpp                              |   79 -
 dudley/src/Dudley.h                                |   66 +-
 dudley/src/DudleyDomain.cpp                        | 1733 ++++
 dudley/src/DudleyDomain.h                          |  743 ++
 .../DudleyException.h}                             |   17 +-
 dudley/src/DudleyVersion.h                         |    3 +-
 dudley/src/ElementFile.cpp                         |  338 +-
 dudley/src/ElementFile.h                           |  255 +-
 dudley/src/ElementFile_allocTable.cpp              |  119 -
 dudley/src/ElementFile_copyTable.cpp               |   61 -
 dudley/src/ElementFile_createColoring.cpp          |  146 +-
 dudley/src/ElementFile_distributeByRankOfDOF.cpp   |  383 +-
 dudley/src/ElementFile_gather.cpp                  |   57 -
 dudley/src/ElementFile_jacobeans.cpp               |  162 -
 dudley/src/ElementFile_jacobians.cpp               |   94 +
 dudley/src/ElementFile_markNodes.cpp               |   82 -
 dudley/src/ElementFile_optimizeOrdering.cpp        |   80 -
 dudley/src/ElementFile_relableNodes.cpp            |   50 -
 dudley/src/ElementFile_scatter.cpp                 |   57 -
 dudley/src/ElementFile_setCoordinates.cpp          |   37 -
 dudley/src/ElementFile_setNodeRange.cpp            |   46 -
 dudley/src/ElementFile_setTags.cpp                 |   90 -
 dudley/src/ElementType.cpp                         |   52 -
 dudley/src/ElementType.h                           |   38 +-
 dudley/src/IndexList.cpp                           |  154 +-
 dudley/src/IndexList.h                             |   42 +-
 dudley/src/Mesh.cpp                                |  163 -
 dudley/src/Mesh.h                                  |  166 -
 dudley/src/Mesh_createNodeFileMappings.cpp         |  523 --
 dudley/src/Mesh_distributeByRankOfDOF.cpp          |  153 +-
 dudley/src/Mesh_findMatchingFaces.cpp              |  262 -
 dudley/src/Mesh_getPattern.cpp                     |  175 +-
 dudley/src/Mesh_markNodes.cpp                      |   57 -
 dudley/src/Mesh_optimizeDOFDistribution.cpp        |  381 +-
 dudley/src/Mesh_optimizeDOFLabeling.cpp            |  168 +-
 dudley/src/Mesh_prepare.cpp                        |  164 -
 dudley/src/Mesh_print.cpp                          |  125 -
 dudley/src/Mesh_read.cpp                           |  911 +--
 dudley/src/Mesh_readGmsh.cpp                       |  663 +-
 dudley/src/Mesh_relableElementNodes.cpp            |   39 -
 dudley/src/Mesh_resolveNodeIds.cpp                 |  166 +-
 dudley/src/Mesh_setCoordinates.cpp                 |   34 -
 dudley/src/Mesh_tagmaps.cpp                        |   43 -
 dudley/src/Mesh_tet4.cpp                           | 1134 ++-
 dudley/src/Mesh_tri3.cpp                           |  494 +-
 dudley/src/Mesh_write.cpp                          |  412 +-
 dudley/src/NodeFile.cpp                            |  407 +-
 dudley/src/NodeFile.h                              |  340 +-
 dudley/src/NodeFile_allocTable.cpp                 |  158 -
 dudley/src/NodeFile_copyTable.cpp                  |   57 -
 dudley/src/NodeFile_createDenseLabelings.cpp       |  735 +-
 dudley/src/NodeFile_createMappings.cpp             |  253 +
 dudley/src/NodeFile_createTrilinosGraph.cpp        |   78 +
 dudley/src/NodeFile_gather.cpp                     |  313 +-
 dudley/src/NodeFile_scatter.cpp                    |   66 -
 dudley/src/NodeFile_setCoordinates.cpp             |   62 -
 dudley/src/NodeFile_setIdRange.cpp                 |  227 -
 dudley/src/NodeFile_setTags.cpp                    |   73 -
 dudley/src/NodeMapping.cpp                         |   99 -
 dudley/src/NodeMapping.h                           |  107 +-
 dudley/src/SConscript                              |  228 +-
 dudley/src/ShapeTable.cpp                          |  199 +-
 dudley/src/ShapeTable.h                            |   78 +-
 dudley/src/TagMap.cpp                              |  125 -
 dudley/src/TagMap.h                                |   38 -
 dudley/src/TriangularMesh.h                        |   35 -
 dudley/src/Util.cpp                                |  878 +--
 dudley/src/Util.h                                  |  107 +-
 dudley/src/dudleycpp.cpp                           |  191 +
 ...dapterTestCase.cpp => DudleyDomainTestCase.cpp} |   28 +-
 .../test/DudleyDomainTestCase.h                    |    8 +-
 dudley/test/SConscript                             |   13 +-
 dudley/test/dudley_UnitTests.cpp                   |   11 +-
 dudley/test/python/FCT_benchmark.py                |    0
 dudley/test/python/SConscript                      |   58 +-
 dudley/test/python/axisymm-splitB.py               |    0
 dudley/test/python/blocktest.py                    |    0
 dudley/test/python/data_meshes/brick_8x10x12.fly   |    1 +
 dudley/test/python/data_meshes/rectangle_8x10.fly  |    1 +
 dudley/test/python/data_meshes/tagtest2.fly        |    1 +
 dudley/test/python/data_meshes/tet10.fly           |    1 +
 dudley/test/python/data_meshes/tet4.fly            |    1 +
 dudley/test/python/data_meshes/tet_2D_order1.fly   |    1 +
 dudley/test/python/data_meshes/tet_3D_order1.fly   |    3 +-
 dudley/test/python/data_meshes/tri3.fly            |    1 +
 dudley/test/python/linearElastic.py                |    0
 dudley/test/python/run_escriptOnDudley.py          |   47 +-
 dudley/test/python/run_inputOutput.py              |   26 +-
 dudley/test/python/run_linearPDEsOnDudley1.py      |   75 +-
 dudley/test/python/run_linearPDEsOnDudley2.py      |   72 +-
 dudley/test/python/run_models.py                   |  124 +-
 dudley/test/python/run_nlpde2dOnDudley.py          |   55 -
 dudley/test/python/run_nlpde3dOnDudley.py          |   55 -
 .../test/python/run_nonlinearPDEsOnDudley.py       |   30 +-
 dudley/test/python/run_pasoSolversOnDudley.py      |  316 +
 dudley/test/python/run_simplesolve.py              |  550 --
 dudley/test/python/run_splitworldOnDudley.py       |   16 +-
 dudley/test/python/run_trilinosSolversOnDudley.py  |  344 +
 dudley/test/python/run_utilOnDudley.py             |   36 +-
 escript/py_src/SConscript                          |    8 +-
 escript/py_src/__init__.py                         |    3 +-
 .../SConscript                                     |   17 +-
 escriptcore/py_src/SConscript                      |    6 +-
 escriptcore/py_src/faultsystems.py                 |    4 +-
 escriptcore/py_src/flows.py                        |    2 +-
 escriptcore/py_src/gmshrunner.py                   |    6 +-
 escriptcore/py_src/linearPDEs.py                   |  158 +-
 escriptcore/py_src/nonlinearPDE.py                 |   19 +-
 escriptcore/py_src/pdetools.py                     |   10 +-
 escriptcore/py_src/util.py                         |  372 +-
 escriptcore/src/AbstractContinuousDomain.cpp       |   11 +-
 escriptcore/src/AbstractContinuousDomain.h         |   22 +-
 escriptcore/src/AbstractDomain.cpp                 |    4 -
 escriptcore/src/AbstractDomain.h                   |   18 +-
 escriptcore/src/AbstractReducer.cpp                |   15 +-
 escriptcore/src/AbstractReducer.h                  |   94 +-
 escriptcore/src/AbstractSystemMatrix.cpp           |   33 +-
 escriptcore/src/AbstractSystemMatrix.h             |   27 +-
 escriptcore/src/AbstractTransportProblem.cpp       |  124 +-
 escriptcore/src/AbstractTransportProblem.h         |   32 +-
 escriptcore/src/ArrayOps.cpp                       |   70 +
 escriptcore/src/ArrayOps.h                         |  892 +++
 escriptcore/src/Assert.h                           |   83 +
 escriptcore/src/BinaryDataReadyOps.cpp             |  873 ++
 escriptcore/src/BinaryDataReadyOps.h               |   59 +
 escriptcore/src/BinaryOp.h                         |  206 -
 escriptcore/src/Data.cpp                           | 3185 ++++++--
 escriptcore/src/Data.h                             | 1920 ++---
 escriptcore/src/DataAbstract.cpp                   |  132 +-
 escriptcore/src/DataAbstract.h                     |  207 +-
 escriptcore/src/DataAlgorithm.h                    |  339 -
 escriptcore/src/DataBlocks2D.cpp                   |   92 -
 escriptcore/src/DataBlocks2D.h                     |  337 -
 escriptcore/src/DataC.cpp                          |  174 -
 escriptcore/src/DataC.h                            |  172 -
 escriptcore/src/DataConstant.cpp                   |  443 +-
 escriptcore/src/DataConstant.h                     |  104 +-
 escriptcore/src/DataEmpty.cpp                      |   68 +-
 escriptcore/src/DataEmpty.h                        |   50 +-
 escriptcore/src/DataException.cpp                  |   30 -
 escriptcore/src/DataException.h                    |   86 +-
 escriptcore/src/DataExpanded.cpp                   |  894 ++-
 escriptcore/src/DataExpanded.h                     |  130 +-
 escriptcore/src/DataFactory.cpp                    |  649 +-
 escriptcore/src/DataLazy.cpp                       | 1757 ++---
 escriptcore/src/DataLazy.h                         |  100 +-
 escriptcore/src/DataMaths.cpp                      |  340 -
 escriptcore/src/DataMaths.h                        |  952 ---
 escriptcore/src/DataReady.cpp                      |   14 +-
 escriptcore/src/DataReady.h                        |  129 +-
 escriptcore/src/DataTagged.cpp                     |  966 ++-
 escriptcore/src/DataTagged.h                       |  226 +-
 escriptcore/src/DataTypes.cpp                      |  464 +-
 escriptcore/src/DataTypes.h                        |  240 +-
 escriptcore/src/DataVector.cpp                     |  623 +-
 escriptcore/src/DataVector.h                       |  566 +-
 escriptcore/src/DataVectorAlt.cpp                  |  113 +
 escriptcore/src/DataVectorAlt.h                    |  485 ++
 escriptcore/src/DataVectorOps.cpp                  |  999 +++
 escriptcore/src/DataVectorOps.h                    | 1452 ++++
 .../src/{DataVector.cpp => DataVectorTaipan.cpp}   |   82 +-
 .../src/{DataVector.h => DataVectorTaipan.h}       |  116 +-
 escriptcore/src/Distribution.h                     |   79 +
 escriptcore/src/DomainException.cpp                |   32 -
 escriptcore/src/DomainException.h                  |   84 +-
 escriptcore/src/ES_optype.cpp                      |   80 +
 escriptcore/src/ES_optype.h                        |  108 +
 escriptcore/src/EscriptParams.cpp                  |  308 +-
 escriptcore/src/EscriptParams.h                    |  139 +-
 escriptcore/src/EsysException.h                    |   97 +
 escriptcore/src/EsysMPI.cpp                        |  242 +
 escriptcore/src/EsysMPI.h                          |  202 +
 .../src/ExceptionTranslators.cpp                   |   36 +-
 escriptcore/src/ExceptionTranslators.h             |   66 +
 .../src/FileWriter.h                               |   20 +-
 escriptcore/src/FunctionSpace.cpp                  |   42 +-
 escriptcore/src/FunctionSpace.h                    |   16 +-
 escriptcore/src/FunctionSpaceException.cpp         |   34 -
 escriptcore/src/FunctionSpaceException.h           |   83 +-
 escriptcore/src/FunctionSpaceFactory.cpp           |    4 -
 escriptcore/src/FunctionSpaceFactory.h             |    9 +-
 {esysUtils => escriptcore}/src/IndexList.h         |   47 +-
 escriptcore/src/LapackInverseHelper.cpp            |   12 +-
 escriptcore/src/LocalOps.h                         |  563 --
 escriptcore/src/MPIDataReducer.cpp                 |  546 +-
 escriptcore/src/MPIDataReducer.h                   |   27 +-
 escriptcore/src/MPIScalarReducer.cpp               |  189 +-
 escriptcore/src/MPIScalarReducer.h                 |   38 +-
 escriptcore/src/NonReducedVariable.cpp             |   18 +-
 escriptcore/src/NonReducedVariable.h               |   37 +-
 escriptcore/src/NullDomain.cpp                     |   22 +-
 escriptcore/src/NullDomain.h                       |   12 +-
 escriptcore/src/Pointers.h                         |    9 +-
 escriptcore/src/Random.cpp                         |  244 +
 .../src/EsysRandom.h => escriptcore/src/Random.h   |   20 +-
 escriptcore/src/SConscript                         |  125 +-
 escriptcore/src/SolverOptions.cpp                  |  204 +-
 escriptcore/src/SolverOptions.h                    |   82 +-
 escriptcore/src/SolverOptionsException.cpp         |   28 -
 escriptcore/src/SolverOptionsException.h           |   91 -
 escriptcore/src/SplitWorld.cpp                     |   51 +-
 escriptcore/src/SplitWorld.h                       |   15 +-
 escriptcore/src/SplitWorldException.cpp            |   34 -
 escriptcore/src/SplitWorldException.h              |   84 +-
 escriptcore/src/SubWorld.cpp                       | 1272 ++-
 escriptcore/src/SubWorld.h                         |   80 +-
 escriptcore/src/SystemMatrixException.cpp          |   32 -
 escriptcore/src/SystemMatrixException.h            |   73 +-
 escriptcore/src/TestDomain.cpp                     |  131 +-
 escriptcore/src/TestDomain.h                       |   33 +-
 escriptcore/src/TransportProblemException.cpp      |   34 -
 escriptcore/src/TransportProblemException.h        |   80 +-
 escriptcore/src/UnaryFuncs.h                       |  167 -
 escriptcore/src/UnaryOp.h                          |   87 -
 escriptcore/src/UtilC.h                            |   25 -
 escriptcore/src/Utils.cpp                          |   83 +-
 escriptcore/src/WrappedArray.cpp                   |  296 +-
 escriptcore/src/WrappedArray.h                     |  142 +-
 escriptcore/src/escriptcpp.cpp                     |  140 +-
 .../system_dep.h => escriptcore/src/index.h        |   31 +-
 escriptcore/src/pyerr.cpp                          |   75 +
 {esysUtils => escriptcore}/src/pyerr.h             |   19 +-
 escriptcore/src/system_dep.h                       |   45 +-
 escriptcore/test/DataAlgorithmAdapterTestCase.cpp  |  273 -
 escriptcore/test/DataBlocks2DTestCase.cpp          |  215 -
 escriptcore/test/DataCombinationsTestCase.cpp      |  876 ++
 ...dapterTestCase.h => DataCombinationsTestCase.h} |   18 +-
 escriptcore/test/DataConstantTestCase.cpp          |   15 +-
 escriptcore/test/DataEmptyTestCase.cpp             |   10 +-
 escriptcore/test/DataExpandedTestCase.cpp          |   40 +-
 escriptcore/test/DataFactoryTestCase.cpp           |    7 +-
 escriptcore/test/DataLazyTestCase.cpp              |   28 +-
 escriptcore/test/DataMathsTestCase.cpp             |  461 +-
 escriptcore/test/DataTaggedTestCase.cpp            | 1826 +----
 escriptcore/test/DataTestCase.cpp                  |  518 +-
 escriptcore/test/DataTestCase.h                    |    2 +-
 escriptcore/test/DataTypesTestCase.cpp             |  112 +-
 escriptcore/test/DataVectorTestCase.cpp            |   42 +-
 escriptcore/test/EsysExceptionTestCase.cpp         |  165 +
 .../test/EsysExceptionTestCase.h                   |    1 -
 .../test/FileWriterTestCase.cpp                    |   36 +-
 .../test/FileWriterTestCase.h                      |    6 +-
 escriptcore/test/FunctionSpaceTestCase.cpp         |   17 +-
 escriptcore/test/SConscript                        |   12 +-
 escriptcore/test/SharedDataTestCase.cpp            |   50 +-
 escriptcore/test/TaipanTestCase.cpp                |    8 +-
 escriptcore/test/escript_UnitTest.cpp              |   36 +-
 escriptcore/test/multi_arrayTestCase.cpp           |    4 +-
 escriptcore/test/python/SConscript                 |    8 +-
 escriptcore/test/python/run_symbolic.py            |   13 +-
 escriptcore/test/python/run_testdomain.py          |    1 +
 escriptcore/test/python/test_linearPDEs.py         |  563 +-
 escriptcore/test/python/test_nonLinearPDE.py       |   11 +-
 escriptcore/test/python/test_objects.py            |   23 +-
 escriptcore/test/python/test_pdetools.py           |  141 +-
 escriptcore/test/python/test_simplesolve.py        |  169 +
 escriptcore/test/python/test_splitworld.py         |   34 +-
 escriptcore/test/python/test_symfuncs.py           |    1 +
 escriptcore/test/python/test_util_NaN_funcs.py     |   21 +
 .../test/python/test_util_binary_no_tagged_data.py | 1088 ++-
 .../python/test_util_binary_with_tagged_data.py    |  810 ++
 .../python/test_util_slicing_no_tagged_data.py     |   34 +
 .../test/python/test_util_spatial_functions1.py    |  594 +-
 .../test/python/test_util_spatial_functions2.py    |    1 +
 .../test/python/test_util_spatial_functions3.py    |    2 +-
 .../test/python/test_util_unary_no_tagged_data.py  |   72 +-
 .../python/test_util_unary_with_tagged_data.py     |   31 +-
 esysUtils/src/EsysAssert.h                         |  100 -
 esysUtils/src/EsysAssertException.cpp              |   52 -
 esysUtils/src/EsysAssertException.h                |  118 -
 esysUtils/src/EsysException.cpp                    |   77 -
 esysUtils/src/EsysException.h                      |  209 -
 esysUtils/src/EsysRandom.cpp                       |  241 -
 esysUtils/src/Esys_MPI.cpp                         |  387 -
 esysUtils/src/Esys_MPI.h                           |  166 -
 esysUtils/src/SConscript                           |   72 -
 esysUtils/src/blocktimer.cpp                       |  189 -
 esysUtils/src/blocktimer.h                         |   44 -
 esysUtils/src/error.cpp                            |   93 -
 esysUtils/src/error.h                              |   94 -
 esysUtils/src/esysExceptionTranslator.h            |   44 -
 esysUtils/src/first.h                              |   26 -
 esysUtils/src/index.h                              |   63 -
 esysUtils/src/maths.h                              |   30 -
 esysUtils/src/mem.h                                |  100 -
 esysUtils/src/pyerr.cpp                            |   78 -
 esysUtils/src/system_dep.h                         |   58 -
 esysUtils/src/types.h                              |   32 -
 esysUtils/test/EsysExceptionTestCase.cpp           |  296 -
 esysUtils/test/SConscript                          |   40 -
 esysUtils/test/esysUtils_UnitTest.cpp              |   53 -
 {pasowrap/py_src => finley}/SConscript             |   24 +-
 finley/py_src/SConscript                           |    4 +-
 finley/py_src/__init__.py                          |    2 -
 finley/src/Assemble.h                              |   74 +-
 finley/src/Assemble_AverageElementData.cpp         |   15 +-
 finley/src/Assemble_CopyElementData.cpp            |   13 +-
 finley/src/Assemble_CopyNodalData.cpp              |  309 +-
 finley/src/Assemble_LumpedSystem.cpp               |  262 +-
 finley/src/Assemble_NodeCoordinates.cpp            |   26 +-
 finley/src/Assemble_PDE.cpp                        |  145 +-
 finley/src/Assemble_PDE_Points.cpp                 |   47 +-
 finley/src/Assemble_PDE_Single_1D.cpp              |   87 +-
 finley/src/Assemble_PDE_Single_2D.cpp              |  278 +-
 finley/src/Assemble_PDE_Single_3D.cpp              |  334 +-
 finley/src/Assemble_PDE_Single_C.cpp               |  126 +-
 finley/src/Assemble_PDE_System_1D.cpp              |   32 +-
 finley/src/Assemble_PDE_System_2D.cpp              |  344 +-
 finley/src/Assemble_PDE_System_3D.cpp              |  402 +-
 finley/src/Assemble_PDE_System_C.cpp               |  132 +-
 finley/src/Assemble_addToSystemMatrix.cpp          |  379 +-
 finley/src/Assemble_getAssembleParameters.cpp      |  221 +-
 finley/src/Assemble_getNormal.cpp                  |   75 +-
 finley/src/Assemble_getSize.cpp                    |   68 +-
 finley/src/Assemble_gradient.cpp                   |  172 +-
 finley/src/Assemble_integrate.cpp                  |   79 +-
 finley/src/Assemble_interpolate.cpp                |   99 +-
 finley/src/Assemble_jacobians.cpp                  |  105 +-
 finley/src/CPPAdapter/FinleyAdapterException.cpp   |   34 -
 finley/src/CPPAdapter/FinleyAdapterException.h     |  106 -
 finley/src/CPPAdapter/MeshAdapter.cpp              | 2277 ------
 finley/src/CPPAdapter/MeshAdapter.h                |  668 --
 finley/src/CPPAdapter/MeshAdapterFactory.cpp       |  986 ---
 finley/src/CPPAdapter/MeshAdapterFactory.h         |  233 -
 finley/src/CPPAdapter/finleycpp.cpp                |  311 -
 finley/src/CPPAdapter/system_dep.h                 |   46 -
 finley/src/DomainFactory.cpp                       |  881 +++
 finley/src/DomainFactory.h                         |  162 +
 finley/src/ElementFile.cpp                         |  282 +-
 finley/src/ElementFile.h                           |   77 +-
 finley/src/ElementFile_jacobians.cpp               |   42 +-
 finley/src/Finley.cpp                              |   84 -
 finley/src/Finley.h                                |   42 +-
 finley/src/FinleyDomain.cpp                        | 2460 ++++++
 finley/src/FinleyDomain.h                          |  897 +++
 .../src/FinleyException.h                          |   19 +-
 finley/src/FinleyVersion.h                         |    2 +-
 finley/src/IndexList.cpp                           |   38 +-
 finley/src/IndexList.h                             |    4 +-
 finley/src/Mesh.cpp                                |  638 --
 finley/src/Mesh.h                                  |  189 -
 finley/src/Mesh_addPoints.cpp                      |  209 +-
 finley/src/Mesh_findMatchingFaces.cpp              |  225 +-
 finley/src/Mesh_getPasoPattern.cpp                 |  137 +
 finley/src/Mesh_getPattern.cpp                     |  153 -
 finley/src/Mesh_getTrilinosGraph.cpp               |  103 +
 finley/src/Mesh_glueFaces.cpp                      |  147 +-
 finley/src/Mesh_hex20.cpp                          |  177 +-
 finley/src/Mesh_hex8.cpp                           |  402 +-
 finley/src/Mesh_joinFaces.cpp                      |  153 +-
 finley/src/Mesh_merge.cpp                          |  252 +-
 finley/src/Mesh_optimizeDOFDistribution.cpp        |  246 +-
 finley/src/Mesh_read.cpp                           |  944 +--
 finley/src/Mesh_readGmsh.cpp                       | 1331 ++--
 finley/src/Mesh_rec4.cpp                           |  325 +-
 finley/src/Mesh_rec8.cpp                           |  399 +-
 finley/src/Mesh_write.cpp                          |  124 +-
 finley/src/NodeFile.cpp                            |  930 ++-
 finley/src/NodeFile.h                              |  177 +-
 finley/src/NodeMapping.h                           |    9 +-
 finley/src/Quadrature.cpp                          |  121 +-
 finley/src/RectangularMesh.h                       |   56 -
 finley/src/ReferenceElementSets.h                  |   18 +-
 finley/src/ReferenceElements.cpp                   |   18 +-
 finley/src/SConscript                              |   86 +-
 finley/src/ShapeFunctions.cpp                      |   10 +-
 finley/src/Util.cpp                                |  252 +-
 finley/src/Util.h                                  |   45 +-
 .../dudleycpp.cpp => finley/src/finleycpp.cpp      |  193 +-
 ...dapterTestCase.cpp => FinleyDomainTestCase.cpp} |   28 +-
 ...eshAdapterTestCase.h => FinleyDomainTestCase.h} |    8 +-
 finley/test/SConscript                             |   11 +-
 finley/test/finley_UnitTests.cpp                   |   10 +-
 finley/test/python/SConscript                      |   45 +-
 finley/test/python/data_meshes/brick_4x4x4.fly     |    1 +
 finley/test/python/data_meshes/brick_8x10x12.fly   |    1 +
 finley/test/python/data_meshes/hex_2D_macro.msh    |    1 +
 finley/test/python/data_meshes/hex_2D_order1.msh   |    1 +
 .../python/data_meshes/hex_2D_order1_macro.msh     |    1 +
 .../python/data_meshes/hex_2D_order1_onFace.msh    |    1 +
 finley/test/python/data_meshes/hex_2D_order2.msh   |    1 +
 .../python/data_meshes/hex_2D_order2_onFace.msh    |    1 +
 finley/test/python/data_meshes/hex_2D_order2p.msh  |    1 +
 finley/test/python/data_meshes/hex_3D_macro.msh    |    1 +
 finley/test/python/data_meshes/hex_3D_order1.msh   |    1 +
 .../python/data_meshes/hex_3D_order1_macro.msh     |    1 +
 .../python/data_meshes/hex_3D_order1_onFace.msh    |    1 +
 finley/test/python/data_meshes/hex_3D_order2.msh   |    1 +
 .../python/data_meshes/hex_3D_order2_onFace.msh    |    1 +
 finley/test/python/data_meshes/hex_3D_order2p.msh  |    1 +
 .../python/data_meshes/hex_contact_2D_order1.msh   |    1 +
 .../data_meshes/hex_contact_2D_order1_onFace.msh   |    1 +
 .../python/data_meshes/hex_contact_2D_order2.msh   |    1 +
 .../data_meshes/hex_contact_2D_order2_onFace.msh   |    1 +
 .../python/data_meshes/hex_contact_3D_order1.msh   |    1 +
 .../data_meshes/hex_contact_3D_order1_onFace.msh   |    1 +
 .../python/data_meshes/hex_contact_3D_order2.msh   |    1 +
 .../data_meshes/hex_contact_3D_order2_onFace.msh   |    1 +
 .../test/python/data_meshes/mesh_2Do1_Contact.fly  |    1 +
 .../mesh_2Do1_Contact_withElementsOnFace.fly       |    1 +
 .../test/python/data_meshes/mesh_2Do2_Contact.fly  |    1 +
 .../mesh_2Do2_Contact_withElementsOnFace.fly       |    1 +
 .../test/python/data_meshes/mesh_3Do1_Contact.fly  |    1 +
 .../mesh_3Do1_Contact_withElementsOnFace.fly       |    1 +
 .../test/python/data_meshes/mesh_3Do2_Contact.fly  |    1 +
 .../mesh_3Do2_Contact_withElementsOnFace.fly       |    1 +
 finley/test/python/data_meshes/rect_4x4.fly        |    1 +
 finley/test/python/data_meshes/rect_test.msh       |    1 +
 finley/test/python/data_meshes/rectangle_8x10.fly  |    1 +
 finley/test/python/data_meshes/tagtest.msh         |    1 +
 finley/test/python/data_meshes/test_Add.msh        |    1 +
 finley/test/python/data_meshes/tet10.fly           |    1 +
 finley/test/python/data_meshes/tet10_gmsh.msh      |    1 +
 finley/test/python/data_meshes/tet10_macro.fly     |    1 +
 finley/test/python/data_meshes/tet4.fly            |    1 +
 finley/test/python/data_meshes/tet4_gmsh.msh       |    1 +
 finley/test/python/data_meshes/tet_2D_macro.fly    |    1 +
 finley/test/python/data_meshes/tet_2D_order1.fly   |    1 +
 finley/test/python/data_meshes/tet_2D_order2.fly   |    1 +
 finley/test/python/data_meshes/tet_3D_macro.fly    |    1 +
 finley/test/python/data_meshes/tet_3D_order1.fly   |    1 +
 finley/test/python/data_meshes/tet_3D_order2.fly   |    1 +
 finley/test/python/data_meshes/tri3.fly            |    1 +
 finley/test/python/data_meshes/tri3_gmsh.msh       |    1 +
 finley/test/python/data_meshes/tri6.fly            |    1 +
 finley/test/python/data_meshes/tri6_gmsh.msh       |    1 +
 finley/test/python/data_meshes/tri6_macro.fly      |    1 +
 finley/test/python/run_amg.py                      |  134 +-
 finley/test/python/run_darcy.py                    |   16 +-
 finley/test/python/run_escriptOnFinley.py          |  246 +-
 finley/test/python/run_generators.py               |   40 +-
 finley/test/python/run_inputOutput.py              |    7 +-
 finley/test/python/run_linearPDEsOnFinley1_2D1.py  |   47 +-
 finley/test/python/run_linearPDEsOnFinley1_2D2.py  |   47 +-
 finley/test/python/run_linearPDEsOnFinley1_3D1.py  |   46 +-
 .../python/run_linearPDEsOnFinley1_3D2_part1.py    |   20 +-
 .../python/run_linearPDEsOnFinley1_3D2_part2.py    |   20 +-
 .../python/run_linearPDEsOnFinley1_3D2_part3-1.py  |   12 +-
 .../python/run_linearPDEsOnFinley1_3D2_part3-2.py  |   13 +-
 .../python/run_linearPDEsOnFinley1_3D2_part3-3.py  |   12 +-
 .../python/run_linearPDEsOnFinley1_3D2_part3-4.py  |   13 +-
 .../python/run_linearPDEsOnFinley1_3D2_part4.py    |   60 -
 finley/test/python/run_linearPDEsOnFinley2.py      |  134 +-
 finley/test/python/run_linearPDEsOnFinley3.py      |   91 +-
 finley/test/python/run_linearPDEsOnFinleyMacro.py  |  129 +-
 finley/test/python/run_models.py                   |  132 +-
 ...e3dOnFinley.py => run_nonlinearPDEsOnFinley.py} |   16 +-
 finley/test/python/run_pasoSolversOnFinley.py      |  638 ++
 finley/test/python/run_simplesolve.py              | 2501 ------
 finley/test/python/run_splitworldOnFinley.py       |    4 +-
 finley/test/python/run_trilinosSolversOnFinley.py  |  690 ++
 finley/test/python/run_utilOnFinley.py             |   61 +-
 finley/test/python/run_visualization_interface.py  |  638 --
 finley/test/python/runcoalgas.py                   |  126 +-
 modellib/test/python/SConscript                    |    5 +-
 modellib/test/python/run_convection.py             |   29 +-
 modellib/test/python/run_domainreaders.py          |   21 +-
 modellib/test/python/run_flow.py                   |    7 +-
 .../jessie_mpi_options.py => paso/SConscript       |    7 +-
 paso/profiling/Paso_tests.cpp                      |  156 -
 paso/profiling/Paso_tests.h                        |   39 -
 paso/profiling/Test.cpp                            |  235 -
 paso/src/AMG.cpp                                   |  301 +-
 paso/src/AMG_Interpolation.cpp                     |  836 +-
 paso/src/AMG_Prolongation.cpp                      |  573 +-
 paso/src/AMG_Restriction.cpp                       |  152 +-
 paso/src/AMG_Root.cpp                              |  100 +-
 paso/src/AML.cpp.old                               |  918 ---
 paso/src/BOOMERAMG.cpp                             |   12 +-
 paso/src/BOOMERAMG.h                               |    4 +-
 paso/src/BiCGStab.cpp                              |   19 +-
 paso/src/BlockOps.h                                |   26 +-
 paso/src/Coupler.cpp                               |   60 +-
 paso/src/Coupler.h                                 |   42 +-
 paso/src/Distribution.cpp                          |   37 -
 paso/src/Distribution.h                            |  135 -
 paso/src/FCT_Solver.cpp                            |  102 +-
 paso/src/FCT_Solver.h                              |   10 +-
 paso/src/FluxLimiter.cpp                           |    9 +-
 paso/src/FluxLimiter.h                             |    2 +-
 paso/src/Functions.cpp                             |   14 +-
 paso/src/Functions.h                               |   13 +-
 paso/src/GMRES.cpp                                 |   23 +-
 paso/src/GMRES2.cpp                                |   21 +-
 paso/src/GSMPI.cpp.old                             |  605 --
 paso/src/ILU.cpp                                   |  692 +-
 paso/src/LocalAMG.cpp                              |  633 +-
 paso/src/LocalAMG_Prolongation.cpp                 |   66 +-
 paso/src/MINRES.cpp                                |   39 +-
 paso/src/MKL.cpp                                   |   50 +-
 paso/src/MKL.h                                     |    2 +-
 paso/src/MergedSolver.cpp                          |   11 +-
 paso/src/MergedSolver.h                            |    3 +-
 paso/src/NewtonGMRES.cpp                           |   29 +-
 paso/src/Options.cpp                               |  213 +-
 paso/src/Options.h                                 |   20 +-
 paso/src/PCG.cpp                                   |   23 +-
 paso/src/Paso.h                                    |   38 +-
 .../src/PasoException.h                            |   32 +-
 paso/src/PasoUtil.cpp                              |  227 +-
 paso/src/PasoUtil.h                                |   10 +-
 paso/src/Pattern.cpp                               |   24 +-
 paso/src/Pattern.h                                 |    6 +-
 paso/src/Pattern_mis.cpp                           |    4 +-
 paso/src/Pattern_reduceBandwidth.cpp               |   47 +-
 paso/src/Preconditioner.cpp                        |   15 +-
 paso/src/Preconditioner.h                          |   19 +-
 paso/src/RILU.cpp                                  |  325 +-
 paso/src/ReactiveSolver.cpp                        |   22 +-
 paso/src/ReactiveSolver.h                          |   12 +-
 paso/src/SConscript                                |   41 +-
 paso/src/SchurComplement.cpp                       |   23 +-
 paso/src/SharedComponents.h                        |   61 +-
 paso/src/Smoother.cpp                              |   70 +-
 paso/src/Solver.cpp                                |  452 +-
 paso/src/Solver.h                                  |   52 +-
 paso/src/Solver_Function.cpp                       |    6 +-
 paso/src/Solver_applyBlockDiagonalMatrix.cpp.old   |   85 -
 paso/src/SparseMatrix.cpp                          |  198 +-
 paso/src/SparseMatrix_MatrixMatrix.cpp             |   55 +-
 paso/src/SparseMatrix_MatrixMatrixTranspose.cpp    |   52 +-
 paso/src/SparseMatrix_MatrixVector.cpp             |    4 +
 paso/src/SparseMatrix_getSubmatrix.cpp             |   44 +-
 paso/src/SparseMatrix_nullifyRowsAndCols.cpp       |    1 -
 paso/src/SparseMatrix_saveHB.cpp                   |    5 +-
 paso/src/SystemMatrix.cpp                          |  300 +-
 paso/src/SystemMatrix.h                            |  113 +-
 paso/src/SystemMatrixPattern.cpp                   |   76 +-
 paso/src/SystemMatrixPattern.h                     |   19 +-
 paso/src/SystemMatrixPattern_unrollBlocks.cpp      |   25 +-
 paso/src/SystemMatrix_MatrixVector.cpp             |   62 +-
 paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp    |   78 +-
 paso/src/SystemMatrix_debug.cpp                    |   12 +-
 paso/src/SystemMatrix_extendedRows.cpp             |   40 +-
 paso/src/SystemMatrix_loadMM.cpp                   |  114 +-
 paso/src/SystemMatrix_mergeMainAndCouple.cpp       |   29 +-
 paso/src/TFQMR.cpp                                 |   24 +-
 paso/src/Transport.cpp                             |  141 +-
 paso/src/Transport.h                               |   51 +-
 paso/src/Transport_solve.cpp                       |  218 +-
 paso/src/UMFPACK.cpp                               |   48 +-
 paso/src/UMFPACK.h                                 |    2 +-
 paso/src/performance.cpp                           |   70 +-
 paso/src/performance.h                             |    4 +-
 paso/src/solve.cpp                                 |  134 +-
 pasowrap/py_src/__init__.py                        |   28 -
 pasowrap/py_src/pasowrap.py                        |   27 -
 pasowrap/src/PasoException.cpp                     |   48 -
 pasowrap/src/PasoException.h                       |  111 -
 pasowrap/src/SConscript                            |   83 -
 pasowrap/src/SystemMatrixAdapter.cpp               |  392 -
 pasowrap/src/SystemMatrixAdapter.h                 |  174 -
 pasowrap/src/TransportProblemAdapter.cpp           |  172 -
 pasowrap/src/TransportProblemAdapter.h             |  136 -
 pasowrap/src/pasowrapcpp.cpp                       |   66 -
 pasowrap/src/system_dep.h                          |   46 -
 pycad/test/python/SConscript                       |    5 +-
 pythonMPI/src/SConscript                           |   13 +-
 pythonMPI/src/ScriptMPI.cpp                        |    8 +-
 pythonMPI/src/ScriptMPIredirect.cpp                |    6 +-
 {pasowrap/py_src => ripley}/SConscript             |   24 +-
 ripley/py_src/SConscript                           |    6 +-
 ripley/src/Brick.cpp                               |  392 +-
 ripley/src/Brick.h                                 |   67 +-
 ripley/src/DefaultAssembler2D.cpp                  | 1616 ++--
 ripley/src/DefaultAssembler2D.h                    |   10 +-
 ripley/src/DefaultAssembler3D.cpp                  | 8335 ++++++++++----------
 ripley/src/DefaultAssembler3D.h                    |   10 +-
 ripley/src/LameAssembler2D.cpp                     |    5 +-
 ripley/src/LameAssembler2D.h                       |    4 +-
 ripley/src/LameAssembler3D.cpp                     |    5 +-
 ripley/src/LameAssembler3D.h                       |    4 +-
 ripley/src/MultiBrick.cpp                          |  227 +-
 ripley/src/MultiBrick.h                            |    9 +-
 ripley/src/MultiRectangle.cpp                      |  423 +-
 ripley/src/MultiRectangle.h                        |    8 +-
 ripley/src/Rectangle.cpp                           |  345 +-
 ripley/src/Rectangle.h                             |   41 +-
 ripley/src/Ripley.h                                |   11 +-
 ripley/src/RipleyDomain.cpp                        |  530 +-
 ripley/src/RipleyDomain.h                          |  102 +-
 ripley/src/RipleyException.cpp                     |   29 -
 ripley/src/RipleyException.h                       |   59 +-
 ripley/src/RipleySystemMatrix.cu                   |   11 +-
 ripley/src/RipleySystemMatrix.h                    |    6 +-
 ripley/src/SConscript                              |   84 +-
 ripley/src/WaveAssembler2D.cpp                     |   18 +-
 ripley/src/WaveAssembler2D.h                       |   16 +-
 ripley/src/WaveAssembler3D.cpp                     |   18 +-
 ripley/src/WaveAssembler3D.h                       |   16 +-
 ripley/src/blocktools.cpp                          |    5 +-
 ripley/src/blocktools.h                            |   11 +-
 ripley/src/blocktools2.cpp                         |    7 +-
 ripley/src/domainhelpers.cpp                       |    8 +-
 ripley/src/domainhelpers.h                         |    5 +-
 ripley/src/generate_assamblage.py                  |   56 +-
 ripley/src/ripleycpp.cpp                           |   38 +-
 ripley/src/system_dep.h                            |    3 +-
 ripley/test/SConscript                             |   16 +-
 ripley/test/SystemMatrixTestCase.cpp               |    7 +-
 ripley/test/SystemMatrixTestCase.h                 |    2 +-
 ripley/test/python/SConscript                      |   27 +-
 ripley/test/python/run_customAssemblersOnRipley.py |    8 +-
 ripley/test/python/run_escriptOnMultiResolution.py |   10 +-
 ripley/test/python/run_escriptOnRipley.py          |   12 +-
 ripley/test/python/run_linearPDEsOnRipley.py       |  206 +-
 ripley/test/python/run_pasoSolversOnMultiRes.py    |  156 +
 ripley/test/python/run_pasoSolversOnRipley.py      |  145 +
 ripley/test/python/run_readWriteOnMultiRes.py      |   11 +-
 ripley/test/python/run_readWriteOnRipley.py        |   12 +-
 ripley/test/python/run_simplesolveOnMultiRes.py    |  404 -
 ripley/test/python/run_simplesolveOnRipley.py      |  392 -
 ripley/test/python/run_splitworldOnRipley.py       |    4 +-
 .../test/python/run_trilinosSolversOnMultiRes.py   |  252 +
 ripley/test/python/run_trilinosSolversOnRipley.py  |  247 +
 ripley/test/ripley_UnitTest.cpp                    |    4 +-
 run-escript.in                                     |   10 +-
 scons/__init__.py                                  |    0
 scons/templates/README_FIRST                       |  320 +
 scons/templates/centos7_0_options.py               |  287 +-
 scons/templates/fedora21_5_options.py              |  287 +-
 scons/templates/freebsd10_0_options.py             |  280 +-
 scons/templates/homebrew_10.10_options.py          |  248 +-
 scons/templates/homebrew_11_options.py             |  253 +-
 scons/templates/homebrew_options.py                |  247 +-
 scons/templates/jessie_mpi_options.py              |    1 +
 scons/templates/jessie_options.py                  |  283 +-
 scons/templates/jessie_py3_mpi_options.py          |   13 +-
 scons/templates/jessie_py3_options.py              |   11 +-
 scons/templates/macports_10.10_options.py          |  283 +-
 scons/templates/macports_options.py                |  283 +-
 scons/templates/opensuse13_2_options.py            |  289 +-
 scons/templates/sid_mpi_options.py                 |    8 +-
 scons/templates/sid_options.py                     |  326 +-
 scons/templates/sid_py3_mpi_options.py             |   46 +-
 scons/templates/sid_py3_options.py                 |   45 +-
 scons/templates/trusty_options.py                  |  279 +-
 scons/templates/utopic_options.py                  |  279 +-
 scons/templates/vivid_mpi_options.py               |    6 +-
 scons/templates/vivid_options.py                   |  282 +-
 scons/templates/vivid_py3_mpi_options.py           |   15 +-
 scons/templates/vivid_py3_options.py               |   13 +-
 scons/templates/wheezy_mpi_options.py              |    6 +-
 scons/templates/wheezy_options.py                  |  281 +-
 scons/templates/wheezy_py3_mpi_options.py          |   11 +-
 scons/templates/wheezy_py3_options.py              |   11 +-
 scons/templates/windows_options.py                 |    4 +-
 scripts/extracttests.sh                            |   54 +-
 scripts/makesrc.sh                                 |    3 +-
 site_scons/dependencies.py                         |  290 +-
 site_scons/extractdebbuild.py                      |    4 +-
 site_scons/grouptest.py                            |  141 +-
 site_scons/site_init.py                            |   58 +-
 {pasowrap/py_src => speckley}/SConscript           |   22 +-
 speckley/py_src/SConscript                         |    6 +-
 speckley/src/AbstractAssembler.cpp                 |    4 -
 speckley/src/AbstractAssembler.h                   |    4 +-
 speckley/src/Brick.cpp                             |   66 +-
 speckley/src/Brick.h                               |   13 +-
 speckley/src/BrickGradients.cpp                    |    9 +-
 speckley/src/BrickIntegrals.cpp                    |   10 +-
 speckley/src/BrickReductions.cpp                   |    6 +-
 speckley/src/CrossDomainCoupler.cpp                |    9 +-
 speckley/src/CrossDomainCoupler.h                  |    5 +-
 speckley/src/DefaultAssembler2D.cpp                |    7 +-
 speckley/src/DefaultAssembler2D.h                  |    4 +-
 speckley/src/DefaultAssembler3D.cpp                |    6 +-
 speckley/src/DefaultAssembler3D.h                  |   12 +-
 speckley/src/Rectangle.cpp                         |   72 +-
 speckley/src/Rectangle.h                           |    9 +-
 speckley/src/RectangleGradients.cpp                |    9 +-
 speckley/src/RectangleIntegrals.cpp                |    9 +-
 speckley/src/RectangleReductions.cpp               |    6 +-
 speckley/src/SConscript                            |   70 +-
 speckley/src/Speckley.h                            |    9 +-
 speckley/src/SpeckleyDomain.cpp                    |   11 +-
 speckley/src/SpeckleyDomain.h                      |   16 +-
 speckley/src/SpeckleyException.cpp                 |   33 -
 speckley/src/SpeckleyException.h                   |   65 +-
 speckley/src/WaveAssembler2D.cpp                   |    7 +-
 speckley/src/WaveAssembler2D.h                     |    4 +-
 speckley/src/WaveAssembler3D.cpp                   |    7 +-
 speckley/src/WaveAssembler3D.h                     |   12 +-
 speckley/src/domainhelpers.cpp                     |    9 +-
 speckley/src/domainhelpers.h                       |    5 +-
 speckley/src/speckleycpp.cpp                       |   22 +-
 speckley/src/system_dep.h                          |    2 +-
 speckley/test/SConscript                           |   24 +-
 speckley/test/python/SConscript                    |   25 +-
 speckley/test/python/run_readWriteOnSpeckley.py    |   22 +-
 speckley/test/python/run_specialOnSpeckley.py      |   68 +-
 svn_version                                        |    2 +-
 tools/escriptconvert/SConscript                    |    1 -
 tools/escriptconvert/escriptconvert.cpp            |    8 +-
 tools/overlord/SConscript                          |    2 +-
 tools/overlord/{overlord.c => overlord.cpp}        |    0
 .../SConscript                                     |    7 +-
 trilinoswrap/src/AbstractMatrixWrapper.h           |   68 +
 trilinoswrap/src/Amesos2Wrapper.cpp                |  204 +
 trilinoswrap/src/Amesos2Wrapper.h                  |   43 +
 trilinoswrap/src/BelosWrapper.cpp                  |  106 +
 trilinoswrap/src/BelosWrapper.h                    |   44 +
 trilinoswrap/src/BlockCrsMatrixWrapper.cpp         |  274 +
 trilinoswrap/src/BlockCrsMatrixWrapper.h           |   74 +
 trilinoswrap/src/CrsMatrixWrapper.cpp              |  281 +
 trilinoswrap/src/CrsMatrixWrapper.h                |   79 +
 trilinoswrap/src/PreconditionerFactory.cpp         |  195 +
 trilinoswrap/src/PreconditionerFactory.h           |   40 +
 trilinoswrap/src/SConscript                        |   61 +
 .../src/TrilinosAdapterException.h                 |   24 +-
 trilinoswrap/src/TrilinosMatrixAdapter.cpp         |  197 +
 trilinoswrap/src/TrilinosMatrixAdapter.h           |  105 +
 trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.cpp |   60 +
 trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.h   |   46 +
 trilinoswrap/src/types.h                           |   89 +
 trilinoswrap/src/util.h                            |   91 +
 {pasowrap/py_src => weipa}/SConscript              |   22 +-
 weipa/py_src/SConscript                            |    6 +-
 weipa/src/DataVar.cpp                              |   28 +-
 weipa/src/EscriptDataset.cpp                       |   58 +-
 weipa/src/EscriptDataset.h                         |    6 +-
 weipa/src/FinleyDomain.cpp                         |   62 +-
 weipa/src/FinleyElements.cpp                       |  122 +-
 weipa/src/FinleyElements.h                         |   15 +-
 weipa/src/FinleyNodes.cpp                          |  108 +-
 weipa/src/FinleyNodes.h                            |    7 +-
 weipa/src/RipleyDomain.cpp                         |    9 +-
 weipa/src/RipleyElements.cpp                       |   14 +-
 weipa/src/RipleyNodes.cpp                          |   13 +-
 weipa/src/SConscript                               |  126 +-
 weipa/src/SpeckleyDomain.cpp                       |   11 +-
 weipa/src/SpeckleyElements.cpp                     |   14 +-
 weipa/src/SpeckleyNodes.cpp                        |   14 +-
 weipa/src/VisItControl.cpp                         |   11 +-
 weipa/src/VisItData.cpp                            |    8 +-
 weipa/src/weipa.h                                  |    8 +-
 weipa/src/weipacpp.cpp                             |    4 -
 weipa/test/EscriptDatasetTestCase.cpp              |   23 +-
 weipa/test/SConscript                              |   17 +-
 weipa/test/python/SConscript                       |    4 +-
 weipa/test/python/meshes/hex_2D_macro.msh          |    1 +
 weipa/test/python/meshes/hex_2D_order2.msh         |    1 +
 weipa/test/python/meshes/hex_2D_order2p.msh        |    1 +
 weipa/test/python/meshes/hex_3D_macro.msh          |    1 +
 weipa/test/python/meshes/hex_3D_order2p.msh        |    1 +
 weipa/test/python/meshes/hex_contact_2D_order1.msh |    1 +
 .../python/meshes/hex_contact_2D_order1_onFace.msh |    1 +
 weipa/test/python/meshes/hex_contact_2D_order2.msh |    1 +
 .../python/meshes/hex_contact_2D_order2_onFace.msh |    1 +
 weipa/test/python/meshes/hex_contact_3D_order1.msh |    1 +
 .../python/meshes/hex_contact_3D_order1_onFace.msh |    1 +
 weipa/test/python/meshes/hex_contact_3D_order2.msh |    1 +
 .../python/meshes/hex_contact_3D_order2_onFace.msh |    1 +
 weipa/test/python/meshes/tet_2D_dudley.fly         |    1 +
 weipa/test/python/meshes/tet_2D_macro.fly          |    1 +
 weipa/test/python/meshes/tet_2D_order1.fly         |    1 +
 weipa/test/python/meshes/tet_2D_order2.fly         |    1 +
 weipa/test/python/meshes/tet_3D_dudley.fly         |    3 +-
 weipa/test/python/meshes/tet_3D_macro.fly          |    1 +
 weipa/test/python/meshes/tet_3D_order1.fly         |    1 +
 weipa/test/python/meshes/tet_3D_order2.fly         |    1 +
 weipa/test/weipa_UnitTest.cpp                      |   11 +-
 858 files changed, 62032 insertions(+), 74703 deletions(-)

diff --git a/CREDITS.txt b/CREDITS
similarity index 81%
rename from CREDITS.txt
rename to CREDITS
index e277cbd..8aa967e 100644
--- a/CREDITS.txt
+++ b/CREDITS
@@ -2,10 +2,9 @@
 The current research and development team for escript is:
 
 Cihan Altinay,
-Artak Amirbekyan,
 Joel Fenwick,
-Lin Gao,
-Lutz Gross.
+Lutz Gross,
+Jaco Du Plessis.
 
 For more detailed information and previous contributors please see the user guide.
 
diff --git a/README_LICENSE b/LICENSE
similarity index 100%
rename from README_LICENSE
rename to LICENSE
diff --git a/README b/README
new file mode 100644
index 0000000..95f1958
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+
+For the impatient:
+- Install at least g++, python, scons, boost, numpy
+- READ the file scons/templates/README_FIRST
+- Copy a suitable template options file from scons/templates/ to
+  scons/`hostname`_options.py and modify as required.
+- type: scons to build escript
+
+For more information read the install guide (scons install_pdf)
+and to get started using escript see the user guide and the cookbook.
+
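As an illustration only, a minimal scons/`hostname`_options.py of the kind the
README describes might look like the sketch below; the option names are taken
from the SConstruct changes further down, but the values are purely
hypothetical and would need adjusting for the local system.

    # Hypothetical minimal options file (scons/<hostname>_options.py).
    # A sketch only, not a copy of any shipped template.
    escript_opts_version = 203        # must match REQUIRED_OPTS_VERSION in SConstruct
    pythoncmd = '/usr/bin/python3'    # hypothetical path to the python to compile with
    domains = ['dudley', 'finley']    # any subset of dudley/finley/ripley/speckley
    paso = True                       # build the Paso solver library
    weipa = True                      # build the Weipa data export library
    trilinos = False                  # enable only if a Trilinos installation is available
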
diff --git a/SConstruct b/SConstruct
index 84a6349..be1344b 100644
--- a/SConstruct
+++ b/SConstruct
@@ -23,7 +23,7 @@ from site_init import *
 
 # Version number to check for in options file. Increment when new features are
 # added or existing options changed.
-REQUIRED_OPTS_VERSION=202
+REQUIRED_OPTS_VERSION=203
 
 # MS Windows support, many thanks to PH
 IS_WINDOWS = (os.name == 'nt')
@@ -54,7 +54,7 @@ if not os.path.isfile(options_file):
 
 default_prefix='/usr'
 mpi_flavours=('no', 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI')
-lapack_flavours=('none', 'clapack', 'mkl')
+all_domains = ['dudley','finley','ripley','speckley']
 
 #Note that scons construction vars serve the following purposes:
 #  CPPFLAGS -> to the preprocessor
@@ -74,8 +74,6 @@ vars.AddVariables(
   ('cc_optim', 'Additional (C and C++) flags for a non-debug build', 'default'),
   ('cc_debug', 'Additional (C and C++) flags for a debug build', 'default'),
   ('cxx_extra', 'Extra C++ compiler flags', ''),
-  ('cpp_flags', 'C Pre-processor flags', ''),
-  ('cpp_extra', 'Extra C Pre-processor flags', ''),
   ('ld_extra', 'Extra linker flags', ''),
   ('nvcc', 'Path to CUDA compiler', 'default'),
   ('nvccflags', 'Base CUDA compiler flags', 'default'),
@@ -111,17 +109,21 @@ vars.AddVariables(
   BoolVariable('boomeramg', 'Enable BoomerAMG', False),
   ('boomeramg_prefix', 'Prefix/Paths to BoomerAMG installation', default_prefix),
   ('boomeramg_libs', 'BoomerAMG libraries to link with', ['boomeramg']),
-  EnumVariable('lapack', 'Set LAPACK flavour', 'none', allowed_values=lapack_flavours),
+  TristateVariable('lapack', 'Enable LAPACK', 'auto'),
   ('lapack_prefix', 'Prefix/Paths to LAPACK installation', default_prefix),
   ('lapack_libs', 'LAPACK libraries to link with', []),
   BoolVariable('silo', 'Enable the Silo file format in weipa', False),
   ('silo_prefix', 'Prefix/Paths to Silo installation', default_prefix),
   ('silo_libs', 'Silo libraries to link with', ['siloh5', 'hdf5']),
+  BoolVariable('trilinos', 'Enable the Trilinos solvers', False),
+  ('trilinos_prefix', 'Prefix/Paths to Trilinos installation', default_prefix),
+  ('trilinos_libs', 'Trilinos libraries to link with', []),
   BoolVariable('visit', 'Enable the VisIt simulation interface', False),
   ('visit_prefix', 'Prefix/Paths to VisIt installation', default_prefix),
   ('visit_libs', 'VisIt libraries to link with', ['simV2']),
-  ListVariable('domains', 'Which domains to build', 'all',\
-               ['dudley','finley','ripley','speckley']),
+  ListVariable('domains', 'Which domains to build', 'all', all_domains),
+  BoolVariable('paso', 'Build Paso solver library', True),
+  BoolVariable('weipa', 'Build Weipa data export library', True),
 # Advanced settings
   ('launcher', 'Launcher command (e.g. mpirun)', 'default'),
   ('prelaunch', 'Command to execute before launcher (e.g. mpdboot)', 'default'),
@@ -133,14 +135,13 @@ vars.AddVariables(
   # An option for specifying the compiler tools
   ('tools_names', 'Compiler tools to use', ['default']),
   ('env_export', 'Environment variables to be passed to tools',[]),
-  EnumVariable('forcelazy', 'For testing use only - set the default value for autolazy', 'leave_alone', allowed_values=('leave_alone', 'on', 'off')),
-  EnumVariable('forcecollres', 'For testing use only - set the default value for force resolving collective ops', 'leave_alone', allowed_values=('leave_alone', 'on', 'off')),
-  ('build_shared', 'Build dynamic libraries only', False),
+  TristateVariable('forcelazy', 'For testing use only - set the default value for autolazy', 'auto'),
+  TristateVariable('forcecollres', 'For testing use only - set the default value for force resolving collective ops', 'auto'),
+  BoolVariable('build_shared', '(deprecated option, ignored)', True),
   ('sys_libs', 'Extra libraries to link with', []),
   ('escript_opts_version', 'Version of options file (do not specify on command line)'),
   ('SVN_VERSION', 'Do not use from options file', -2),
-  ('pythoncmd', 'which python to compile with','python'),
-  ('usepython3', 'Is this a python3 build?', False),
+  ('pythoncmd', 'which python to compile with', sys.executable),
   ('pythonlibname', 'Name of the python library to link. (This is found automatically for python2.X.)', ''),
   ('pythonlibpath', 'Path to the python library. (You should not need to set this unless your python has moved)',''),
   ('pythonincpath','Path to python include files. (You should not need to set this unless your python has moved',''),
@@ -151,8 +152,7 @@ vars.AddVariables(
   ('papi_prefix', 'Prefix/Paths to PAPI installation', default_prefix),
   ('papi_libs', 'PAPI libraries to link with', ['papi']),
   BoolVariable('papi_instrument_solver', 'Use PAPI to instrument each iteration of the solver', False),
-  BoolVariable('osx_dependency_fix', 'Fix dependencies for libraries to have absolute paths (OSX)',
-False)
+  BoolVariable('osx_dependency_fix', 'Fix dependencies for libraries to have absolute paths (OSX)', False)
 )
 
 ##################### Create environment and help text #######################
@@ -186,8 +186,8 @@ if options_file:
         print("Using options in %s." % options_file)
     else:
         print("\nOptions file %s" % options_file)
-        print("is outdated! Please update the file by examining one of the TEMPLATE")
-        print("files in the scons/ subdirectory and setting escript_opts_version to %d.\n"%REQUIRED_OPTS_VERSION)
+        print("is outdated! Please update the file after reading scons/templates/README_FIRST")
+        print("and setting escript_opts_version to %d.\n"%REQUIRED_OPTS_VERSION)
         Exit(1)
 
 # Generate help text (scons -h)
@@ -207,16 +207,18 @@ if env['cuda']:
 if 'dudley' in env['domains']:
     env['domains'].append('finley')
 
+env['domains'] = sorted(set(env['domains']))
+
 # create dictionary which will be populated with info for buildvars file
-env['buildvars']={}
+env['buildvars'] = {}
 # create list which will be populated with warnings if there are any
-env['warnings']=[]
+env['warnings'] = []
 
 #################### Make sure install directories exist #####################
 
-env['BUILD_DIR']=Dir(env['build_dir']).abspath
-prefix=Dir(env['prefix']).abspath
-env['buildvars']['prefix']=prefix
+env['BUILD_DIR'] = Dir(env['build_dir']).abspath
+prefix = Dir(env['prefix']).abspath
+env['buildvars']['prefix'] = prefix
 env['incinstall'] = os.path.join(prefix, 'include')
 env['bininstall'] = os.path.join(prefix, 'bin')
 env['libinstall'] = os.path.join(prefix, 'lib')
@@ -233,15 +235,11 @@ env.Append(LIBPATH = [env['libinstall']])
 
 ################# Fill in compiler options if not set above ##################
 
-if env['cxx'] != 'default': env['CXX']=env['cxx']
-
-# version >=9 of intel C++ compiler requires use of icpc to link in C++
-# runtimes (icc does not)
-if not IS_WINDOWS and os.uname()[4]=='ia64' and env['CXX']=='icpc':
-    env['LINK'] = env['CXX']
+if env['cxx'] != 'default':
+    env['CXX'] = env['cxx']
 
 # default compiler/linker options
-cc_flags = ''
+cc_flags = '-std=c++11'
 cc_optim = ''
 cc_debug = ''
 omp_flags = ''
@@ -254,11 +252,12 @@ cc_name=os.path.basename(env['CXX'])
 
 if cc_name == 'icpc':
     # Intel compiler
+    # #1478: class "std::auto_ptr<...>" was declared deprecated
     # #1875: offsetof applied to non-POD types is nonstandard (in boost)
     # removed -std=c99 because icpc doesn't like it and we aren't using c anymore
-    cc_flags    = "-fPIC -w2 -wd1875 -Wno-unknown-pragmas"
+    cc_flags    = "-std=c++11 -fPIC -w2 -wd1875 -wd1478 -Wno-unknown-pragmas"
     cc_optim    = "-O3 -ftz -fno-alias -inline-level=2 -ipo -xHost"
-    cc_debug    = "-g -O0 -DDOASSERT -DDOPROF -DBOUNDS_CHECK"
+    cc_debug    = "-g -O0 -DDOASSERT -DDOPROF -DBOUNDS_CHECK -DSLOWSHARECHECK"
     omp_flags   = "-openmp"
     omp_ldflags = "-openmp -openmp_report=1"
     fatalwarning = "-Werror"
@@ -266,10 +265,10 @@ elif cc_name[:3] == 'g++':
     # GNU C++ on any system
     # note that -ffast-math is not used because it breaks isnan(),
     # see mantis #691
-    cc_flags     = "-pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing -finline-functions"
+    cc_flags     = "-std=c++11 -pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing -finline-functions"
     cc_optim     = "-O3"
     #max-vartrack-size: avoid vartrack limit being exceeded with escriptcpp.cpp
-    cc_debug     = "-g3 -O0 -D_GLIBCXX_DEBUG -DDOASSERT -DDOPROF -DBOUNDS_CHECK --param=max-vartrack-size=100000000"
+    cc_debug     = "-g3 -O0 -D_GLIBCXX_DEBUG -DDOASSERT -DDOPROF -DBOUNDS_CHECK -DSLOWSHARECHECK --param=max-vartrack-size=100000000"
     omp_flags    = "-fopenmp"
     omp_ldflags  = "-fopenmp"
     fatalwarning = "-Werror"
@@ -298,8 +297,6 @@ if env['omp_flags']   == 'default': env['omp_flags'] = omp_flags
 if env['omp_ldflags'] == 'default': env['omp_ldflags'] = omp_ldflags
 if env['cxx_extra'] != '': env.Append(CXXFLAGS = env['cxx_extra'])
 if env['ld_extra']  != '': env.Append(LINKFLAGS = env['ld_extra'])
-if env['cpp_flags'] != '': env.Append(CPPFLAGS = env['cpp_flags'])
-if env['cpp_extra'] != '': env.Append(CPPFLAGS = " "+env['cpp_extra'])
 
 if env['nvccflags'] != 'default':
     env['NVCCFLAGS'] = env['nvccflags']
@@ -308,19 +305,16 @@ if env['nvccflags'] != 'default':
 if env['longindices']:
     env.Append(CPPDEFINES = ['ESYS_INDEXTYPE_LONG'])
 
-if env['usepython3']:
-    env.Append(CPPDEFINES=['ESPYTHON3'])
-
 # set up the autolazy values
-if env['forcelazy'] == 'on':
+if env['forcelazy'] == 1:
     env.Append(CPPDEFINES=['FAUTOLAZYON'])
-elif env['forcelazy'] == 'off':
+elif env['forcelazy'] == 0:
     env.Append(CPPDEFINES=['FAUTOLAZYOFF'])
 
 # set up the collective resolve values
-if env['forcecollres'] == 'on':
+if env['forcecollres'] == 1:
     env.Append(CPPDEFINES=['FRESCOLLECTON'])
-elif env['forcecollres'] == 'off':
+elif env['forcecollres'] == 0:
     env.Append(CPPDEFINES=['FRESCOLLECTOFF'])
 
 # allow non-standard C if requested
@@ -354,40 +348,6 @@ env.Append(CCFLAGS = env['cc_flags'])
 # add system libraries
 env.AppendUnique(LIBS = env['sys_libs'])
 
-# set defaults for launchers if not otherwise specified
-if env['prelaunch'] == 'default':
-    if env['mpi'] == 'INTELMPI' and env['openmp']:
-        env['prelaunch'] = "export I_MPI_PIN_DOMAIN=omp"
-    elif env['mpi'] == 'OPENMPI':
-        # transform comma-separated list to '-x a -x b -x c ...'
-        env['prelaunch'] = "EE=$(echo -x %e|sed -e 's/,/ -x /g')"
-    elif env['mpi'] == 'MPT':
-        env['prelaunch'] = "export MPI_NUM_MEMORY_REGIONS=0"
-    elif env['mpi'] == 'MPICH2':
-        env['prelaunch'] = "mpdboot -n %n -r ssh -f %f"
-    else:
-        env['prelaunch'] = ""
-
-if env['launcher'] == 'default':
-    if env['mpi'] == 'INTELMPI':
-        env['launcher'] = "mpirun -hostfile %f -n %N -ppn %p %b"
-    elif env['mpi'] == 'OPENMPI':
-        env['launcher'] = "mpirun ${AGENTOVERRIDE} --gmca mpi_warn_on_fork 0 ${EE} --host %h -bynode -bind-to-core --cpus-per-rank %t -np %N %b"
-    elif env['mpi'] == 'MPT':
-        env['launcher'] = "mpirun %h -np %p %b"
-    elif env['mpi'] == 'MPICH':
-        env['launcher'] = "mpirun -machinefile %f -np %N %b"
-    elif env['mpi'] == 'MPICH2':
-        env['launcher'] = "mpiexec -genvlist %e -np %N %b"
-    else:
-        env['launcher'] = "%b"
-
-if env['postlaunch'] == 'default':
-    if env['mpi'] == 'MPICH2':
-        env['postlaunch'] = "mpdallexit"
-    else:
-        env['postlaunch'] = ""
-
 # determine svn revision
 global_revision=ARGUMENTS.get('SVN_VERSION', None)
 if global_revision:
@@ -407,11 +367,6 @@ env['svn_revision']=global_revision
 env['buildvars']['svn_revision']=global_revision
 env.Append(CPPDEFINES=['SVN_VERSION='+global_revision])
 
-if IS_WINDOWS:
-    if not env['build_shared']:
-        env.Append(CPPDEFINES = ['ESYSUTILS_STATIC_LIB'])
-        env.Append(CPPDEFINES = ['PASO_STATIC_LIB'])
-
 env['IS_WINDOWS']=IS_WINDOWS
 env['IS_OSX']=IS_OSX
 
@@ -458,14 +413,6 @@ if IS_OSX:
   except KeyError:
     pass
 
-
-# these shouldn't be needed
-#for key in 'C_INCLUDE_PATH','CPLUS_INCLUDE_PATH','LIBRARY_PATH':
-#    try:
-#        env['ENV'][key] = os.environ[key]
-#    except KeyError:
-#        pass
-
 try:
     env['ENV']['PYTHONPATH'] = os.environ['PYTHONPATH']
 except KeyError:
@@ -473,6 +420,11 @@ except KeyError:
 
 ######################## Add some custom builders ############################
 
+# Takes care of prefix and suffix for Python modules:
+def build_python_module(env, target, source):
+    return env.SharedLibrary(target, source, SHLIBPREFIX='', SHLIBSUFFIX='.so')
+env.AddMethod(build_python_module, "PythonModule")
+
 if env['pythoncmd']=='python':
     py_builder = Builder(action = build_py, suffix = '.pyc', src_suffix = '.py', single_source=True)
 else:
@@ -502,32 +454,70 @@ env=checkPython(env)
 ######## boost & boost-python (required)
 env=checkBoost(env)
 
-######## NVCC version (optional)
-if env['cuda']:
-    env=checkCudaVersion(env)
-    env=checkCUDA(env)
-
 ######## numpy (required) and numpy headers (optional)
 env=checkNumpy(env)
 
 ######## CppUnit (required for tests)
 env=checkCppUnit(env)
 
+######## NVCC version (optional)
+if env['cuda'] and 'ripley' in env['domains']:
+    env=checkCudaVersion(env)
+    env=checkCUDA(env)
+
 ######## optional python modules (sympy, pyproj)
 env=checkOptionalModules(env)
 
 ######## optional dependencies (netCDF, PAPI, MKL, UMFPACK, Lapack, Silo, ...)
 env=checkOptionalLibraries(env)
 
-#use gmsh info to set some defines
-if env['gmsh'] == 's':
-    env.Append(CPPDEFINES=['GMSH'])
-elif env['gmsh'] == 'm':
-    env.Append(CPPDEFINES=['GMSH','GMSH_MPI'])
-
 ######## PDFLaTeX (for documentation)
 env=checkPDFLatex(env)
 
+# set defaults for launchers if not otherwise specified
+if env['prelaunch'] == 'default':
+    if env['mpi'] == 'INTELMPI' and env['openmp']:
+        env['prelaunch'] = "export I_MPI_PIN_DOMAIN=omp"
+    elif env['mpi'] == 'OPENMPI':
+        # transform comma-separated list to '-x a -x b -x c ...'
+        env['prelaunch'] = "EE=$(echo -x %e|sed -e 's/,/ -x /g')"
+    elif env['mpi'] == 'MPT':
+        env['prelaunch'] = "export MPI_NUM_MEMORY_REGIONS=0"
+    elif env['mpi'] == 'MPICH2':
+        env['prelaunch'] = "mpdboot -n %n -r ssh -f %f"
+    else:
+        env['prelaunch'] = ""
+
+if env['launcher'] == 'default':
+    if env['mpi'] == 'INTELMPI':
+        env['launcher'] = "mpirun -hostfile %f -n %N -ppn %p %b"
+    elif env['mpi'] == 'OPENMPI':
+        # default to OpenMPI version 1.10 or higher
+        env['launcher'] = "mpirun ${AGENTOVERRIDE} --gmca mpi_warn_on_fork 0 ${EE} --host %h --map-by node:pe=%t -bind-to core -np %N %b"
+        if 'orte_version' in env:
+            major,minor,point = [int(i) for i in env['orte_version'].split('.')]
+            if major == 1 and minor < 10:
+                env['launcher'] = "mpirun ${AGENTOVERRIDE} --gmca mpi_warn_on_fork 0 ${EE} --host %h --cpus-per-rank %t -np %N %b"
+    elif env['mpi'] == 'MPT':
+        env['launcher'] = "mpirun %h -np %p %b"
+    elif env['mpi'] == 'MPICH':
+        env['launcher'] = "mpirun -machinefile %f -np %N %b"
+    elif env['mpi'] == 'MPICH2':
+        env['launcher'] = "mpiexec -genvlist %e -np %N %b"
+    else:
+        env['launcher'] = "%b"
+
+if env['postlaunch'] == 'default':
+    if env['mpi'] == 'MPICH2':
+        env['postlaunch'] = "mpdallexit"
+    else:
+        env['postlaunch'] = ""
+
+# dependency sanity checks
+
+if len(env['domains']) == 0:
+    env['warnings'].append("No domains have been built, escript will not be very useful!")
+
 # keep some of our install paths first in the list for the unit tests
 env.PrependENVPath(LD_LIBRARY_PATH_KEY, env['libinstall'])
 env.PrependENVPath('PYTHONPATH', prefix)
@@ -545,15 +535,9 @@ if not env['verbose']:
     env['PDFLATEXCOMSTR'] = "Building $TARGET from LaTeX input $SOURCES"
     #Progress(['Checking -\r', 'Checking \\\r', 'Checking |\r', 'Checking /\r'], interval=17)
 
-####################### Configure the subdirectories #########################
+########################### Configure the targets ############################
 
-# remove obsolete files
-if not env['usempi']:
-    Execute(Delete(os.path.join(env['libinstall'], 'pythonMPI')))
-    Execute(Delete(os.path.join(env['bininstall'], 'escript-overlord')))
-    Execute(Delete(os.path.join(env['libinstall'], 'pythonMPIredirect')))
-
-from grouptest import *
+from grouptest import GroupTest
 TestGroups=[]
 
 # keep an environment without warnings-as-errors
@@ -572,152 +556,86 @@ Export(
   ]
 )
 
-#do not auto build
-env.SConscript(dirs = ['tools/escriptconvert'], variant_dir='$BUILD_DIR/$PLATFORM/tools/escriptconvert', duplicate=0)
-env.SConscript(dirs = ['tools/overlord'], variant_dir='$BUILD_DIR/$PLATFORM/tools/overlord', duplicate=0)
-env.SConscript(dirs = ['paso/src'], variant_dir='$BUILD_DIR/$PLATFORM/paso', duplicate=0)
-env.SConscript(dirs = ['weipa/src'], variant_dir='$BUILD_DIR/$PLATFORM/weipa', duplicate=0)
-env.SConscript(dirs = ['escript/py_src'], variant_dir='$BUILD_DIR/$PLATFORM/escript', duplicate=0)
-
-env.SConscript(dirs = ['cusplibrary'])
-
-#This will pull in the escriptcore/py_src and escriptcore/test
-env.SConscript(dirs = ['escriptcore/src'], variant_dir='$BUILD_DIR/$PLATFORM/escriptcore', duplicate=0)
-env.SConscript(dirs = ['esysUtils/src'], variant_dir='$BUILD_DIR/$PLATFORM/esysUtils', duplicate=0)
-env.SConscript(dirs = ['pasowrap/src'], variant_dir='$BUILD_DIR/$PLATFORM/pasowrap', duplicate=0)
-if 'dudley' in env['domains']:
-    env.SConscript(dirs = ['dudley/src'], variant_dir='$BUILD_DIR/$PLATFORM/dudley', duplicate=0)
-if 'finley' in env['domains']:
-    env.SConscript(dirs = ['finley/src'], variant_dir='$BUILD_DIR/$PLATFORM/finley', duplicate=0)
-if 'ripley' in env['domains']:
-    env.SConscript(dirs = ['ripley/src'], variant_dir='$BUILD_DIR/$PLATFORM/ripley', duplicate=0)
-if 'speckley' in env['domains']:
-    env.SConscript(dirs = ['speckley/src'], variant_dir='$BUILD_DIR/$PLATFORM/speckley', duplicate=0)
-env.SConscript(dirs = ['downunder/py_src'], variant_dir='$BUILD_DIR/$PLATFORM/downunder', duplicate=0)
-env.SConscript(dirs = ['modellib/py_src'], variant_dir='$BUILD_DIR/$PLATFORM/modellib', duplicate=0)
-env.SConscript(dirs = ['pycad/py_src'], variant_dir='$BUILD_DIR/$PLATFORM/pycad', duplicate=0)
-env.SConscript(dirs = ['pythonMPI/src'], variant_dir='$BUILD_DIR/$PLATFORM/pythonMPI', duplicate=0)
-env.SConscript(dirs = ['doc'], variant_dir='$BUILD_DIR/$PLATFORM/doc', duplicate=0)
-env.SConscript(dirs = ['paso/profiling'], variant_dir='$BUILD_DIR/$PLATFORM/paso/profiling', duplicate=0)
-
-
-######################## Populate the buildvars file #########################
-
-write_buildvars(env)
-
-write_launcher(env)
-
-################### Targets to build and install libraries ###################
-
 target_init = env.Command(os.path.join(env['pyinstall'],'__init__.py'), None, Touch('$TARGET'))
 env.Alias('target_init', [target_init])
-# delete buildvars upon cleanup
-env.Clean('target_init', os.path.join(env['libinstall'], 'buildvars'))
-
-# The headers have to be installed prior to build in order to satisfy
-# #include <paso/Common.h>
-env.Alias('build_esysUtils', ['install_esysUtils_headers', 'build_esysUtils_lib'])
-env.Alias('install_esysUtils', ['build_esysUtils', 'install_esysUtils_lib'])
-
-env.Alias('build_paso', ['install_paso_headers', 'build_paso_lib'])
-env.Alias('install_paso', ['build_paso', 'install_paso_lib'])
 
-env.Alias('build_escript', ['install_escript_headers', 'build_escript_lib', 'build_escriptcpp_lib'])
-env.Alias('install_escript', ['build_escript', 'install_escript_lib', 'install_escriptcpp_lib', 'install_escriptcore_py', 'install_escript_py'])
+# escript can't be turned off
+build_all_list = ['build_escript']
+install_all_list = ['target_init', 'install_escript']
 
-env.Alias('build_pasowrap', ['install_pasowrap_headers', 'build_pasowrap_lib', 'build_pasowrapcpp_lib'])
-env.Alias('install_pasowrap', ['build_pasowrap', 'install_pasowrap_lib', 'install_pasowrapcpp_lib', 'install_pasowrap_py'])
-
-if 'dudley' in env['domains']:
-    env.Alias('build_dudley', ['install_dudley_headers', 'build_dudley_lib', 'build_dudleycpp_lib'])
-    env.Alias('install_dudley', ['build_dudley', 'install_dudley_lib', 'install_dudleycpp_lib', 'install_dudley_py'])
-
-if 'finley' in env['domains']:
-    env.Alias('build_finley', ['install_finley_headers', 'build_finley_lib', 'build_finleycpp_lib'])
-    env.Alias('install_finley', ['build_finley', 'install_finley_lib', 'install_finleycpp_lib', 'install_finley_py'])
-
-if 'ripley' in env['domains']:
-    env.Alias('build_ripley', ['install_cusp_headers', 'install_ripley_headers', 'build_ripley_lib', 'build_ripleycpp_lib'])
-    env.Alias('install_ripley', ['build_ripley', 'install_ripley_lib', 'install_ripleycpp_lib', 'install_ripley_py'])
-
-if 'speckley' in env['domains']:
-    env.Alias('build_speckley', ['install_speckley_headers', 'build_speckley_lib', 'build_speckleycpp_lib'])
-    env.Alias('install_speckley', ['build_speckley', 'install_speckley_lib', 'install_speckleycpp_lib', 'install_speckley_py'])
-
-env.Alias('build_weipa', ['install_weipa_headers', 'build_weipa_lib', 'build_weipacpp_lib'])
-env.Alias('install_weipa', ['build_weipa', 'install_weipa_lib', 'install_weipacpp_lib', 'install_weipa_py'])
-
-env.Alias('build_escriptreader', ['install_weipa_headers', 'build_escriptreader_lib'])
-env.Alias('install_escriptreader', ['build_escriptreader', 'install_escriptreader_lib'])
-
-# Now gather all the above into some easy targets: build_all and install_all
-build_all_list = []
-build_all_list += ['build_esysUtils']
-build_all_list += ['build_paso']
-build_all_list += ['build_escript']
-build_all_list += ['build_pasowrap']
-if 'dudley' in env['domains']: build_all_list += ['build_dudley']
-if 'finley' in env['domains']: build_all_list += ['build_finley']
-if 'ripley' in env['domains']: build_all_list += ['build_ripley']
-if 'speckley' in env['domains']: build_all_list += ['build_speckley']
-build_all_list += ['build_weipa']
-if not IS_WINDOWS and 'finley' in env['domains']:
-    build_all_list += ['build_escriptreader']
 if env['usempi']:
     build_all_list += ['build_pythonMPI', 'build_overlord']
-env.Alias('build_all', build_all_list)
-
-install_all_list = []
-install_all_list += ['target_init']
-install_all_list += ['install_esysUtils']
-install_all_list += ['install_paso']
-install_all_list += ['install_escript']
-install_all_list += ['install_pasowrap']
-if 'dudley' in env['domains']: install_all_list += ['install_dudley']
-if 'finley' in env['domains']: install_all_list += ['install_finley']
-if 'ripley' in env['domains']: install_all_list += ['install_ripley']
-if 'speckley' in env['domains']: install_all_list += ['install_speckley']
-install_all_list += ['install_weipa']
-if not IS_WINDOWS and 'finley' in env['domains']:
-    install_all_list += ['install_escriptreader']
+    install_all_list += ['install_pythonMPI', 'install_overlord']
+
+env['buildvars']['paso'] = int(env['paso'])
+if env['paso']:
+    env.Append(CPPDEFINES = ['ESYS_HAVE_PASO'])
+    build_all_list += ['build_paso']
+    install_all_list += ['install_paso']
+
+env['buildvars']['trilinos'] = int(env['trilinos'])
+if env['trilinos']:
+    build_all_list += ['build_trilinoswrap']
+    install_all_list += ['install_trilinoswrap']
+
+env['buildvars']['domains'] = ','.join(env['domains'])
+for domain in env['domains']:
+    env.Append(CPPDEFINES = ['ESYS_HAVE_'+domain.upper()])
+    build_all_list += ['build_%s'%domain]
+    install_all_list += ['install_%s'%domain]
+
+env['buildvars']['weipa'] = int(env['weipa'])
+if env['weipa']:
+    env.Append(CPPDEFINES = ['ESYS_HAVE_WEIPA'])
+    build_all_list += ['build_weipa']
+    install_all_list += ['install_weipa']
+    if 'finley' in env['domains'] or 'dudley' in env['domains']:
+        build_all_list += ['build_escriptreader']
+        install_all_list += ['install_escriptreader']
+
+variant='$BUILD_DIR/$PLATFORM/'
+env.SConscript('escriptcore/SConscript', variant_dir=variant+'escriptcore', duplicate=0)
+env.SConscript('escript/py_src/SConscript', variant_dir=variant+'escript', duplicate=0)
+env.SConscript('pythonMPI/src/SConscript', variant_dir=variant+'pythonMPI', duplicate=0)
+env.SConscript('tools/overlord/SConscript', variant_dir=variant+'tools/overlord', duplicate=0)
+env.SConscript('paso/SConscript', variant_dir=variant+'paso', duplicate=0)
+env.SConscript('trilinoswrap/SConscript', variant_dir=variant+'trilinoswrap', duplicate=0)
+env.SConscript('cusplibrary/SConscript')
+env.SConscript('dudley/SConscript', variant_dir=variant+'dudley', duplicate=0)
+env.SConscript('finley/SConscript', variant_dir=variant+'finley', duplicate=0)
+env.SConscript('ripley/SConscript', variant_dir=variant+'ripley', duplicate=0)
+env.SConscript('speckley/SConscript', variant_dir=variant+'speckley', duplicate=0)
+env.SConscript('weipa/SConscript', variant_dir=variant+'weipa', duplicate=0)
+env.SConscript(dirs = ['downunder/py_src'], variant_dir=variant+'downunder', duplicate=0)
+env.SConscript(dirs = ['modellib/py_src'], variant_dir=variant+'modellib', duplicate=0)
+env.SConscript(dirs = ['pycad/py_src'], variant_dir=variant+'pycad', duplicate=0)
+env.SConscript('tools/escriptconvert/SConscript', variant_dir=variant+'tools/escriptconvert', duplicate=0)
+env.SConscript('doc/SConscript', variant_dir=variant+'doc', duplicate=0)
+
+env.Alias('build', build_all_list)
+
 install_all_list += ['install_downunder_py']
 install_all_list += ['install_modellib_py']
 install_all_list += ['install_pycad_py']
-if env['usempi']:
-    install_all_list += ['install_pythonMPI', 'install_overlord']
-install_all_list += ['install_weipa_py']    
-install_all_list += [env.Install(os.path.join(env['build_dir'],'scripts'), os.path.join('scripts', 'release_sanity.py'))]
-
+install_all_list += [env.Install(Dir('scripts',env['build_dir']), os.path.join('scripts', 'release_sanity.py'))]
 
 if env['osx_dependency_fix']:
     print("Require dependency fix")
-    install_all=env.Command('install_all',install_all_list,'scripts/moveall.sh')
+    install_all=env.Command('install', install_all_list, 'scripts/moveall.sh')
 else:
-    install_all=env.Alias('install_all', install_all_list)
-
-
-
-
-# Default target is install
-#env.Default('install_all')
-
+    install_all=env.Alias('install', install_all_list)
 
 sanity=env.Alias('sanity', env.Command('dummy','',os.path.join(env['prefix'], 'bin', 'run-escript')+' '+os.path.join(env['build_dir'],'scripts', 'release_sanity.py')))
 env.Depends('dummy', install_all)
 if env['usempi']:
-   #env.Requires('dummy', ['build_pythonMPI', 'install_pythonMPI'])
-   #env.Requires('dummy', env['prefix']+"/lib/pythonMPI")
-   env.Depends('dummy', ['build_pythonMPI', 'install_pythonMPI'])
-   env.Depends('dummy', env['prefix']+"/lib/pythonMPI")   
-
-if 'install_dudley' in install_all_list and \
-   'install_finley' in install_all_list and \
-   'install_ripley' in install_all_list and \
-   'install_speckley' in install_all_list:
+   env.Depends('dummy', ['install_pythonMPI'])
+
+# if all domains are built:
+if env['domains'] == all_domains:
        env.AlwaysBuild('sanity')
        env.Default('sanity')
 else:
-    env.Default('install_all')
+    env.Default('install')
 
 ################## Targets to build and run the test suite ###################
 
@@ -725,19 +643,18 @@ if not env['cppunit']:
     test_msg = env.Command('.dummy.', None, '@echo "Cannot run C++ unit tests, CppUnit not found!";exit 1')
     env.Alias('run_tests', test_msg)
     env.Alias('build_tests', '')
-env.Alias('run_tests', ['install_all'])
-env.Alias('all_tests', ['install_all', 'run_tests', 'py_tests'])
-env.Alias('build_full',['install_all','build_tests','build_py_tests'])
-env.Alias('build_PasoTests','$BUILD_DIR/$PLATFORM/paso/profiling/PasoTests')
-Requires('py_tests', 'install_all')
+env.Alias('run_tests', ['install'])
+env.Alias('all_tests', ['install', 'run_tests', 'py_tests'])
+env.Alias('build_full',['install','build_tests','build_py_tests'])
+Requires('py_tests', 'install')
 
 ##################### Targets to build the documentation #####################
 
 env.Alias('pdfdocs',['user_pdf', 'install_pdf', 'cookbook_pdf', 'inversion_pdf'])
 env.Alias('basedocs', ['pdfdocs','examples_tarfile', 'examples_zipfile', 'api_doxygen'])
 env.Alias('docs', ['basedocs', 'sphinxdoc'])
-env.Alias('release_prep', ['docs', 'install_all'])
-env.Alias('release_prep_old', ['basedocs', 'api_epydoc', 'install_all'])
+env.Alias('release_prep', ['docs', 'install'])
+env.Alias('release_prep_old', ['basedocs', 'api_epydoc', 'install'])
 
 # The test scripts are always generated, this target allows us to
 # generate the testscripts without doing a full build
@@ -746,44 +663,51 @@ env.Alias('testscripts',[])
 if not IS_WINDOWS:
     generateTestScripts(env, TestGroups)
 
+######################## Populate the buildvars file #########################
+
+write_buildvars(env)
+# delete buildvars upon cleanup - target_init is default so use it
+env.Clean('target_init', File('buildvars', env['libinstall']))
+
+write_launcher(env)
+
+# remove obsolete files
+if not env['usempi']:
+    Execute(Delete(File(['pythonMPI','pythonMPIredirect'], env['libinstall'])))
+    Execute(Delete(File('escript-overlord', env['bininstall'])))
 
 ######################## Summarize our environment ###########################
 def print_summary():
+    d_list=[]
     print("")
     print("*** Config Summary (see config.log and <prefix>/lib/buildvars for details) ***")
-    print("Escript/Finley revision %s"%global_revision)
+    print("Escript revision %s"%global_revision)
     print("  Install prefix:  %s"%env['prefix'])
-    print("          Python:  %s"%sysconfig.PREFIX)
+    print("          Python:  %s (Version %s)"%(env['pythoncmd'],env['python_version']))
     print("           boost:  %s (Version %s)"%(env['boost_prefix'],env['boost_version']))
     if env['numpy_h']:
         print("           numpy:  YES (with headers)")
     else:
         print("           numpy:  YES (without headers)")
     if env['usempi']:
-        print("             MPI:  YES (flavour: %s)"%env['mpi'])
+        if 'orte_version' in env:
+            print("             MPI:  %s (Version %s)"%(env['mpi'], env['orte_version']))
+        else:
+            print("             MPI:  YES (flavour: %s)"%env['mpi'])
     else:
-        print("             MPI:  NO")
+        d_list.append('mpi')
     if env['parmetis']:
         print("        ParMETIS:  %s (Version %s)"%(env['parmetis_prefix'],env['parmetis_version']))
     else:
-        print("        ParMETIS:  NO")
+        d_list.append('parmetis')
     if env['uselapack']:
         print("          LAPACK:  YES (flavour: %s)"%env['lapack'])
     else:
-        print("          LAPACK:  NO")
+        d_list.append('lapack')
     if env['cuda']:
         print("            CUDA:  YES (nvcc: %s)"%env['nvcc_version'])
     else:
-        print("            CUDA:  NO")
-    d_list=[]
-    e_list=[]
-    for i in 'debug','openmp','boomeramg','cppunit','gdal','mkl','netcdf','papi','pyproj','scipy','silo','sympy','umfpack','visit':
-        if env[i]: e_list.append(i)
-        else: d_list.append(i)
-    for i in e_list:
-        print("%16s:  YES"%i)
-    for i in d_list:
-        print("%16s:  NO"%i)
+        d_list.append('cuda')
     if env['gmshpy']:
         gmshpy=" + python module"
     else:
@@ -796,8 +720,45 @@ def print_summary():
         if env['gmshpy']:
             print("            gmsh:  python module only")
         else:
-            print("            gmsh:  NO")
-    print(    "            gzip:  " + ("YES" if env['compressed_files'] else "NO"))
+            d_list.append('gmsh')
+    if env['compressed_files']:
+        print("            gzip:  YES")
+    else:
+        d_list.append('gzip')
+
+    solvers = []
+    direct = []
+    if env['paso']:
+        solvers.append('paso')
+        if env['mkl']:
+            direct.append('mkl')
+        if env['umfpack']:
+            direct.append('umfpack')
+    else:
+        d_list.append('paso')
+    if env['trilinos']:
+        solvers.append('trilinos')
+        direct.append('trilinos')
+    else:
+        d_list.append('trilinos')
+
+    print("  Solver library:  %s"%(", ".join(solvers)))
+    if len(direct) > 0:
+        print("   Direct solver:  YES (%s)"%(", ".join(direct)))
+    else:
+        print("   Direct solver:  NONE")
+    print("         domains:  %s"%(", ".join(env['domains'])))
+
+    e_list=[]
+    for i in 'weipa','debug','openmp','boomeramg','cppunit','gdal','mkl',\
+             'netcdf','papi','pyproj','scipy','silo','sympy','umfpack','visit':
+        if env[i]: e_list.append(i)
+        else: d_list.append(i)
+
+    d_list += set(all_domains).difference(env['domains'])
+    for i in e_list:
+        print("%16s:  YES"%i)
+    print("\n  DISABLED features: %s"%(" ".join(sorted(d_list))))
 
     if ((fatalwarning != '') and (env['werror'])):
         print("  Treating warnings as errors")
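The optional-component wiring above follows one pattern: every enabled component or domain
contributes an ESYS_HAVE_* preprocessor define plus matching build_*/install_* aliases and a
buildvars entry. A condensed, standalone sketch of that pattern, using plain Python lists and
a dict as stand-ins for the SCons environment (this is not the build script itself):

    # stand-ins for the SCons environment used above
    enabled = {'paso': True, 'weipa': True}
    domains = ['dudley', 'finley']

    cppdefines = []
    build_all = ['build_escript']
    install_all = ['target_init', 'install_escript']

    for component in ('paso', 'weipa'):
        if enabled[component]:
            cppdefines.append('ESYS_HAVE_' + component.upper())
            build_all.append('build_' + component)
            install_all.append('install_' + component)

    for domain in domains:
        cppdefines.append('ESYS_HAVE_' + domain.upper())
        build_all.append('build_%s' % domain)
        install_all.append('install_%s' % domain)

    print(cppdefines)  # ['ESYS_HAVE_PASO', 'ESYS_HAVE_WEIPA', 'ESYS_HAVE_DUDLEY', 'ESYS_HAVE_FINLEY']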
diff --git a/debian/changelog b/debian/changelog
index 8f26417..9d0749b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+python-escript (5.0-1) unstable; urgency=medium
+
+  * Dummy commit to be patched by gladk
+
+ -- Joel Fenwick <j.oelpublic at gmail.com>  Mon, 19 Sep 2016 12:44:21 +1000
+
 python-escript (4.2.0.1-4) unstable; urgency=medium
 
   [ Joel Fenwick ]
diff --git a/debian/patches/10_use_python35.patch b/debian/patches/10_use_python35.patch
deleted file mode 100644
index acd4106..0000000
--- a/debian/patches/10_use_python35.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-Description: Use Python 3.5
-Author: Anton Gladky <gladk at debian.org>
-Last-Update: 2016-05-16
-
-Index: python-escript/scons/templates/jessie_py3_options.py
-===================================================================
---- python-escript.orig/scons/templates/jessie_py3_options.py
-+++ python-escript/scons/templates/jessie_py3_options.py
-@@ -17,9 +17,9 @@
- from .jessie_options import *
- 
- # boost-python library/libraries to link against
--boost_libs = ['boost_python-py34']
-+boost_libs = ['boost_python-py35']
- usepython3=True
- pythoncmd='python3'
--pythonlibname='python3.4m'
--pythonincpath='/usr/include/python3.4'
-+pythonlibname='python3.5m'
-+pythonincpath='/usr/include/python3.5'
- 
-Index: python-escript/scons/templates/jessie_py3_mpi_options.py
-===================================================================
---- python-escript.orig/scons/templates/jessie_py3_mpi_options.py
-+++ python-escript/scons/templates/jessie_py3_mpi_options.py
-@@ -20,7 +20,7 @@ from .jessie_options import *
- boost_libs = ['boost_python-py34']
- usepython3=True
- pythoncmd='python3'
--pythonlibname='python3.4m'
--pythonincpath='/usr/include/python3.4'
-+pythonlibname='python3.5m'
-+pythonincpath='/usr/include/python3.5'
- 
- mpi='OPENMPI'
diff --git a/debian/patches/11_use_c++03 b/debian/patches/11_use_c++03
deleted file mode 100644
index 86b37df..0000000
--- a/debian/patches/11_use_c++03
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/scons/templates/sid_options.py
-+++ b/scons/templates/sid_options.py
-@@ -56,7 +56,7 @@ escript_opts_version = 202
- # Flags to use with the C++ compiler. Do not set unless you know
- # what you are doing - use cxx_extra to specify additional flags!
- # DEFAULT: compiler-dependent
--#cc_flags = ''
-+cc_flags = '-std=c++03'
- 
- # Additional compiler (optimization) flags for non-debug builds
- # DEFAULT: compiler-dependent
diff --git a/debian/patches/13_cpp_overlord b/debian/patches/13_cpp_overlord
deleted file mode 100644
index ebb1a87..0000000
--- a/debian/patches/13_cpp_overlord
+++ /dev/null
@@ -1,84 +0,0 @@
---- a/tools/overlord/SConscript
-+++ b/tools/overlord/SConscript
-@@ -19,7 +19,7 @@ Import('*')
- local_env = env.Clone()
- 
- if not local_env['IS_WINDOWS']:
--    prog = local_env.Program('escript-overlord', ['overlord.c'])
-+    prog = local_env.Program('escript-overlord', ['overlord.cpp'])
-     env.Alias('build_overlord', prog)
-     install_overlord = local_env.Install(local_env['bininstall'], prog)
-     env.Alias('install_overlord', install_overlord) #oh no! our freedoms!
---- /dev/null
-+++ b/tools/overlord/overlord.cpp
-@@ -0,0 +1,70 @@
-+/*****************************************************************************
-+*
-+* Copyright (c) 2003-2016 by The University of Queensland
-+* http://www.uq.edu.au
-+*
-+* Primary Business: Queensland, Australia
-+* Licensed under the Apache License, version 2.0
-+* http://www.apache.org/licenses/LICENSE-2.0
-+*
-+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-+* Development 2012-2013 by School of Earth Sciences
-+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-+*
-+*****************************************************************************/
-+
-+#include <stdlib.h>
-+#include <unistd.h>
-+#include <stdio.h>
-+#include <string.h>
-+#include <errno.h>
-+#include <sys/socket.h>
-+#include <netinet/in.h>
-+#include <arpa/inet.h>
-+
-+int main(int argc, char **argv) {
-+    int key = 0, port = 0, sfd = 0;
-+    FILE *escript = NULL;
-+    struct sockaddr_in sa;
-+
-+    if (argc < 4) {
-+        fprintf(stderr, "Missing minimum arguments: %s port key cmd [args]\n",
-+                argv[0]);
-+        return 1;
-+    }
-+    key = atoi(argv[2]);
-+    port = atoi(argv[1]);
-+    
-+    
-+    sa.sin_family = AF_INET;
-+    sa.sin_port = htons(port);
-+    sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-+    memset(sa.sin_zero, '\0', sizeof(sa.sin_zero));
-+    
-+    sfd = socket(PF_INET, SOCK_STREAM, 0);
-+    if (sfd < 0) {
-+        perror("overlord socket creation failed");
-+        return 1;
-+    }
-+
-+    if (connect(sfd, (struct sockaddr*)&sa, sizeof(sa)) < 0) {
-+        perror("overlord connect() call failed");
-+        return 1;
-+    }
-+    
-+    escript = fdopen(sfd, "w");
-+    if (escript == NULL) {
-+        perror("overlord failed to open file descriptor for writes");
-+        return 1;
-+    }
-+    if (fwrite(&key, sizeof(int), 1, escript) != 1) {
-+        fprintf(stderr, "overlord failed to initialise communication with escript\n");
-+        return 1;
-+    }
-+        
-+    fflush(escript);
-+    execvp(argv[3], argv+3);
-+    perror("overlord exec failed");
-+    return 1;
-+}
-+
diff --git a/debian/patches/series b/debian/patches/series
index 35f21f1..e69de29 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +0,0 @@
-10_use_python35.patch
-11_use_c++03
-13_cpp_overlord
diff --git a/doc/SConscript b/doc/SConscript
index e8041fa..2100f9e 100644
--- a/doc/SConscript
+++ b/doc/SConscript
@@ -165,8 +165,6 @@ example_deps.append('inversion/data/QLDWestMagnetic.nc')
 example_deps.append('inversion/data/HalfSphere_v1.4.msh')
 
 
-
-
 if len(skipped_tests)>0:
     env['warnings'].append("gmsh not available. Skipping tests %s!"%' '.join(skipped_tests))
 
diff --git a/doc/docguide.tex b/doc/docguide.tex
index 823787c..6dbffad 100644
--- a/doc/docguide.tex
+++ b/doc/docguide.tex
@@ -30,8 +30,7 @@ Here is a rough guide to what goes where.
 \begin{tabular}{rp{11cm}}
  \textbf{install.pdf} & ``Installation guide for \emph{esys-Escript}'': 
  Instructions for compiling \emph{escript} for your system from its
- source code. 
- Also briefly covers installing \texttt{.deb} packages for Debian and Ubuntu.\\
+ source code.  \\
  &\\
  \textbf{cookbook.pdf} & ``The \textit{escript} COOKBOOK'':
  An introduction to \emph{escript} for new users from a geophysics perspective.\\
diff --git a/doc/doxygen/doxygen_esys b/doc/doxygen/doxygen_esys
index 41d41e1..6455b23 100644
--- a/doc/doxygen/doxygen_esys
+++ b/doc/doxygen/doxygen_esys
@@ -754,11 +754,9 @@ WARN_LOGFILE           =
 # Note: If this tag is empty the current directory is searched.
 
 INPUT                  = escriptcore/src \
-                         esysUtils/src \
                          dudley/src \
                          finley/src \
                          paso/src \
-                         pasowrap/src \
                          pythonMPI/src \
                          ripley/src \
                          speckley/src \
@@ -2363,3 +2361,4 @@ GENERATE_LEGEND        = YES
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 DOT_CLEANUP            = YES
+
diff --git a/doc/examples/SConscript b/doc/examples/SConscript
index a2f2057..0b0aae3 100644
--- a/doc/examples/SConscript
+++ b/doc/examples/SConscript
@@ -14,7 +14,7 @@
 #
 ##############################################################################
 
-from grouptest import *
+from grouptest import GroupTest
 import os
 Import('*')
 local_env = env.Clone()
@@ -24,57 +24,42 @@ example_files = example_files_allow_mpi + example_files_no_mpi
 src_dir = local_env.Dir('.').srcnode().abspath
 test_dir= Dir('test', local_env.Dir('.'))
 cc=Command(test_dir, [], Mkdir("$TARGET"))
-# Execute(Delete(test_dir.srcnode().abspath))
-# Execute(Mkdir(test_dir.srcnode().abspath))
-#
-#   create copy of all non-py files:
-#
-data_files=[]
-for i in example_files:
-    if not i.endswith('.py'):
-        f_in=File(os.path.join(src_dir,i))
-        f_out=File(i,test_dir)
-        data_files.append(Command(f_out, f_in, Copy("$TARGET", "$SOURCE")))
 
+# create copy of all non-py files:
 # We need this because of cblib.py which needs to be present for other tests
 # to work
+data_files=[]
 for i in example_deps:
     f_in=File(os.path.join(src_dir,i))
     f_out=File(i,test_dir)
     data_files.append(Command(f_out, f_in, Copy("$TARGET", "$SOURCE")))
-Depends(cc, data_files)
 
-np=int(local_env['ENV']['ESCRIPT_NUM_NODES'])
 programs=[]
 copies=[]
 for i in example_files:
-#    if i.endswith('.py') and (np==1 or not i in example_files_no_mpi):
-    if i.endswith('.py') and (local_env['mpi']=='none' or not i in example_files_no_mpi):
+    if local_env['mpi']=='none' or not i in example_files_no_mpi:
         f_in=File(os.path.join(src_dir,i))
         f_out=File(i, test_dir)
         c=Command(f_out, f_in, Copy("$TARGET", "$SOURCE"))
         copies+=c
         programs+=local_env.RunPyExample(c)
 
-#env.Alias('build_py_tests', programs)
 env.Alias('build_py_tests', data_files)
 env.Alias('build_py_tests', copies)
 env.Alias('py_tests', programs)
 Depends(programs, data_files)
 Depends(cc, data_files)
 
-# Add a group of tests
-# find all directories:
-for single_processor_only in [True, False]:
-   dirs=[]
-   for i in example_files:
-      if i.endswith('.py'): 
-         if single_processor_only == (i in example_files_no_mpi):
-               d=os.path.split(i)[0]
-               if not d in dirs: dirs.append(d)
-   for d in dirs:
-      runs=[]
-      for i in example_files:
-          if i.startswith(d) and i.endswith('.py') and (single_processor_only == (i in example_files_no_mpi)): runs.append(os.path.split(i)[1])
-      TestGroups.append(GroupTest("$PYTHONRUNNER ",(),"",os.path.join("$BATCH_ROOT/doc/examples",d),runs, single_processor_only=single_processor_only))
+# Add group of tests
+dirs = set([os.path.split(i)[0] for i in example_files])
+for d in dirs:
+    single_runs=[]
+    runs=[]
+    for i in example_files_no_mpi:
+        if i.startswith(d):
+            single_runs.append(os.path.split(i)[1])
+    for i in example_files_allow_mpi:
+        if i.startswith(d):
+            runs.append(os.path.split(i)[1])
+    TestGroups.append(GroupTest(d, "$PYTHONRUNNER ", (), "", os.path.join("$BATCH_ROOT/doc/examples",d), runs, single_runs))
 
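The rewritten example SConscript above groups test runs per example directory, splitting each
directory into MPI-capable runs and single-process-only runs. A standalone sketch of that
grouping logic, with made-up placeholder file lists (the real lists are Import()ed by the
SConscript and the resulting groups are handed to GroupTest):

    # placeholder file lists; the SConscript imports the real ones
    import os

    example_files_no_mpi    = ['cookbook/example10m.py']
    example_files_allow_mpi = ['cookbook/example01a.py', 'inversion/grav_netcdf.py']
    example_files = example_files_allow_mpi + example_files_no_mpi

    dirs = set(os.path.split(i)[0] for i in example_files)
    for d in sorted(dirs):
        runs        = [os.path.split(i)[1] for i in example_files_allow_mpi if i.startswith(d)]
        single_runs = [os.path.split(i)[1] for i in example_files_no_mpi if i.startswith(d)]
        print(d, runs, single_runs)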
diff --git a/doc/examples/cookbook/example01a.py b/doc/examples/cookbook/example01a.py
index 1d0cd6d..b29e181 100644
--- a/doc/examples/cookbook/example01a.py
+++ b/doc/examples/cookbook/example01a.py
@@ -31,6 +31,7 @@ Author: Antony Hallam antony.hallam at uqconnect.edu.au
 import sys
 #######################################################EXTERNAL MODULES
 # To solve the problem it is necessary to import the modules we require.
+import os
 from esys.escript import * # This imports everything from the escript library
 from esys.escript.unitsSI import * 
 from esys.escript.linearPDEs import LinearPDE # This defines LinearPDE as LinearPDE
diff --git a/doc/examples/cookbook/example03b.py b/doc/examples/cookbook/example03b.py
index 0d81395..adbff41 100644
--- a/doc/examples/cookbook/example03b.py
+++ b/doc/examples/cookbook/example03b.py
@@ -44,6 +44,7 @@ from esys.weipa import saveVTK
 # A useful unit handling package which will make sure all our units
 # match up in the equations under SI.
 from esys.escript.unitsSI import *
+import os
 try:
     # This imports the rectangle domain function 
     from esys.finley import Rectangle
diff --git a/doc/examples/cookbook/example08a.py b/doc/examples/cookbook/example08a.py
index ce0d852..39da9a9 100644
--- a/doc/examples/cookbook/example08a.py
+++ b/doc/examples/cookbook/example08a.py
@@ -101,7 +101,7 @@ if HAVE_FINLEY:
     src_length = 20; print("src_length = ",src_length)
     # set initial values for first two time steps with source terms
     y=U0*(cos(length(x-xc)*3.1415/src_length)+1)*whereNegative(length(x-xc)-src_length)
-    src_dir=numpy.array([0.,-1.]) # defines direction of point source as down
+    src_dir=np.array([0.,-1.]) # defines direction of point source as down
     y=y*src_dir
     mypde.setValue(y=y) #set the source as a function on the boundary
     # initial value of displacement at point source is constant (U0=0.01)
diff --git a/doc/examples/cookbook/example08b.py b/doc/examples/cookbook/example08b.py
index 92195b6..5d3b9d9 100644
--- a/doc/examples/cookbook/example08b.py
+++ b/doc/examples/cookbook/example08b.py
@@ -188,7 +188,7 @@ if HAVE_FINLEY:
     src_length = 40; print("src_length = ",src_length)
     # set initial values for first two time steps with source terms
     y=source[0]*(cos(length(x-xc)*3.1415/src_length)+1)*whereNegative(length(x-xc)-src_length)
-    src_dir=numpy.array([0.,1.]) # defines direction of point source as down
+    src_dir=np.array([0.,1.]) # defines direction of point source as down
     y=y*src_dir
     mypde.setValue(y=y) #set the source as a function on the boundary
     # turn lumping on for more efficient solving
diff --git a/doc/examples/cookbook/example09a.py b/doc/examples/cookbook/example09a.py
index 2747077..825f39f 100644
--- a/doc/examples/cookbook/example09a.py
+++ b/doc/examples/cookbook/example09a.py
@@ -154,7 +154,7 @@ if HAVE_FINLEY:
     yx=(cos(length(xb-xc)*3.1415/src_rad)+1)*whereNegative(length(xb-xc)-src_rad)
     stop=Scalar(0.0,FunctionOnBoundary(domain))
     stop.setTaggedValue("stop",1.0)
-    src_dir=numpy.array([0.,0.,1.0]) # defines direction of point source as down
+    src_dir=np.array([0.,0.,1.0]) # defines direction of point source as down
 
     mypde.setValue(y=source[0]*yx*src_dir*stop) #set the source as a function on the boundary
     # initial value of displacement at point source is constant (U0=0.01)
diff --git a/doc/examples/cookbook/example10m.py b/doc/examples/cookbook/example10m.py
index dbc7b6a..4c3238a 100644
--- a/doc/examples/cookbook/example10m.py
+++ b/doc/examples/cookbook/example10m.py
@@ -28,11 +28,12 @@ Author: Antony Hallam antony.hallam at uqconnect.edu.au
 
 ############################################################FILE HEADER
 # example10m.py
-# Create a simple 2D mesh, which is optimised for cells close to the 
+# Create a simple 2D mesh, which is optimised for cells close to the
 # source. Larger elements are used to decrease computational requirements
 # and to properly fulfil the boundary conditions.
 #
 #######################################################EXTERNAL MODULES
+from esys.escript import mkDir, getMPISizeWorld, hasFeature
 from esys.pycad import * #domain constructor
 from esys.pycad.extras import layer_cake
 from esys.pycad.gmsh import Design #Finite Element meshing package
@@ -43,18 +44,15 @@ try:
 except ImportError:
     print("Finley module not available")
     HAVE_FINLEY = False
-from esys.escript import mkDir, getMPISizeWorld
-import os
-import subprocess as sp
-########################################################MPI WORLD CHECK
-if getMPISizeWorld() > 1:
-        import sys
-        print("This example will not run in an MPI world.")
-        sys.exit(0)
 
-if HAVE_FINLEY:
-    # make sure path exists 
-    save_path= os.path.join("data","example10m") 
+########################################################MPI WORLD CHECK
+if getMPISizeWorld() > 1 or (hasFeature('mpi') and hasFeature('gmsh_mpi')):
+    print("This example will not run in an MPI world!")
+elif HAVE_FINLEY:
+    import os
+    import subprocess as sp
+    # make sure path exists
+    save_path = os.path.join("data","example10m")
     mkDir(save_path)
 
     ################################################BIG DOMAIN
@@ -119,3 +117,4 @@ if HAVE_FINLEY:
             os.path.join(save_path,"example10m_small.geo")+" "+
             os.path.join(save_path,"example10m_big.geo")+" -o "+
             os.path.join(save_path,"example10m.msh"),shell=True)
+
diff --git a/doc/examples/inversion/grav_ermapper.py b/doc/examples/inversion/grav_ermapper.py
index bee7439..a284790 100644
--- a/doc/examples/inversion/grav_ermapper.py
+++ b/doc/examples/inversion/grav_ermapper.py
@@ -74,21 +74,22 @@ def work():
   print("All done. Have a nice day.!")
 
 try:
-  import pyproj
-  havepyproj=True
+    import pyproj
+    HAVE_PYPROJ = True
 except ImportError:
-  havepyproj=False
+    HAVE_PYPROJ = False
 
 try:
-  import esys.ripley
-  HAVE_RIPLEY = True
+    import esys.ripley
+    HAVE_RIPLEY = True
 except ImportError:
-  HAVE_RIPLEY = False
+    HAVE_RIPLEY = False
 
 
-if havepyproj and HAVE_RIPLEY:
+if HAVE_PYPROJ and HAVE_RIPLEY:
   work()
 elif HAVE_RIPLEY:
   print("This example requires the pyproj package which does not appear to be accessible.")
 else:
   print("This example requires the ripley module, which is not available")
+
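The inversion examples above (and the ones that follow) all guard optional imports with the
same try/except pattern, so a missing pyproj or esys.ripley prints a message instead of raising
a traceback. A hypothetical helper condensing that pattern -- have_module is not part of
escript, and the examples themselves spell the try/except out as shown:

    import importlib

    def have_module(name):
        """Return True if 'name' can be imported, swallowing ImportError."""
        try:
            importlib.import_module(name)
            return True
        except ImportError:
            return False

    HAVE_PYPROJ = have_module('pyproj')       # map projections for geodetic coordinates
    HAVE_RIPLEY = have_module('esys.ripley')  # structured-grid domains used by these drivers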
diff --git a/doc/examples/inversion/grav_netcdf.py b/doc/examples/inversion/grav_netcdf.py
index a6f3b88..27fb726 100644
--- a/doc/examples/inversion/grav_netcdf.py
+++ b/doc/examples/inversion/grav_netcdf.py
@@ -27,10 +27,7 @@ __url__="https://launchpad.net/escript-finley"
 from esys.downunder import *
 from esys.weipa import *
 from esys.escript import unitsSI as U
-from esys.escript import saveDataCSV,getEscriptParamInt
-
-
-haveNetcdf=(getEscriptParamInt("NETCDF_BUILD",0)==1)
+from esys.escript import saveDataCSV
 
 # Set parameters
 DATASET = 'data/GravitySmall.nc'
@@ -75,28 +72,26 @@ def work():
 
   saveDataCSV("result_gravity.csv", density=density, x=density.getFunctionSpace().getX())
   print("Results saved in result_gravity.csv")
-
   print("All done. Have a nice day!")
 
 try:
     import pyproj
+    HAVE_PYPROJ = True
 except ImportError:
-    print("This example requires pyproj to be installed.")
-    import sys
-    sys.exit(0)
+    HAVE_PYPROJ = False
 
 try:
-  import esys.ripley
-  HAVE_RIPLEY = True
+    import esys.ripley
+    HAVE_RIPLEY = True
 except ImportError:
-  HAVE_RIPLEY = False
+    HAVE_RIPLEY = False
 
 if 'NetCdfData' not in dir():
     print("This example requires scipy's netcdf support which does not appear to be installed.")
 elif not HAVE_RIPLEY:
     print("Ripley module not available")
-elif not haveNetcdf:
-    print("netCDF not available.")    
+elif not HAVE_PYPROJ:
+    print("This example requires pyproj to be installed.")
 else:
     work()
 
diff --git a/doc/examples/inversion/gravmag_netcdf.py b/doc/examples/inversion/gravmag_netcdf.py
index 901e0ff..da6785a 100644
--- a/doc/examples/inversion/gravmag_netcdf.py
+++ b/doc/examples/inversion/gravmag_netcdf.py
@@ -26,11 +26,9 @@ __url__="https://launchpad.net/escript-finley"
 # Import required modules
 from esys.downunder import *
 from esys.escript import unitsSI as U
-from esys.escript import saveDataCSV, getEscriptParamInt
+from esys.escript import saveDataCSV
 from esys.weipa import *
 
-haveNetcdf=(getEscriptParamInt("NETCDF_BUILD",0)==1)
-
 # Set parameters
 MAGNETIC_DATASET = 'data/MagneticSmall.nc'
 MAG_UNITS = U.Nano * U.Tesla
@@ -90,23 +88,22 @@ def work():
 
 try:
     import pyproj
+    HAVE_PYPROJ = True
 except ImportError:
-    print("This example requires pyproj to be installed.")
-    import sys
-    sys.exit(0)
+    HAVE_PYPROJ = False
 
 try:
-  import esys.ripley
-  HAVE_RIPLEY = True
+    import esys.ripley
+    HAVE_RIPLEY = True
 except ImportError:
-  HAVE_RIPLEY = False
+    HAVE_RIPLEY = False
 
 if 'NetCdfData' not in dir():
     print("This example requires scipy's netcdf support which does not appear to be installed.")
 elif not HAVE_RIPLEY:
     print("Ripley module not available")
-elif not haveNetcdf:
-    print("netCDF not available.")
+elif not HAVE_PYPROJ:
+    print("This example requires pyproj to be installed.")
 else:
     work()
 
diff --git a/doc/examples/inversion/gravmag_nodriver.py b/doc/examples/inversion/gravmag_nodriver.py
index 962a3f3..f03f864 100644
--- a/doc/examples/inversion/gravmag_nodriver.py
+++ b/doc/examples/inversion/gravmag_nodriver.py
@@ -33,7 +33,6 @@ from esys.escript import unitsSI as U
 from esys.escript import *
 from esys.weipa import *
 
-haveNetcdf=(getEscriptParamInt("NETCDF_BUILD",0)==1)
 try:
     import pyproj
     havePyProj=True
@@ -151,8 +150,6 @@ if 'NetCdfData' not in dir():
     print("This example requires scipy's netcdf support which does not appear to be installed.")
 elif not HAVE_RIPLEY:
     print("Ripley module not available")
-elif not haveNetcdf:
-    print("netCDF not available.")     
 elif not havePyProj:
     print("This example requires pyproj.")
 else:
diff --git a/doc/examples/inversion/mag_netcdf.py b/doc/examples/inversion/mag_netcdf.py
index 679289f..bbed4a6 100644
--- a/doc/examples/inversion/mag_netcdf.py
+++ b/doc/examples/inversion/mag_netcdf.py
@@ -27,10 +27,7 @@ __url__="https://launchpad.net/escript-finley"
 from esys.downunder import *
 from esys.weipa import *
 from esys.escript import unitsSI as U
-from esys.escript import saveDataCSV,getEscriptParamInt
-
-
-haveNetcdf=(getEscriptParamInt("NETCDF_BUILD",0)==1)
+from esys.escript import saveDataCSV
 
 # Set parameters
 DATASET = 'data/MagneticSmall.nc'
@@ -84,22 +81,21 @@ def work():
 
 try:
     import pyproj
+    HAVE_PYPROJ = True
 except ImportError:
-    print("This example requires pyproj to be installed.")
-    import sys
-    sys.exit(0)
+    HAVE_PYPROJ = False
 
 try:
-  import esys.ripley
-  HAVE_RIPLEY = True
+    import esys.ripley
+    HAVE_RIPLEY = True
 except ImportError:
-  HAVE_RIPLEY = False
+    HAVE_RIPLEY = False
 
 if 'NetCdfData' not in dir():
     print("This example requires scipy's netcdf support which does not appear to be installed.")
 elif not HAVE_RIPLEY:
     print("Ripley module required but not available")
-elif not haveNetcdf:
-    print("netCDF not available.")    
+elif not HAVE_PYPROJ:
+    print("This example requires pyproj to be installed.")
 else:
     work()
diff --git a/doc/examples/inversion/synthetic_HTI.py b/doc/examples/inversion/synthetic_HTI.py
index 81010b6..6d03821 100644
--- a/doc/examples/inversion/synthetic_HTI.py
+++ b/doc/examples/inversion/synthetic_HTI.py
@@ -23,191 +23,200 @@ __url__="https://launchpad.net/escript-finley"
 from esys.escript import *
 from esys.escript import unitsSI as U
 from esys.escript.pdetools import Locator
-from esys.speckley import Brick, Rectangle
 from esys.weipa import saveSilo
 from esys.downunder import Ricker, SimpleSEGYWriter, HTIWave
 from math import ceil
 from time import time
 
+try:
+    from esys.speckley import Brick, Rectangle
+    HAVE_SPECKLEY=True
+except ImportError:
+    HAVE_SPECKLEY=False
 
-DIM=2          # spatial dimension
+if HAVE_SPECKLEY:
+    DIM=2          # spatial dimension
 
 
-ORDER = 5
-ne_z= 20
+    ORDER = 5
+    ne_z= 20
 
-# layers from the bottom up:
-layers=[20*U.m, 180*U.m ]
-v_Ps=[i*U.km/U.sec for i in [3, 2.5]]
-v_Ss= [i*U.km/U.sec for i in [3, 2]]
-rhos=[i*U.kg/U.m**3 for i in [2.6, 2.1]]
-epss=[0, .110]
-gammas=[0, 0.035]
-deltas=[0, 0.255]
-src_dir=[0,0,1]
+    # layers from the bottom up:
+    layers=[20*U.m, 180*U.m ]
+    v_Ps=[i*U.km/U.sec for i in [3, 2.5]]
+    v_Ss= [i*U.km/U.sec for i in [3, 2]]
+    rhos=[i*U.kg/U.m**3 for i in [2.6, 2.1]]
+    epss=[0, .110]
+    gammas=[0, 0.035]
+    deltas=[0, 0.255]
+    src_dir=[0,0,1]
 
-t_end=0.01*U.sec #increase this end time as desired
-frq=50.*U.Hz
-sampling_interval=2*U.msec
-numRcvPerLine=101
-rangeRcv=200*U.m
+    t_end=0.01*U.sec #increase this end time as desired
+    frq=50.*U.Hz
+    sampling_interval=2*U.msec
+    numRcvPerLine=101
+    rangeRcv=200*U.m
 
 
-# location of source
-if DIM == 2:
-    src_locations = [(0, 0)]
-else:
-    src_locations = [(0, 0, 0)]
-
-# domain dimensions
-width_x=rangeRcv
-width_y=width_x
-depth=sum(layers)
-#
-# create array
-#
-receiver_line=[i * (rangeRcv/(numRcvPerLine-1)) for i in range(numRcvPerLine)]
-#
-#   set source location with tag "source""
-#
-src_tags=["source"]
-
-src_loc_2D=(0, 0)
-
-
-
-#
-#   create sensor arrays:
-#
-# East-west line of receivers
-rcvEW_locations=[]
-# North-south line of receivers (if 3 dimensional problem)
-rcvNS_locations=[]
-rgEW=[]
-rgNS=[]
-mid_point=receiver_line[len(receiver_line)//2]
-
-for ix in range(len(receiver_line)):
-    rgEW.append((receiver_line[ix], 0))
+    # location of source
     if DIM == 2:
-        rcvEW_locations.append((receiver_line[ix],  0))
+        src_locations = [(0, 0)]
     else:
-        rcvEW_locations.append((receiver_line[ix], 0, 0))
-        rcvNS_locations.append((0, receiver_line[ix], 0))
-        rgNS.append((0, receiver_line[ix]))
-# North-south line of receivers
-if DIM == 3:
-     for iy in range(len(receiver_line)):
-            rcv_locations.append((mid_point, receiver_line[iy],  0))
-            rg.append( (  mid_point, receiver_line[iy]) )
-#
-# create domain:
-#
-if DIM == 2:
-    domain = Rectangle(ORDER,
-            ceil(ne_z*width_x/depth), ne_z ,l0=width_x, l1=(-depth,0),
-            diracPoints=src_locations, diracTags=src_tags)
-    #suppress the x-component on the x boundary
-    q = whereZero(domain.getX()[0])*[1,0]
-else:
-    domain=Brick(ORDER,
-            ceil(ne_z*width_x/depth), ceil(ne_z*width_y/depth), ne_z,
-            l0=width_x, l1=width_y, l2=(-depth,0),
-            diracPoints=src_locations, diracTags=src_tags)
-    q = wherePositive(
-            #suppress the x-component on the x boundary
-            whereZero(domain.getX()[0])*[1,0,0]
-            + #logical or
-            #suppress the y-component on the y boundary at the source
-            whereZero(domain.getX()[1])*[0,1,0])
-
-# set up reciever locations
-locEW=Locator(domain,rcvEW_locations)
-tracerEW_x=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='x-displacement - east-west line')
-tracerEW_z=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='z-displacement - east-west line')
-if DIM==3:
-    locNS=Locator(domain,rcvNS_locations)
-    tracerEW_y=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
+        src_locations = [(0, 0, 0)]
+
+    # domain dimensions
+    width_x=rangeRcv
+    width_y=width_x
+    depth=sum(layers)
+    #
+    # create array
+    #
+    receiver_line=[i * (rangeRcv/(numRcvPerLine-1)) for i in range(numRcvPerLine)]
+    #
+    #   set source location with tag "source"
+    #
+    src_tags=["source"]
+
+    src_loc_2D=(0, 0)
+
+
+
+    #
+    #   create sensor arrays:
+    #
+    # East-west line of receivers
+    rcvEW_locations=[]
+    # North-south line of receivers (if 3 dimensional problem)
+    rcvNS_locations=[]
+    rgEW=[]
+    rgNS=[]
+    mid_point=receiver_line[len(receiver_line)//2]
+
+    for ix in range(len(receiver_line)):
+        rgEW.append((receiver_line[ix], 0))
+        if DIM == 2:
+            rcvEW_locations.append((receiver_line[ix],  0))
+        else:
+            rcvEW_locations.append((receiver_line[ix], 0, 0))
+            rcvNS_locations.append((0, receiver_line[ix], 0))
+            rgNS.append((0, receiver_line[ix]))
+    # North-south line of receivers
+    if DIM == 3:
+         for iy in range(len(receiver_line)):
+                rcv_locations.append((mid_point, receiver_line[iy],  0))
+                rg.append( (  mid_point, receiver_line[iy]) )
+    #
+    # create domain:
+    #
+    if DIM == 2:
+        domain = Rectangle(ORDER,
+                ceil(ne_z*width_x/depth), ne_z ,l0=width_x, l1=(-depth,0),
+                diracPoints=src_locations, diracTags=src_tags)
+        #suppress the x-component on the x boundary
+        q = whereZero(domain.getX()[0])*[1,0]
+    else:
+        domain=Brick(ORDER,
+                ceil(ne_z*width_x/depth), ceil(ne_z*width_y/depth), ne_z,
+                l0=width_x, l1=width_y, l2=(-depth,0),
+                diracPoints=src_locations, diracTags=src_tags)
+        q = wherePositive(
+                #suppress the x-component on the x boundary
+                whereZero(domain.getX()[0])*[1,0,0]
+                + #logical or
+                #suppress the y-component on the y boundary at the source
+                whereZero(domain.getX()[1])*[0,1,0])
+
+    # set up receiver locations
+    locEW=Locator(domain,rcvEW_locations)
+    tracerEW_x=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
             sampling_interval=sampling_interval,
             text='x-displacement - east-west line')
-    tracerNS_x=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
-            sampling_interval=sampling_interval,
-            text='x-displacement - north-south line')
-    tracerNS_y=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+    tracerEW_z=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
             sampling_interval=sampling_interval,
-            text='y-displacement - north-south line')
-    tracerNS_z=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
-            sampling_interval=sampling_interval,
-            text='z-displacement - north-south line')
-if not tracerEW_x.obspy_available():
-    print("\nWARNING: obspy not available, SEGY files will not be written\n")
-elif getMPISizeWorld() > 1:
-    print("\nWARNING: SEGY files cannot be written with multiple processes\n")
-
-
-#======================================================================
-z=ReducedFunction(domain).getX()[DIM-1]
-z_bottom=-depth
-v_p=0
-v_s=0
-delta=0
-vareps=0
-gamma=0
-rho=0
-for l in range(len(layers)):
-    m=wherePositive(z-z_bottom)*whereNonPositive(z-(z_bottom+layers[l]))
-    v_p=v_p*(1-m)+v_Ps[l]*m
-    v_s=v_s*(1-m)+v_Ss[l]*m
-    rho=rho*(1-m)+rhos[l]*m
-    vareps=vareps*(1-m)+epss[l]*m
-    gamma=gamma*(1-m)+gammas[l]*m
-    delta=delta*(1-m)+deltas[l]*m
-    z_bottom+=layers[l]
-
-wl=Ricker(frq)
-dt=min((1./5.)*min(inf(domain.getSize()/v_p), inf(domain.getSize()/v_s)), wl.getTimeScale())
-
-sw=HTIWave(domain, v_p, v_s, wl, src_tags[0], source_vector = src_dir,
-        eps=vareps, gamma=gamma, delta=delta, rho=rho,
-        absorption_zone=None, absorption_cut=1e-2, lumping=True, dt=dt)
-sw.setQ(q)
-
-locEW=Locator(domain, rcvEW_locations)
-if DIM == 3:
-    locNS=Locator(domain, rcvNS_locations)
-
-mkDir('output')
-
-t=0.
-n=0
-k=0
-u=None
-while t < t_end:
-    start = time()
-    t,u = sw.update(t+sampling_interval)
-    tracerEW_x.addRecord(locEW(u[0]))
-    tracerEW_z.addRecord(locEW(u[DIM-1]))
+            text='z-displacement - east-west line')
     if DIM==3:
-           tracerEW_y.addRecord(locEW(u[1]))
-           tracerNS_x.addRecord(locNS(u[0]))
-           tracerNS_y.addRecord(locNS(u[1]))
-           tracerNS_z.addRecord(locNS(u[2]))
-    print(t, locEW(u[DIM-1])[len(rgEW)//2-4:len(rgEW)//2+1], wl.getValue(t))
-    k+=1
-    if k%5 == 0:
+        locNS=Locator(domain,rcvNS_locations)
+        tracerEW_y=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
+                sampling_interval=sampling_interval,
+                text='y-displacement - east-west line')
+        tracerNS_x=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+                sampling_interval=sampling_interval,
+                text='x-displacement - north-south line')
+        tracerNS_y=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+                sampling_interval=sampling_interval,
+                text='y-displacement - north-south line')
+        tracerNS_z=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+                sampling_interval=sampling_interval,
+                text='z-displacement - north-south line')
+    if not tracerEW_x.obspy_available():
+        print("\nWARNING: obspy not available, SEGY files will not be written\n")
+    elif getMPISizeWorld() > 1:
+        print("\nWARNING: SEGY files cannot be written with multiple processes\n")
+
+
+    #======================================================================
+    z=ReducedFunction(domain).getX()[DIM-1]
+    z_bottom=-depth
+    v_p=0
+    v_s=0
+    delta=0
+    vareps=0
+    gamma=0
+    rho=0
+    for l in range(len(layers)):
+        m=wherePositive(z-z_bottom)*whereNonPositive(z-(z_bottom+layers[l]))
+        v_p=v_p*(1-m)+v_Ps[l]*m
+        v_s=v_s*(1-m)+v_Ss[l]*m
+        rho=rho*(1-m)+rhos[l]*m
+        vareps=vareps*(1-m)+epss[l]*m
+        gamma=gamma*(1-m)+gammas[l]*m
+        delta=delta*(1-m)+deltas[l]*m
+        z_bottom+=layers[l]
+
+    wl=Ricker(frq)
+    dt=min((1./5.)*min(inf(domain.getSize()/v_p), inf(domain.getSize()/v_s)), wl.getTimeScale())
+
+    sw=HTIWave(domain, v_p, v_s, wl, src_tags[0], source_vector = src_dir,
+            eps=vareps, gamma=gamma, delta=delta, rho=rho,
+            absorption_zone=None, absorption_cut=1e-2, lumping=True, dt=dt)
+    sw.setQ(q)
+
+    locEW=Locator(domain, rcvEW_locations)
+    if DIM == 3:
+        locNS=Locator(domain, rcvNS_locations)
+
+    mkDir('output')
+
+    t=0.
+    n=0
+    k=0
+    u=None
+    while t < t_end:
+        start = time()
+        t,u = sw.update(t+sampling_interval)
+        tracerEW_x.addRecord(locEW(u[0]))
+        tracerEW_z.addRecord(locEW(u[DIM-1]))
+        if DIM==3:
+               tracerEW_y.addRecord(locEW(u[1]))
+               tracerNS_x.addRecord(locNS(u[0]))
+               tracerNS_y.addRecord(locNS(u[1]))
+               tracerNS_z.addRecord(locNS(u[2]))
+        print(t, locEW(u[DIM-1])[len(rgEW)//2-4:len(rgEW)//2+1], wl.getValue(t))
+        k+=1
+        if k%5 == 0:
+            saveSilo("output/normalHTI_%d.silo"%(n,), v_p=v_p, u=u, cycle=k, time=t)
+            n += 1
+    if k%5 != 0:
         saveSilo("output/normalHTI_%d.silo"%(n,), v_p=v_p, u=u, cycle=k, time=t)
-        n += 1
-if k%5 != 0:
-    saveSilo("output/normalHTI_%d.silo"%(n,), v_p=v_p, u=u, cycle=k, time=t)
-if tracerEW_x.obspy_available() and getMPISizeWorld() == 1:
-    tracerEW_x.write('output/lineEW_x.sgy')
-    tracerEW_z.write('output/lineEW_z.sgy')
-    if DIM == 3: 
-        tracerEW_y.write('output/lineEW_y.sgy')
-        tracerNS_x.write('output/lineNS_x.sgy')
-        tracerNS_y.write('output/lineNS_y.sgy')
-        tracerNS_z.write('output/lineNS_z.sgy')
+    if tracerEW_x.obspy_available() and getMPISizeWorld() == 1:
+        tracerEW_x.write('output/lineEW_x.sgy')
+        tracerEW_z.write('output/lineEW_z.sgy')
+        if DIM == 3: 
+            tracerEW_y.write('output/lineEW_y.sgy')
+            tracerNS_x.write('output/lineNS_x.sgy')
+            tracerNS_y.write('output/lineNS_y.sgy')
+            tracerNS_z.write('output/lineNS_z.sgy')
+
+else: # no speckley
+    print("The Speckley module is not available")
+
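
The change to synthetic_HTI.py above is the pattern this update applies to every example that follows: import the optional domain module inside try/except, record whether it worked, and only run the example body when it did. A minimal stand-alone sketch of the same idea (the try_import helper is only an illustration, not part of escript):

    import importlib

    def try_import(name):
        """Return (module, True) if 'name' imports cleanly, else (None, False)."""
        try:
            return importlib.import_module(name), True
        except ImportError:
            return None, False

    speckley, HAVE_SPECKLEY = try_import("esys.speckley")
    if HAVE_SPECKLEY:
        pass  # build the Rectangle/Brick domain and run the example here
    else:
        print("The Speckley module is not available")
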
diff --git a/doc/examples/inversion/synthetic_TTI.py b/doc/examples/inversion/synthetic_TTI.py
index 88f10c6..618a198 100644
--- a/doc/examples/inversion/synthetic_TTI.py
+++ b/doc/examples/inversion/synthetic_TTI.py
@@ -23,141 +23,152 @@ __url__="https://launchpad.net/escript-finley"
 from esys.escript import *
 from esys.escript import unitsSI as U
 from esys.escript.pdetools import Locator
-from esys.speckley import Rectangle
 from esys.weipa import saveSilo
 from esys.downunder import Ricker, TTIWave, SimpleSEGYWriter
 from math import ceil
 import time
+try:
+    from esys.speckley import Rectangle
+    HAVE_SPECKLEY=True
+except ImportError:
+    HAVE_SPECKLEY=False
 
-# these are the layers from the top down
-layers = [     400*U.m         ,    100*U.m  ,        1.*U.km,         ]
-v_P=     [    2.86* U.km/U.sec ,    1.5 * U.km/U.sec, 2.86 * U.km/U.sec     ]
-v_S=     [     1.79 * U.km/U.sec ,    0.7* U.km/U.sec, 1.8*U.km/U.sec     ]
-eps =    [    0.               ,    0.5,               0.1             ]
-delta=   [    0.               ,    0.5            ,    0.              ]
-tilt=    [    0.               ,    0.             ,    0.              ]
-rho=     [ 2000 * U.kg/U.m**3  , 2000 * U.kg/U.m**3, 2000 * U.kg/U.m**3 ]
-#
-#   other input:
-#
-t_end=0.008*U.sec                   #only this low for testing purposes
-frq=10.*U.Hz                         #  dominant frequnce in the Ricker (maximum frequence ~ 2 * frq)
-sampling_interval=4*U.msec          # sampling interval
-ne_z=None                           # number of elements in vertical direction, if none it is guessed 
-n_out = 5                         # a silo file is created every n_out's sample
-absorption_zone=100*U.m             # absorbtion zone to be added in horizantal direction to the area covered by receiver line 
-                                    # and subtracted from the lowest layer.
-# defines the receiver line 
-rangeRcv=800*U.m                    # width of the receveiver line
-numRcvPerLine=101                   # total number of receiver
-src_id=numRcvPerLine//2              # location of source in crossing array lines with in 0..numRcvInLine 
-lumping = True
-src_dir=[0,1]
+if HAVE_SPECKLEY:
+    # these are the layers from the top down
+    layers = [     400*U.m         ,    100*U.m  ,        1.*U.km,         ]
+    v_P=     [    2.86* U.km/U.sec ,    1.5 * U.km/U.sec, 2.86 * U.km/U.sec     ]
+    v_S=     [     1.79 * U.km/U.sec ,    0.7* U.km/U.sec, 1.8*U.km/U.sec     ]
+    eps =    [    0.               ,    0.5,               0.1             ]
+    delta=   [    0.               ,    0.5            ,    0.              ]
+    tilt=    [    0.               ,    0.             ,    0.              ]
+    rho=     [ 2000 * U.kg/U.m**3  , 2000 * U.kg/U.m**3, 2000 * U.kg/U.m**3 ]
+    #
+    #   other input:
+    #
+    t_end=0.008*U.sec                   # only this low for testing purposes
+    frq=10.*U.Hz                        # dominant frequency of the Ricker wavelet (maximum frequency ~ 2 * frq)
+    sampling_interval=4*U.msec          # sampling interval
+    ne_z=None                           # number of elements in vertical direction, if none it is guessed 
+    n_out = 5                           # a silo file is created every n_out-th sample
+    absorption_zone=100*U.m             # absorption zone to be added in horizontal direction to the area covered by the receiver line
+                                        # and subtracted from the lowest layer.
+    # defines the receiver line 
+    rangeRcv=800*U.m                    # width of the receiver line
+    numRcvPerLine=101                   # total number of receivers
+    src_id=numRcvPerLine//2             # index of the source along the receiver line (0..numRcvPerLine-1)
+    lumping = True
+    src_dir=[0,1]
 
-# domain dimension
-width_x=rangeRcv + 4*absorption_zone
-depth=sum(layers)
-if ne_z is None:
-    ne_z=int(ceil(depth*(2*frq)/min(v_P)))
-ne_x=int(ceil(ne_z*width_x/depth))
-#
-# create receiver array 
-#
-receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
-#
-#   set source location with tag "source""
-#
-src_tags=["source"]
-src_locations = [ (receiver_line[src_id], depth)]
-srcloc=(receiver_line[src_id], 0.)
-#
-#   output
-#
-print("%s"%(time.asctime(),))
-print("ne_x = %s"%(ne_x,))
-print("ne_z = %s"%(ne_z,))
-print("width = %s m"%(width_x,))
-print("depth = %s m"%(depth, ))
-print("absorption_zone = %s m"%(absorption_zone, ))
-print("sampling interval = %s ms"%(sampling_interval/U.msec,))
-print("t_end = %s sec"%(t_end,))
-print("ricker dominant freqency = %s Hz"%(frq,))
-print("length of receiver line = %s ms"%(rangeRcv,))
-print("number of receivers = %s"%(numRcvPerLine,))
-print("first receiver location = %s m"%(receiver_line[0],))
-print("last receiver location = %s m"%(receiver_line[-1],))
-print("source location = %s m"%(src_locations[0][0],))
-print("source orientation = %s"%(src_dir,))
-print("matrix lumping = %s"%(lumping,))
-print("Layer\tV_p\tV_s\teps\tdelta\ttilt\trho")
-for i in range(len(layers)):
-    print("%s\t%s\t%s\t%s\t%s\t%s\t%s"%( layers[i], v_P[i], v_S[i], eps[i], delta[i], tilt[i], rho[i]))
-#
-# create domain:
-#
-order = 5
-domain=Rectangle(order, ne_x,ne_z, l0=width_x, l1=depth, 
-            diracPoints=src_locations, diracTags=src_tags, d0=getMPISizeWorld())
-#
-# create the wavelet:
-#
-wl=Ricker(frq)
-#
-#======================================================================
-#
-#  set 
-#
-z=ReducedFunction(domain).getX()[1]
-z_top=0
-V_P=0
-V_S=0
-Delta=0
-Eps=0
-Tilt=0
-Rho=0
-z_top=depth
+    # domain dimension
+    width_x=rangeRcv + 4*absorption_zone
+    depth=sum(layers)
+    if ne_z is None:
+        ne_z=int(ceil(depth*(2*frq)/min(v_P)))
+        if getMPISizeWorld() > 10:
+            ne_z = 2*ne_z-1
+    ne_x=int(ceil(ne_z*width_x/depth))
+    #
+    # create receiver array 
+    #
+    receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
+    #
+    #   set source location with tag "source"
+    #
+    src_tags=["source"]
+    src_locations = [ (receiver_line[src_id], depth)]
+    srcloc=(receiver_line[src_id], 0.)
+    #
+    #   output
+    #
+    print("%s"%(time.asctime(),))
+    print("ne_x = %s"%(ne_x,))
+    print("ne_z = %s"%(ne_z,))
+    print("width = %s m"%(width_x,))
+    print("depth = %s m"%(depth, ))
+    print("absorption_zone = %s m"%(absorption_zone, ))
+    print("sampling interval = %s ms"%(sampling_interval/U.msec,))
+    print("t_end = %s sec"%(t_end,))
+    print("ricker dominant freqency = %s Hz"%(frq,))
+    print("length of receiver line = %s ms"%(rangeRcv,))
+    print("number of receivers = %s"%(numRcvPerLine,))
+    print("first receiver location = %s m"%(receiver_line[0],))
+    print("last receiver location = %s m"%(receiver_line[-1],))
+    print("source location = %s m"%(src_locations[0][0],))
+    print("source orientation = %s"%(src_dir,))
+    print("matrix lumping = %s"%(lumping,))
+    print("Layer\tV_p\tV_s\teps\tdelta\ttilt\trho")
+    for i in range(len(layers)):
+        print("%s\t%s\t%s\t%s\t%s\t%s\t%s"%( layers[i], v_P[i], v_S[i], eps[i], delta[i], tilt[i], rho[i]))
+    #
+    # create domain:
+    #
+    order = 5
+    domain=Rectangle(order, ne_x,ne_z, l0=width_x, l1=depth, 
+                diracPoints=src_locations, diracTags=src_tags, d0=getMPISizeWorld())
+    #
+    # create the wavelet:
+    #
+    wl=Ricker(frq)
+    #
+    #======================================================================
+    #
+    #  set material parameters layer by layer
+    #
+    z=ReducedFunction(domain).getX()[1]
+    z_top=0
+    V_P=0
+    V_S=0
+    Delta=0
+    Eps=0
+    Tilt=0
+    Rho=0
+    z_top=depth
+
+    for l in range(len(layers)):
+        m=whereNonPositive(z-z_top)*wherePositive(z-(z_top-layers[l]))
+        V_P = V_P     * (1-m)  + v_P[l]  * m
+        V_S = V_S     * (1-m)  + v_S[l]  * m
+        Delta = Delta * (1-m)  + delta[l]* m
+        Eps = Eps     * (1-m)  + eps[l]  * m
+        Tilt = Tilt   * (1-m)  + tilt[l] * m
+        Rho = Rho     * (1-m)  + rho[l]  * m
+        z_top-=layers[l]
+
+    sw=TTIWave(domain, V_P, V_S, wl, src_tags[0], source_vector = src_dir,
+                    eps=Eps, delta=Delta, rho=Rho, theta=Tilt,
+                    absorption_zone=absorption_zone, absorption_cut=1e-2, lumping=lumping)
 
-for l in range(len(layers)):
-    m=whereNonPositive(z-z_top)*wherePositive(z-(z_top-layers[l]))
-    V_P = V_P     * (1-m)  + v_P[l]  * m
-    V_S = V_S     * (1-m)  + v_S[l]  * m
-    Delta = Delta * (1-m)  + delta[l]* m
-    Eps = Eps     * (1-m)  + eps[l]  * m
-    Tilt = Tilt   * (1-m)  + tilt[l] * m
-    Rho = Rho     * (1-m)  + rho[l]  * m
-    z_top-=layers[l]
+    srclog=Locator(domain, [ (r , depth) for r in receiver_line ] )
+    grploc=[ (x[0], 0.) for x in srclog.getX() ]
 
-sw=TTIWave(domain, V_P, V_S, wl, src_tags[0], source_vector = src_dir,
-                eps=Eps, delta=Delta, rho=Rho, theta=Tilt,
-                absorption_zone=absorption_zone, absorption_cut=1e-2, lumping=lumping)
+    tracer_x=SimpleSEGYWriter(receiver_group=grploc, source=srcloc, sampling_interval=sampling_interval, text='x-displacement')
+    tracer_z=SimpleSEGYWriter(receiver_group=grploc, source=srcloc, sampling_interval=sampling_interval, text='z-displacement')
 
-srclog=Locator(domain, [ (r , depth) for r in receiver_line ] )
-grploc=[ (x[0], 0.) for x in srclog.getX() ]
+    if not tracer_x.obspy_available():
+        print("\nWARNING: obspy not available, SEGY files will not be written\n")
+    elif getMPISizeWorld() > 1:
+        print("\nWARNING: SEGY files cannot be written with multiple processes\n")
 
-tracer_x=SimpleSEGYWriter(receiver_group=grploc, source=srcloc, sampling_interval=sampling_interval, text='x-displacement')
-tracer_z=SimpleSEGYWriter(receiver_group=grploc, source=srcloc, sampling_interval=sampling_interval, text='z-displacement')
+    t=0.
+    mkDir('output')
+    n=0
+    k_out=0
+    print("calculation starts @ %s"%(time.asctime(),))
+    while t < t_end:
+            t,u = sw.update(t+sampling_interval)
+            tracer_x.addRecord(srclog(u[0]))
+            tracer_z.addRecord(srclog(u[1]))
+            print("t=%s, src=%s: \t %s \t %s \t %s"%(t, wl.getValue(t),srclog(u[1])[0], srclog(u[1])[src_id], srclog(u[1])[-1]))
+            if n_out is not None and n%n_out == 0:
+                print("time step %s written to file %s"%(n, "output/u_%d.silo"%(k_out,)))
+                saveSilo("output/u_%d.silo"%(k_out,), u=u)
+                k_out+=1
+            n+=1
+    if tracer_x.obspy_available() and getMPISizeWorld() == 1:
+        tracer_x.write('output/lineX.sgy')
+        tracer_z.write('output/lineZ.sgy')
+    print("calculation completed @ %s"%(time.asctime(),))
 
-if not tracer_x.obspy_available():
-    print("\nWARNING: obspy not available, SEGY files will not be written\n")
-elif getMPISizeWorld() > 1:
-    print("\nWARNING: SEGY files cannot be written with multiple processes\n")
+else: # no speckley
+    print("The Speckley module is not available")
 
-t=0.
-mkDir('output')
-n=0
-k_out=0
-print("calculation starts @ %s"%(time.asctime(),))
-while t < t_end:
-        t,u = sw.update(t+sampling_interval)
-        tracer_x.addRecord(srclog(u[0]))
-        tracer_z.addRecord(srclog(u[1]))
-        print("t=%s, src=%s: \t %s \t %s \t %s"%(t, wl.getValue(t),srclog(u[1])[0], srclog(u[1])[src_id], srclog(u[1])[-1]))
-        if not n_out is None and n%n_out == 0:
-            print("time step %s written to file %s"%(n_out, "output/u_%d.silo"%(k_out,)))
-            saveSilo("output/u_%d.silo"%(k_out,), u=u)
-            k_out+=1
-        n+=1
-if tracer_x.obspy_available() and getMPISizeWorld() == 1:
-    tracer_x.write('output/lineX.sgy')
-    tracer_z.write('output/lineZ.sgy')
-print("calculation completed @ %s"%(time.asctime(),))
diff --git a/doc/examples/inversion/synthetic_VTI.py b/doc/examples/inversion/synthetic_VTI.py
index 9a49527..c9b14a4 100644
--- a/doc/examples/inversion/synthetic_VTI.py
+++ b/doc/examples/inversion/synthetic_VTI.py
@@ -23,150 +23,159 @@ __url__="https://launchpad.net/escript-finley"
 from esys.escript import *
 from esys.escript import unitsSI as U
 from esys.escript.pdetools import Locator
-from esys.speckley import Brick, Rectangle
 from esys.weipa import saveSilo
 from esys.downunder import Ricker, VTIWave, SimpleSEGYWriter
 from math import ceil
 
+try:
+    from esys.speckley import Brick, Rectangle
+    HAVE_SPECKLEY=True
+except ImportError:
+    HAVE_SPECKLEY=False
+
+if HAVE_SPECKLEY:
+    DIM=2          # spatial dimension
+
+    depth=1*U.km    # depth 
+    v_p_top=1.5*U.km/U.sec
+    v_p_bottom=3*U.km/U.sec
+    absorption_zone=100*U.m
+    ne_z=50.
+
+    reflector_at=0.5*depth
+
+
+    t_end=0.008*U.sec #only this low for testing purposes
+    frq=8.*U.Hz
+    sampling_interval=4*U.msec
+    numRcvPerLine=101
+    rangeRcv=800*U.m
+
+    # index of the source along the crossing receiver lines (0..numRcvPerLine-1); one of srcEW/srcNS must be None
+    srcEW=numRcvPerLine//2
+    srcNS=None
+
+    # domain dimensions
+    width_x=rangeRcv + 4*absorption_zone
+    width_y=width_x
+    #
+    # create array 
+    #
+    receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
+    #
+    #   set source location with tag "source"
+    #
+    src_tags=["source"]
+    if DIM == 2:
+       src_locations = [ (receiver_line[srcEW], depth)]
+       src_loc_2D=(receiver_line[srcEW], 0.)
+    else:
+       if srcEW:
+          srcNS=numRcvPerLine//2
+       elif srcNS:
+          srcEW=numRcvPerLine//2
+       else:
+           raise ValueError("on of the variables srcEW or srcNS must be None!")
+       src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
+       src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
+    #
+    #   create sensor arrays:
+    #
+    # East-west line of receivers
+    rcvEW_locations=[]
+    rgEW=[]
+    mid_point=receiver_line[len(receiver_line)//2]
+
+    for ix in range(len(receiver_line)):
+            if DIM == 2:
+                rcvEW_locations.append((receiver_line[ix], depth))
+                rgEW.append( ( receiver_line[ix], 0.) ) 
+            else:
+               rcvEW_locations.append((receiver_line[ix], mid_point, depth))
+               rgEW.append( ( receiver_line[ix], mid_point) ) 
+    # North-south line of receivers
+    if DIM == 3:
+       rcvNS_locations=[]
+       rgNS=[]
+
+       for iy in range(len(receiver_line)):
+           rcvNS_locations.append((mid_point, receiver_line[iy],  depth))
+           rgNS.append( (  mid_point, receiver_line[iy]) ) 
+    #
+    # create domain:
+    #
+    order = 5
+    if DIM == 2:
+        domain=Rectangle(order, ceil(ne_z*width_x/depth),ne_z,l0=width_x,l1=depth, 
+                    diracPoints=src_locations, diracTags=src_tags)
+    else:
+        domain=Brick(order, ceil(ne_z*width_x/depth), ceil(ne_z*width_y/depth),
+                    ne_z, l0=width_x, l1=width_y, l2=depth,
+                    diracPoints=src_locations, diracTags=src_tags)
+    wl=Ricker(frq)
+
+    #======================================================================
+    # m=whereNegative(Function(domain).getX()[DIM-1]-reflector_at)
+    # v_p=v_p_bottom*m+v_p_top*(1-m)
+    v_p=2*U.km/U.sec
+    v_s=0.9*U.km/U.sec
+    vareps=0.1*0
+    gamma=0.15*0
+    delta=0.05*0
+    rho=2000*U.kg/U.m**3
+    src_dir=[0,0,1]
+
+    sw=VTIWave(domain, v_p, v_s, wl, src_tags[0], source_vector = src_dir,
+                    eps=vareps, gamma=gamma, delta=delta, rho=rho,
+                    absorption_zone=300*U.m, absorption_cut=1e-2, lumping=True)
+
+    locEW=Locator(domain,rcvEW_locations)
+    tracerEW_x=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval, text='x-displacement - east-west line')
+    tracerEW_z=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval, text='z-displacement - east-west line')
+    if DIM==3:
+        locNS=Locator(domain,rcvNS_locations)
+        tracerEW_y=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
+            sampling_interval=sampling_interval,
+            text='y-displacement - east-west line')
+        tracerNS_x=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+            sampling_interval=sampling_interval,
+            text='x-displacement - north-south line')
+        tracerNS_y=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+            sampling_interval=sampling_interval,
+            text='y-displacement - north-south line')
+        tracerNS_z=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
+            sampling_interval=sampling_interval,
+            text='z-displacement - north-south line')
+    if not tracerEW_x.obspy_available():
+        print("\nWARNING: obspy not available, SEGY files will not be written\n")
+    elif getMPISizeWorld() > 1:
+        print("\nWARNING: SEGY files cannot be written with multiple processes\n")
+
+    t=0.
+    mkDir('tmp')
+    n=0
+    while t < t_end:
+            t,u = sw.update(t+sampling_interval)
+            tracerEW_x.addRecord(locEW(u[0]))
+            tracerEW_z.addRecord(locEW(u[DIM-1]))
+            if DIM==3:
+                   tracerEW_y.addRecord(locEW(u[1]))
+                   tracerNS_x.addRecord(locNS(u[0]))
+                   tracerNS_y.addRecord(locNS(u[1]))
+                   tracerNS_z.addRecord(locNS(u[2]))
+            print(t, locEW(u[DIM-1])[len(rgEW)//2-4:len(rgEW)//2+1], wl.getValue(t))
+            #if n%5 == 0 : saveSilo("tmp/u_%d.silo"%(n/5,), u=u)
+            saveSilo("tmp/u_%d.silo"%(n,), u=u, cycle=n, time=t)
+            n+=1
+    if tracerEW_x.obspy_available() and getMPISizeWorld() == 1:
+        tracerEW_x.write('lineEW_x.sgy')
+        tracerEW_z.write('lineEW_z.sgy')
+        if DIM == 3: 
+            tracerEW_y.write('lineEW_y.sgy')
+            tracerNS_x.write('lineNS_x.sgy')
+            tracerNS_y.write('lineNS_y.sgy')
+            tracerNS_z.write('lineNS_z.sgy')
+
+else: # no speckley
+    print("The Speckley module is not available")
 
-DIM=2          # spatial dimension
-
-depth=1*U.km    # depth 
-v_p_top=1.5*U.km/U.sec
-v_p_bottom=3*U.km/U.sec
-absorption_zone=100*U.m
-ne_z=50.
-
-reflector_at=0.5*depth
-
-
-t_end=0.008*U.sec #only this low for testing purposes
-frq=8.*U.Hz
-sampling_interval=4*U.msec
-numRcvPerLine=101
-rangeRcv=800*U.m
-
-# location of source in crossing array lines with in 0..numRcvInLine one needs to be None
-srcEW=numRcvPerLine//2
-srcNS=None
-
-# dommain dimension
-width_x=rangeRcv + 4*absorption_zone
-width_y=width_x
-#
-# create array 
-#
-receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
-#
-#   set source location with tag "source""
-#
-src_tags=["source"]
-if DIM == 2:
-   src_locations = [ (receiver_line[srcEW], depth)]
-   src_loc_2D=(receiver_line[srcEW], 0.)
-else:
-   if srcEW:
-      srcNS=numRcvPerLine//2
-   elif srcNS:
-      srcEW=numRcvPerLine//2
-   else:
-       raise ValueError("on of the variables srcEW or srcNS must be None!")
-   src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
-   src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
-#
-#   create sensor arrays:
-#
-# East-west line of receiver
-rcvEW_locations=[]
-rgEW=[]
-mid_point=receiver_line[len(receiver_line)//2]
-
-for ix in range(len(receiver_line)):
-        if DIM == 2:
-            rcvEW_locations.append((receiver_line[ix], depth))
-            rgEW.append( ( receiver_line[ix], 0.) ) 
-        else:
-           rcvEW_locations.append((receiver_line[ix], mid_point, depth))
-           rgEW.append( ( receiver_line[ix], mid_point) ) 
-# North-south line of receiver
-if DIM == 3:
-   rcvNS_locations=[]
-   rgNS=[]
-
-   for iy in range(len(receiver_line)):
-       rcvNS_locations.append((mid_point, receiver_line[iy],  depth))
-       rgNS.append( (  mid_point, receiver_line[iy]) ) 
-#
-# create domain:
-#
-order = 5
-if DIM == 2:
-    domain=Rectangle(order, ceil(ne_z*width_x/depth),ne_z,l0=width_x,l1=depth, 
-                diracPoints=src_locations, diracTags=src_tags)
-else:
-    domain=Brick(order, ceil(ne_z*width_x/depth), ceil(ne_z*width_y/depth),
-                ne_z, l0=width_x, l1=width_y, l2=depth,
-                diracPoints=src_locations, diracTags=src_tags)
-wl=Ricker(frq)
-
-#======================================================================
-# m=whereNegative(Function(domain).getX()[DIM-1]-reflector_at)
-# v_p=v_p_bottom*m+v_p_top*(1-m)
-v_p=2*U.km/U.sec
-v_s=0.9*U.km/U.sec
-vareps=0.1*0
-gamma=0.15*0
-delta=0.05*0
-rho=2000*U.kg/U.m**3
-src_dir=[0,0,1]
-
-sw=VTIWave(domain, v_p, v_s, wl, src_tags[0], source_vector = src_dir,
-                eps=vareps, gamma=gamma, delta=delta, rho=rho,
-                absorption_zone=300*U.m, absorption_cut=1e-2, lumping=True)
-
-locEW=Locator(domain,rcvEW_locations)
-tracerEW_x=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval, text='x-displacement - east-west line')
-tracerEW_z=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval, text='z-displacement - east-west line')
-if DIM==3:
-    locNS=Locator(domain,rcvNS_locations)
-    tracerEW_y=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='x-displacement - east-west line')
-    tracerNS_x=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='x-displacement - north-south line')
-    tracerNS_y=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='y-displacement - north-south line')
-    tracerNS_z=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D,
-        sampling_interval=sampling_interval,
-        text='z-displacement - north-south line')
-if not tracerEW_x.obspy_available():
-    print("\nWARNING: obspy not available, SEGY files will not be written\n")
-elif getMPISizeWorld() > 1:
-    print("\nWARNING: SEGY files cannot be written with multiple processes\n")
-
-t=0.
-mkDir('tmp')
-n=0
-while t < t_end:
-        t,u = sw.update(t+sampling_interval)
-        tracerEW_x.addRecord(locEW(u[0]))
-        tracerEW_z.addRecord(locEW(u[DIM-1]))
-        if DIM==3:
-               tracerEW_y.addRecord(locEW(u[1]))
-               tracerNS_x.addRecord(locNS(u[0]))
-               tracerNS_y.addRecord(locNS(u[1]))
-               tracerNS_z.addRecord(locNS(u[2]))
-        print(t, locEW(u[DIM-1])[len(rgEW)//2-4:len(rgEW)//2+1], wl.getValue(t))
-        #if n%5 == 0 : saveSilo("tmp/u_%d.silo"%(n/5,), u=u)
-        saveSilo("tmp/u_%d.silo"%(n,), u=u, cycle=n, time=t)
-        n+=1
-if tracerEW_x.obspy_available() and getMPISizeWorld() == 1:
-    tracerEW_x.write('lineEW_x.sgy')
-    tracerEW_z.write('lineEW_z.sgy')
-    if DIM == 3: 
-        tracerEW_y.write('lineEW_y.sgy')
-        tracerNS_x.write('lineNS_x.sgy')
-        tracerNS_y.write('lineNS_y.sgy')
-        tracerNS_z.write('lineNS_z.sgy')
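
The receiver lines in the VTI and sonic examples share the same layout arithmetic: numRcvPerLine points spaced rangeRcv/(numRcvPerLine-1) apart, shifted by twice the absorption zone so that no receiver sits inside it (the scripts use floor division, which yields the same 8 m spacing for these numbers). A small self-contained check of that layout, with plain floats standing in for the unitsSI quantities:

    numRcvPerLine = 101
    rangeRcv = 800.0          # metres
    absorption_zone = 100.0   # metres

    spacing = rangeRcv / (numRcvPerLine - 1)    # 8 m between receivers
    receiver_line = [2 * absorption_zone + i * spacing
                     for i in range(numRcvPerLine)]

    assert receiver_line[0] == 200.0            # first receiver, clear of the zone
    assert receiver_line[-1] == 1000.0          # last receiver
    assert receiver_line[-1] - receiver_line[0] == rangeRcv
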
diff --git a/doc/examples/inversion/synthetic_sonic.py b/doc/examples/inversion/synthetic_sonic.py
index bad37cc..22be445 100644
--- a/doc/examples/inversion/synthetic_sonic.py
+++ b/doc/examples/inversion/synthetic_sonic.py
@@ -23,117 +23,125 @@ __url__="https://launchpad.net/escript-finley"
 from esys.escript import *
 from esys.escript import unitsSI as U
 from esys.escript.pdetools import Locator
-from esys.speckley import Brick, Rectangle
 from esys.weipa import saveSilo
 from esys.downunder import Ricker, SonicWave, SimpleSEGYWriter
 from math import ceil
-
-
-DIM=2          # spatial dimension
-
-depth=1*U.km    # depth 
-v_p_top=1.5*U.km/U.sec
-v_p_bottom=3*U.km/U.sec
-absorption_zone=300*U.m
-ne_z=40
-
-reflector_at=0.5*depth
-
-
-t_end=0.008*U.sec #only this low for testing purposes
-frq=20.*U.Hz
-sampling_interval=4*U.msec
-numRcvPerLine=101
-rangeRcv=800*U.m
-
-# location of source in crossing array lines with in 0..numRcvInLine one needs to be None
-srcEW=numRcvPerLine//2
-srcNS=None
-
-# dommain dimension
-width_x=rangeRcv + 4*absorption_zone
-width_y=width_x
-#
-# create array 
-#
-receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
-#
-#   set source location with tag "source""
-#
-src_tags=["source"]
-if DIM == 2:
-   src_locations = [ (receiver_line[srcEW], depth)]
-   src_loc_2D=(receiver_line[srcEW], 0.)
-else:
-   if srcEW:
-      srcNS=numRcvPerLine//2
-   elif srcNS:
-      srcNS=numRcvPerLine//2
-   else:
-       raise ValueError("on of the variables srcEW or srcNS must be None!")
-   src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
-   src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
-#
-#   create sensor arrays:
-#
-# East-west line of receiver
-rcvEW_locations=[]
-rgEW=[]
-mid_point=receiver_line[len(receiver_line)//2]
-
-for ix in range(len(receiver_line)):
-        if DIM == 2:
-            rcvEW_locations.append((receiver_line[ix], depth))
-            rgEW.append( ( receiver_line[ix], 0.) ) 
-        else:
-           rcvEW_locations.append((receiver_line[ix], mid_point, depth))
-           rgEW.append( ( receiver_line[ix], mid_point) ) 
-# North-south line of receiver
-if DIM == 3:
-   rcvNS_locations=[]
-   rgNS=[]
-
-   for iy in range(len(receiver_line)):
-       rcvNS_locations.append((mid_point, receiver_line[iy],  depth))
-       rgNS.append( (  mid_point, receiver_line[iy]) ) 
-#
-# create domain:
-#
-order = 5
-if DIM == 2:
-   domain=Rectangle(order, ceil(ne_z*width_x/depth),ne_z,l0=width_x,l1=depth, 
-        diracPoints=src_locations, diracTags=src_tags)
-else:
-   domain=Brick(order, ceil(ne_z*width_x/depth),ceil(ne_z*width_y/depth),ne_z,l0=width_x,l1=width_y,l2=depth, 
-        diracPoints=src_locations, diracTags=src_tags)
-wl=Ricker(frq)
-m=whereNegative(Function(domain).getX()[DIM-1]-reflector_at)
-v_p=v_p_bottom*m+v_p_top*(1-m)
-
-sw=SonicWave(domain, v_p, source_tag=src_tags[0], wavelet=wl, absorption_zone=absorption_zone, lumping=True)
-
-locEW=Locator(domain,rcvEW_locations)
-tracerEW=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval)
-if DIM==3:
-   locNS=Locator(domain,rcvNS_locations)
-   tracerNS=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D, sampling_interval=sampling_interval)
-
-if not tracerEW.obspy_available():
-    print("\nWARNING: obspy not available, SEGY files will not be written\n")
-elif getMPISizeWorld() > 1:
-    print("\nWARNING: SEGY files cannot be written with multiple processes\n")
-
-t=0.
-mkDir('tmp')
-n=0
-while t < t_end:
-    t,p = sw.update(t+sampling_interval)
-    tracerEW.addRecord(locEW(p))
-    if DIM==3: tracerNS.addRecord(locNS(p))
-    print(t, locEW(p)[:4], wl.getValue(t))
-    if n%5 == 0 : saveSilo("tmp/u_%d.silo"%(n//5,), p=p)
-    n+=1
-if tracerEW.obspy_available() and getMPISizeWorld() == 1:
-    tracerEW.write('lineEW.sgy')
+try:
+    from esys.speckley import Brick, Rectangle
+    HAVE_SPECKLEY=True
+except ImportError:
+    HAVE_SPECKLEY=False
+
+if HAVE_SPECKLEY:
+    DIM=2          # spatial dimension
+
+    depth=1*U.km    # depth 
+    v_p_top=1.5*U.km/U.sec
+    v_p_bottom=3*U.km/U.sec
+    absorption_zone=300*U.m
+    ne_z=40
+
+    reflector_at=0.5*depth
+
+
+    t_end=0.008*U.sec #only this low for testing purposes
+    frq=20.*U.Hz
+    sampling_interval=4*U.msec
+    numRcvPerLine=101
+    rangeRcv=800*U.m
+
+    # index of the source along the crossing receiver lines (0..numRcvPerLine-1); one of srcEW/srcNS must be None
+    srcEW=numRcvPerLine//2
+    srcNS=None
+
+    # domain dimensions
+    width_x=rangeRcv + 4*absorption_zone
+    width_y=width_x
+    #
+    # create array 
+    #
+    receiver_line=[2*absorption_zone + i * (rangeRcv//(numRcvPerLine-1)) for i in range(numRcvPerLine) ]
+    #
+    #   set source location with tag "source"
+    #
+    src_tags=["source"]
+    if DIM == 2:
+       src_locations = [ (receiver_line[srcEW], depth)]
+       src_loc_2D=(receiver_line[srcEW], 0.)
+    else:
+       if srcEW:
+          srcNS=numRcvPerLine//2
+       elif srcNS:
+          srcEW=numRcvPerLine//2
+       else:
+           raise ValueError("on of the variables srcEW or srcNS must be None!")
+       src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
+       src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
+    #
+    #   create sensor arrays:
+    #
+    # East-west line of receivers
+    rcvEW_locations=[]
+    rgEW=[]
+    mid_point=receiver_line[len(receiver_line)//2]
+
+    for ix in range(len(receiver_line)):
+            if DIM == 2:
+                rcvEW_locations.append((receiver_line[ix], depth))
+                rgEW.append( ( receiver_line[ix], 0.) ) 
+            else:
+               rcvEW_locations.append((receiver_line[ix], mid_point, depth))
+               rgEW.append( ( receiver_line[ix], mid_point) ) 
+    # North-south line of receivers
     if DIM == 3:
-        tracerNS.write('lineNS.sgy')
+       rcvNS_locations=[]
+       rgNS=[]
+
+       for iy in range(len(receiver_line)):
+           rcvNS_locations.append((mid_point, receiver_line[iy],  depth))
+           rgNS.append( (  mid_point, receiver_line[iy]) ) 
+    #
+    # create domain:
+    #
+    order = 5
+    if DIM == 2:
+       domain=Rectangle(order, ceil(ne_z*width_x/depth),ne_z,l0=width_x,l1=depth, 
+            diracPoints=src_locations, diracTags=src_tags)
+    else:
+       domain=Brick(order, ceil(ne_z*width_x/depth),ceil(ne_z*width_y/depth),ne_z,l0=width_x,l1=width_y,l2=depth, 
+            diracPoints=src_locations, diracTags=src_tags)
+    wl=Ricker(frq)
+    m=whereNegative(Function(domain).getX()[DIM-1]-reflector_at)
+    v_p=v_p_bottom*m+v_p_top*(1-m)
+
+    sw=SonicWave(domain, v_p, source_tag=src_tags[0], wavelet=wl, absorption_zone=absorption_zone, lumping=True)
+
+    locEW=Locator(domain,rcvEW_locations)
+    tracerEW=SimpleSEGYWriter(receiver_group=rgEW, source=src_loc_2D, sampling_interval=sampling_interval)
+    if DIM==3:
+       locNS=Locator(domain,rcvNS_locations)
+       tracerNS=SimpleSEGYWriter(receiver_group=rgNS, source=src_loc_2D, sampling_interval=sampling_interval)
+
+    if not tracerEW.obspy_available():
+        print("\nWARNING: obspy not available, SEGY files will not be written\n")
+    elif getMPISizeWorld() > 1:
+        print("\nWARNING: SEGY files cannot be written with multiple processes\n")
+
+    t=0.
+    mkDir('tmp')
+    n=0
+    while t < t_end:
+        t,p = sw.update(t+sampling_interval)
+        tracerEW.addRecord(locEW(p))
+        if DIM==3: tracerNS.addRecord(locNS(p))
+        print(t, locEW(p)[:4], wl.getValue(t))
+        if n%5 == 0 : saveSilo("tmp/u_%d.silo"%(n//5,), p=p)
+        n+=1
+    if tracerEW.obspy_available() and getMPISizeWorld() == 1:
+        tracerEW.write('lineEW.sgy')
+        if DIM == 3:
+            tracerNS.write('lineNS.sgy')
+
+else: # no speckley
+    print("The Speckley module is not available")
+
diff --git a/doc/examples/inversion/synthetic_sonicHTI.py b/doc/examples/inversion/synthetic_sonicHTI.py
index 7d4098f..debbd0f 100644
--- a/doc/examples/inversion/synthetic_sonicHTI.py
+++ b/doc/examples/inversion/synthetic_sonicHTI.py
@@ -25,157 +25,166 @@ __url__="https://launchpad.net/escript-finley"
 from esys.escript import *
 from esys.escript import unitsSI as U
 from esys.escript.pdetools import Locator
-from esys.ripley import Brick, Rectangle
 from esys.weipa import saveSilo
 from esys.downunder import Ricker, SonicHTIWave, SimpleSEGYWriter
 from math import ceil
 import time, os
 
-DIM=2          # spatial dimension
+try:
+    from esys.ripley import Brick, Rectangle
+    HAVE_RIPLEY = True
+except ImportError:
+    HAVE_RIPLEY = False
+
+if HAVE_RIPLEY:
+    DIM=2          # spatial dimension
+
+    # layers from the bottom up:
+    layers = [ 1*U.km     , 1*U.km  ,700*U.m, 500*U.m, 800*U.m ]
+    v_Ps= [ 3.8 * U.km/U.sec , 3. * U.km/U.sec, 2.5*U.km/U.sec, 1.9*U.km/U.sec, 1.5*U.km/U.sec]
+    epss =[   0., 0.24, 0, 0.1, 0]
+    deltas=[  0.,  0.1, 0.,0.03,0 ]
+    azmths=[  0.,0.,0,  0, 0.]
+
+    dt=0.5*U.msec
+
+    ne_z=40
+
+    dt=0.5*U.msec
+
+    t_end=0.008*U.sec #only this low for testing purposes
+    frq=15.*U.Hz
+    tcenter=None
+    sampling_interval=4*U.msec
+    numRcvPerLine=101
+    rangeRcv=4.*U.km
+    src_dir=[0,1]
+    absorption_zone=1000*U.m
+
+    # index of the source along the crossing receiver lines (0..numRcvPerLine-1); one of srcEW/srcNS must be None
+    srcEW=numRcvPerLine//2
+    srcNS=None
+    # domain dimensions
+    width_x=rangeRcv + 2*absorption_zone
+    width_y=width_x
+    depth=sum(layers)
+    ne_x=int(ceil(ne_z*width_x/depth))
+    #
+    # create array 
+    #
+    receiver_line=[  absorption_zone  + i * (rangeRcv//(numRcvPerLine-1) ) for i in range(numRcvPerLine) ]
+    #
+    #   set source location with tag "source"
+    #
+    src_tags=["source"]
+
+    if srcEW:
+          srcNS=numRcvPerLine//2
+    elif srcNS:
+          srcEW=numRcvPerLine//2
+    else:
+        raise ValueError("on of the variables srcEW or srcNS must be None!")
+    if DIM == 2:    
+        src_locations  = [ (receiver_line[srcEW], depth) ]
+        src_loc_2D=(receiver_line[srcEW], 0.)
+    else:
+        src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
+        src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
+
+    #
+    #   create sensor arrays:
+    #
+    # East-west line of receivers
+    rcv_locations=[]
+    rg=[]
+    mid_point=receiver_line[len(receiver_line)//2]
+
+    for ix in range(len(receiver_line)):
+            if DIM == 2:
+                rcv_locations.append((receiver_line[ix],  depth))
+                rg.append( ( receiver_line[ix], 0.) ) 
+            else:
+               rcv_locations.append((receiver_line[ix], mid_point, depth))
+               rg.append( ( receiver_line[ix], mid_point) ) 
+    # North-south line of receivers
+    if DIM == 3:
+         for iy in range(len(receiver_line)):
+                rcv_locations.append((mid_point, receiver_line[iy],  depth))
+                rg.append( (  mid_point, receiver_line[iy]) ) 
+    #
+    # create domain:
+    #
+    if DIM == 2:
+       domain=Rectangle(ne_x, ne_z ,l0=width_x, l1=depth, 
+            diracPoints=src_locations, diracTags=src_tags)
+    else:
+       domain=Brick(ne_x,ne_x,ne_z,l0=width_x,l1=width_y,l2=depth, 
+            diracPoints=src_locations, diracTags=src_tags)
+    wl=Ricker(frq, tcenter)
+
+    #======================================================================
+    z=Function(domain).getX()[DIM-1]
+    z_bottom=0
+    v_p=0
+    delta=0
+    vareps=0
+    azmth=0
+    rho=0
+    for l in range(len(layers)):
+           m=wherePositive(z-z_bottom)*whereNonPositive(z-(z_bottom+layers[l]))
+           v_p=v_p*(1-m)+v_Ps[l]*m
+           vareps=vareps*(1-m)+epss[l]*m
+           azmth=azmth*(1-m)+azmths[l]*m
+           delta=delta*(1-m)+deltas[l]*m
+           z_bottom+=layers[l]
+
+    sw=SonicHTIWave(domain, v_p, wl, src_tags[0], dt=dt, source_vector = src_dir, eps=vareps, delta=delta, azimuth=azmth,  \
+                         absorption_zone=absorption_zone, absorption_cut=1e-2, lumping=False)
+
+    #
+    #  print some info:
+    #
+    print("ne_x = ", ne_x)
+    print("ne_z = ", ne_z)
+    print("h_x = ", width_x/ne_x)
+    print("h_z = ", depth/ne_z)
+    print("dt = ", sw.getTimeStepSize()*1000, "msec")
+    print("width_x = ", width_x)
+    print("depth = ", depth)
+    print("number receivers = ", numRcvPerLine)
+    print("receiver spacing = ", receiver_line[1]-receiver_line[0])
+    print("sampling time = ", sampling_interval*1000,"msec")
+    print("source @ ", src_locations[0])
+    #
+    loc=Locator(domain,rcv_locations)
+    tracerP=SimpleSEGYWriter(receiver_group=rg, source=src_loc_2D, sampling_interval=sampling_interval, text='P')
+    tracerQ=SimpleSEGYWriter(receiver_group=rg, source=src_loc_2D, sampling_interval=sampling_interval, text='Q')
+
+    if not tracerP.obspy_available():
+        print("\nWARNING: obspy not available, SEGY files will not be written\n")
+    elif getMPISizeWorld() > 1:
+        print("\nWARNING: SEGY files cannot be written with multiple processes\n")
+
+    t=0.
+    OUT_DIR="out%sm%smus"%(int(width_x/ne_x),int(sw.getTimeStepSize()*1000000))
+    mkDir(OUT_DIR)
+    n=0
+    k=0
+    timer1=time.time()
+    while t < t_end:
+        t,u = sw.update(t+sampling_interval)
+        Plog=loc(u[1])
+        Qlog=loc(u[0])
+        tracerP.addRecord(Plog)
+        tracerQ.addRecord(Qlog)
+        print(t, wl.getValue(t)," :", Plog[0], Plog[srcEW], Plog[-1])
+    timer1=time.time()-timer1
+    print("time= %e sec; %s sec per step"%(timer1,timer1/max(sw.n,1)))
+
+    if tracerP.obspy_available() and getMPISizeWorld() == 1:
+        tracerP.write(os.path.join(OUT_DIR,'lineP.sgy'))
+        tracerQ.write(os.path.join(OUT_DIR,'lineQ.sgy'))
+
+else: # no ripley
+    print("The Ripley module is not available")
 
-
-# layers from the bottom up:
-layers = [ 1*U.km     , 1*U.km  ,700*U.m, 500*U.m, 800*U.m ]
-v_Ps= [ 3.8 * U.km/U.sec , 3. * U.km/U.sec, 2.5*U.km/U.sec, 1.9*U.km/U.sec, 1.5*U.km/U.sec]
-epss =[   0., 0.24, 0, 0.1, 0]
-deltas=[  0.,  0.1, 0.,0.03,0 ]
-azmths=[  0.,0.,0,  0, 0.]
-
-dt=0.5*U.msec
-
-ne_z=40
-
-dt=0.5*U.msec
-
-t_end=0.008*U.sec #only this low for testing purposes
-frq=15.*U.Hz
-tcenter=None
-sampling_interval=4*U.msec
-numRcvPerLine=101
-rangeRcv=4.*U.km
-src_dir=[0,1]
-absorption_zone=1000*U.m
-
-# location of source in crossing array lines with in 0..numRcvInLine one needs to be None
-srcEW=numRcvPerLine//2
-srcNS=None
-# dommain dimension
-width_x=rangeRcv + 2*absorption_zone
-width_y=width_x
-depth=sum(layers)
-ne_x=int(ceil(ne_z*width_x/depth))
-#
-# create array 
-#
-receiver_line=[  absorption_zone  + i * (rangeRcv//(numRcvPerLine-1) ) for i in range(numRcvPerLine) ]
-#
-#   set source location with tag "source""
-#
-src_tags=["source"]
-
-if srcEW:
-      srcNS=numRcvPerLine//2
-elif srcNS:
-      srcEW=numRcvPerLine//2
-else:
-    raise ValueError("on of the variables srcEW or srcNS must be None!")
-if DIM == 2:    
-    src_locations  = [ (receiver_line[srcEW], depth) ]
-    src_loc_2D=(receiver_line[srcEW], 0.)
-else:
-    src_locations  = [ (receiver_line[srcEW], receiver_line[srcNS], depth)]
-    src_loc_2D=(receiver_line[srcEW], receiver_line[srcNS])
-
-#
-#   create sensor arrays:
-#
-# East-west line of receiver
-rcv_locations=[]
-rg=[]
-mid_point=receiver_line[len(receiver_line)//2]
-
-for ix in range(len(receiver_line)):
-        if DIM == 2:
-            rcv_locations.append((receiver_line[ix],  depth))
-            rg.append( ( receiver_line[ix], 0.) ) 
-        else:
-           rcv_locations.append((receiver_line[ix], mid_point, depth))
-           rg.append( ( receiver_line[ix], mid_point) ) 
-# North-south line of receiver
-if DIM == 3:
-     for iy in range(len(receiver_line)):
-            rcv_locations.append((mid_point, receiver_line[iy],  depth))
-            rg.append( (  mid_point, receiver_line[iy]) ) 
-#
-# create domain:
-#
-if DIM == 2:
-   domain=Rectangle(ne_x, ne_z ,l0=width_x, l1=depth, 
-        diracPoints=src_locations, diracTags=src_tags)
-else:
-   domain=Brick(ne_x,ne_x,ne_z,l0=width_x,l1=width_y,l2=depth, 
-        diracPoints=src_locations, diracTags=src_tags)
-wl=Ricker(frq, tcenter)
-
-#======================================================================
-z=Function(domain).getX()[DIM-1]
-z_bottom=0
-v_p=0
-delta=0
-vareps=0
-azmth=0
-rho=0
-for l in range(len(layers)):
-       m=wherePositive(z-z_bottom)*whereNonPositive(z-(z_bottom+layers[l]))
-       v_p=v_p*(1-m)+v_Ps[l]*m
-       vareps=vareps*(1-m)+epss[l]*m
-       azmth=azmth*(1-m)+azmths[l]*m
-       delta=delta*(1-m)+deltas[l]*m
-       z_bottom+=layers[l]
-
-sw=SonicHTIWave(domain, v_p, wl, src_tags[0], dt=dt, source_vector = src_dir, eps=vareps, delta=delta, azimuth=azmth,  \
-                     absorption_zone=absorption_zone, absorption_cut=1e-2, lumping=False)
-
-#
-#  print some info:
-#
-print("ne_x = ", ne_x)
-print("ne_z = ", ne_z)
-print("h_x = ", width_x/ne_x)
-print("h_z = ", depth/ne_z)
-print("dt = ", sw.getTimeStepSize()*1000, "msec")
-print("width_x = ", width_x)
-print("depth = ", depth)
-print("number receivers = ", numRcvPerLine)
-print("receiver spacing = ", receiver_line[1]-receiver_line[0])
-print("sampling time = ", sampling_interval*1000,"msec")
-print("source @ ", src_locations[0])
-#
-loc=Locator(domain,rcv_locations)
-tracerP=SimpleSEGYWriter(receiver_group=rg, source=src_loc_2D, sampling_interval=sampling_interval, text='P')
-tracerQ=SimpleSEGYWriter(receiver_group=rg, source=src_loc_2D, sampling_interval=sampling_interval, text='Q')
-
-if not tracerP.obspy_available():
-    print("\nWARNING: obspy not available, SEGY files will not be written\n")
-elif getMPISizeWorld() > 1:
-    print("\nWARNING: SEGY files cannot be written with multiple processes\n")
-
-t=0.
-OUT_DIR="out%sm%smus"%(int(width_x/ne_x),int(sw.getTimeStepSize()*1000000))
-mkDir(OUT_DIR)
-n=0
-k=0
-timer1=time.time()
-while t < t_end:
-    t,u = sw.update(t+sampling_interval)
-    Plog=loc(u[1])
-    Qlog=loc(u[0])
-    tracerP.addRecord(Plog)
-    tracerQ.addRecord(Qlog)
-    print(t, wl.getValue(t)," :", Plog[0], Plog[srcEW], Plog[-1])
-timer1=time.time()-timer1
-print("time= %e sec; %s sec per step"%(timer1,timer1/max(sw.n,1)))
-
-if tracerP.obspy_available() and getMPISizeWorld() == 1:
-    tracerP.write(os.path.join(OUT_DIR,'lineP.sgy'))
-    tracerQ.write(os.path.join(OUT_DIR,'lineQ.sgy'))
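
All of the wave examples guard their SEGY output in the same way: warn up front when obspy is missing or more than one MPI rank is in use, and only call write() when neither applies. A compact sketch of that guard, with a plain function standing in for the checks around SimpleSEGYWriter:

    def can_write_segy(obspy_available, mpi_size):
        """Mirror the pre-write checks made by the examples."""
        if not obspy_available:
            print("WARNING: obspy not available, SEGY files will not be written")
            return False
        if mpi_size > 1:
            print("WARNING: SEGY files cannot be written with multiple processes")
            return False
        return True

    if can_write_segy(obspy_available=True, mpi_size=1):
        pass  # tracerP.write('output/lineP.sgy') would go here
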
diff --git a/doc/examples/inversion/test_commemi1.py b/doc/examples/inversion/test_commemi1.py
index a701528..47ebe3a 100644
--- a/doc/examples/inversion/test_commemi1.py
+++ b/doc/examples/inversion/test_commemi1.py
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -25,14 +25,20 @@ import matplotlib
 # you would like to display the final plot in a window instead.
 matplotlib.use('agg')
 
-
-import esys.downunder.magtel2d as mt2d
-import numpy
 import datetime
+import numpy
+import esys.downunder.magtel2d as mt2d
 import esys.escript            as escript
-import esys.finley             as finley
 import esys.escript.pdetools   as pdetools
 
+try:
+    import esys.finley         as finley
+    HAVE_FINLEY = True
+except ImportError:
+    HAVE_FINLEY = False
+
+HAVE_DIRECT = escript.hasFeature("PASO_DIRECT") or escript.hasFeature('trilinos')
+
 #-------------------------------------------------------------
 # The following functions create the mesh used by this example
 #-------------------------------------------------------------
@@ -355,96 +361,94 @@ def generateCommemi1Mesh():
 #-------------------------------------------------------------
 
 
-
-# ---
-# Initialisations
-# ---
-
-# Get timing:
-startTime = datetime.datetime.now()
-
-# Mode (TE includes air-layer, whereas TM does not):
-mode = 'TE'
-
-# Read the mesh file and define the 'finley' domain:
-#mesh_file = "data/commemi1_te.fly"
-#domain = finley.ReadMesh(mesh_file, numDim=2)
-if escript.getEscriptParamInt('GMSH_SUPPORT'):
-    domain = generateCommemi1Mesh()
-
-# Sounding frequencies (in Hz):
-freq_def = {"high":1.0e+1,"low":1.0e+1,"step":1}
-# Frequencies will be mapped on a log-scale from
-# 'high' to 'low' with 'step' points per decade.
-# (also only one frequency must be passed via dict)
-
-# Step sizes for sampling along vertical and horizontal axis (in m):
-xstep=400
-zstep=200
-
-
-
-# ---
-# Resistivity model
-# ---
-
-# Resistivity values assigned to tagged regions (in Ohm.m):
-rho  = [
-        1.0e+14, # 0: air
-        100.0  , # 1: host
-          0.5    # 2: anomaly
-       ]
-
-# Tags must match those in the file:
-tags = ["domain_air", "domain_host", "domain_anomaly"]
-
-
-# ---
-# Layer definitions for 1D response at boundaries.
-# ---
-
-# List with resistivity values for left and right boundary.
-rho_1d_left  = [ rho[0], rho[1] ]
-rho_1d_rght  = [ rho[0], rho[1] ]
-
-# Associated interfaces for 1D response left and right (must match the mesh file).
-ifc_1d_left = [ 20000, 0, -20000]
-ifc_1d_rght = [ 20000, 0, -20000]
-
-# Save in dictionary with layer interfaces and resistivities left and right:
-ifc_1d = {"left":ifc_1d_left , "right":ifc_1d_rght}
-rho_1d = {"left":rho_1d_left , "right":rho_1d_rght}
-
-
-
-# ---
-# Run MT_2D
-# ---
-
-# Class options:
-mt2d.MT_2D._solver = "DIRECT"
-mt2d.MT_2D._debug   = False
-
-if mt2d.MT_2D._solver == "DIRECT" and escript.getMPISizeWorld() > 1:
-    print("Direct solvers and multiple MPI processes are not currently supported")
-elif mt2d.MT_2D._solver == "DIRECT" and not escript.getEscriptParamInt('PASO_DIRECT'):
-    print("escript was not built with support for direct solvers, aborting")
-elif not escript.getEscriptParamInt('GMSH_SUPPORT'):
-    print("This example requires gmsh")
-else:
-    # Instantiate an MT_2D object with required & optional parameters:
-    obj_mt2d = mt2d.MT_2D(domain, mode, freq_def, tags, rho, rho_1d, ifc_1d,
-            xstep=xstep ,zstep=zstep, maps=None, plot=True)
-
-    # Solve for fields, apparent resistivity and phase:
-    mt2d_fields, arho_2d, aphi_2d = obj_mt2d.pdeSolve()
-
-
-    #
-    print(datetime.datetime.now()-startTime)
-
-
-    print("Done!")
-
+if HAVE_FINLEY:
+    # ---
+    # Initialisations
+    # ---
+
+    # Get timing:
+    startTime = datetime.datetime.now()
+
+    # Mode (TE includes air-layer, whereas TM does not):
+    mode = 'TE'
+
+    # Read the mesh file and define the 'finley' domain:
+    #mesh_file = "data/commemi1_te.fly"
+    #domain = finley.ReadMesh(mesh_file, numDim=2)
+    if escript.hasFeature('gmsh'):
+        domain = generateCommemi1Mesh()
+
+    # Sounding frequencies (in Hz):
+    freq_def = {"high":1.0e+1,"low":1.0e+1,"step":1}
+    # Frequencies will be mapped on a log-scale from
+    # 'high' to 'low' with 'step' points per decade.
+    # (even a single frequency must be passed via the dict)
+
+    # Step sizes for sampling along vertical and horizontal axis (in m):
+    xstep=400
+    zstep=200
+
+    # ---
+    # Resistivity model
+    # ---
+
+    # Resistivity values assigned to tagged regions (in Ohm.m):
+    rho  = [
+            1.0e+14, # 0: air
+            100.0  , # 1: host
+              0.5    # 2: anomaly
+           ]
+
+    # Tags must match those in the file:
+    tags = ["domain_air", "domain_host", "domain_anomaly"]
+
+
+    # ---
+    # Layer definitions for 1D response at boundaries.
+    # ---
+
+    # List with resistivity values for left and right boundary.
+    rho_1d_left  = [ rho[0], rho[1] ]
+    rho_1d_rght  = [ rho[0], rho[1] ]
+
+    # Associated interfaces for 1D response left and right (must match the mesh file).
+    ifc_1d_left = [ 20000, 0, -20000]
+    ifc_1d_rght = [ 20000, 0, -20000]
+
+    # Save in dictionary with layer interfaces and resistivities left and right:
+    ifc_1d = {"left":ifc_1d_left , "right":ifc_1d_rght}
+    rho_1d = {"left":rho_1d_left , "right":rho_1d_rght}
+
+    # ---
+    # Run MT_2D
+    # ---
+
+    # Class options:
+    mt2d.MT_2D._solver = "DIRECT"
+    mt2d.MT_2D._debug   = False
+
+    if mt2d.MT_2D._solver == "DIRECT" and not escript.hasFeature('paso'):
+        print("Trilinos direct solvers cannot currently handle PDE systems. Please compile with Paso.")
+    elif mt2d.MT_2D._solver == "DIRECT" and not HAVE_DIRECT:
+        if escript.getMPISizeWorld() > 1:
+            print("Direct solvers and multiple MPI processes are not currently supported.")
+        else:
+            print("escript was not built with support for direct solvers, aborting.")
+    elif not escript.hasFeature('gmsh'):
+        print("This example requires gmsh, aborting.")
+    else:
+        # Instantiate an MT_2D object with required & optional parameters:
+        obj_mt2d = mt2d.MT_2D(domain, mode, freq_def, tags, rho, rho_1d, ifc_1d,
+                xstep=xstep, zstep=zstep, maps=None, plot=True)
+
+        # Solve for fields, apparent resistivity and phase:
+        mt2d_fields, arho_2d, aphi_2d = obj_mt2d.pdeSolve()
+
+        #
+        print(datetime.datetime.now()-startTime)
+        print("Done!")
+
+else: # no finley
+    print("Finley module not available.")
 
 
diff --git a/doc/examples/inversion/test_commemi4.py b/doc/examples/inversion/test_commemi4.py
index 9d3c6a0..2e41135 100644
--- a/doc/examples/inversion/test_commemi4.py
+++ b/doc/examples/inversion/test_commemi4.py
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -24,18 +24,20 @@ import matplotlib
 # you would like to display the final plot in a window instead.
 matplotlib.use('agg')
 
+import datetime
+import numpy
 
 import esys.downunder.magtel2d as mt2d
-import numpy
-import datetime
 import esys.escript            as escript
-import esys.finley             as finley
 import esys.escript.pdetools   as pdetools
 
+try:
+    import esys.finley         as finley
+    HAVE_FINLEY = True
+except ImportError:
+    HAVE_FINLEY = False
 
-
-
-
+HAVE_DIRECT = escript.hasFeature("PASO_DIRECT") or escript.hasFeature('trilinos')
 
 def setupMesh(mode, coord, elem_sizes):         
     #---------------------------------------------------------------------------
@@ -433,121 +435,118 @@ def generateCommemi4Mesh():
 
 
 
-# ---
-# Initialisations
-# ---
-
-# Get timing:
-startTime = datetime.datetime.now()
-
-# Mode (TE includes air-layer, whereas TM does not):
-mode = 'TE'
-
-# Read the mesh file and define the 'finley' domain:
-#mesh_file = "commemi4_tm.fly"
-#domain = finley.ReadMesh(mesh_file, numDim=2)
-if escript.getEscriptParamInt('GMSH_SUPPORT'):
-    domain=generateCommemi4Mesh()
-
-# Sounding frequencies (in Hz):
-freq_def = {"high":1.0e+0,"low":1.0e-0,"step":1}
-# Frequencies will be mapped on a log-scale from
-# 'high' to 'low' with 'step' points per decade.
-# (also only one frequency must be passed via dict)
-
-# Step sizes for sampling along vertical and horizontal axis (in m):
-xstep=300
-zstep=250
-
-
-
-# ---
-# Resistivity model
-# ---
-
-# Resistivity values assigned to tagged regions (in Ohm.m):
-rho  = [
-        1.0e+14, # 0: air     1.0e-30
-        25.0   , # 1: lyr1    0.04
-        10.0   , # 2: slab    0.1
-        2.5    , # 3: basin   0.4
-        1000.0 , # 4: lyr2    0.001
-        5.0      # 5: lyr3    0.2
-       ]
-
-# Tags must match those in the file:
-tags = ["air", "lyr1", "slab", "basin", "lyr2", "lyr3"]
-
-# Optional user defined map of resistivity:
-def f4(x,z,r): return escript.sqrt(escript.sqrt(x*x+z*z))/r
-maps = [None, None, None, None, f4, None]
-
-
-
-# ---
-# Layer definitions for 1D response at boundaries.
-# ---
-
-# List with resistivity values for left and right boundary.
-rho_1d_left  = [ rho[0], rho[1], rho[2], rho[4], rho[5] ]
-rho_1d_rght  = [ rho[0], rho[1], rho[3], rho[4], rho[5] ]
-
-# Associated interfaces for 1D response left and right (must match the mesh file).
-ifc_1d_left = [ 50000, 0, -500, -2000, -25000, -50000]
-ifc_1d_rght = [ 50000, 0, -500, -1000, -25000, -50000]
-
-# Save in dictionary with layer interfaces and resistivities left and right:
-ifc_1d = {"left":ifc_1d_left , "right":ifc_1d_rght}
-rho_1d = {"left":rho_1d_left , "right":rho_1d_rght}
-
-
-
-# ---
-# Adjust parameters here for TM mode
-# ---
-
-# Simply delete first element from lists:
-if mode.upper() == 'TM':
-  tags.pop(0)
-  rho.pop(0)
-  rho_1d['left'].pop(0)
-  rho_1d['right'].pop(0)
-  ifc_1d['left'].pop(0)
-  ifc_1d['right'].pop(0)
-  if maps is not None:
-    maps.pop(0)
-
-
-
-# ---
-# Run MT_2D
-# ---
-
-# Class options:
-mt2d.MT_2D._solver = "DIRECT"
-mt2d.MT_2D._debug   = False
-
-if mt2d.MT_2D._solver == "DIRECT" and escript.getMPISizeWorld() > 1:
-    print("Direct solvers and multiple MPI processes are not currently supported")
-elif mt2d.MT_2D._solver == "DIRECT" and not escript.getEscriptParamInt('PASO_DIRECT'):
-    print("escript was not built with support for direct solvers, aborting")
-elif not escript.getEscriptParamInt('GMSH_SUPPORT'):
-    print("This example requires gmsh")
-else:
-
-    # Instantiate an MT_2D object with required & optional parameters:
-    obj_mt2d = mt2d.MT_2D(domain, mode, freq_def, tags, rho, rho_1d, ifc_1d,
-            xstep=xstep ,zstep=zstep, maps=None, plot=True)
-
-    # Solve for fields, apparent resistivity and phase:
-    mt2d_fields, arho_2d, aphi_2d = obj_mt2d.pdeSolve()
-
-
-    #
-    print("Runtime:", datetime.datetime.now()-startTime)
-
-
-    print("Done!")
-
+if HAVE_FINLEY:
+    # ---
+    # Initialisations
+    # ---
+
+    # Get timing:
+    startTime = datetime.datetime.now()
 
+    # Mode (TE includes air-layer, whereas TM does not):
+    mode = 'TE'
+
+    # Read the mesh file and define the 'finley' domain:
+    #mesh_file = "commemi4_tm.fly"
+    #domain = finley.ReadMesh(mesh_file, numDim=2)
+    if escript.hasFeature('gmsh'):
+        domain=generateCommemi4Mesh()
+
+    # Sounding frequencies (in Hz):
+    freq_def = {"high":1.0e+0,"low":1.0e-0,"step":1}
+    # Frequencies will be mapped on a log-scale from
+    # 'high' to 'low' with 'step' points per decade.
+    # (also only one frequency must be passed via dict)
+
+    # Step sizes for sampling along vertical and horizontal axis (in m):
+    xstep=300
+    zstep=250
+
+
+    # ---
+    # Resistivity model
+    # ---
+
+    # Resistivity values assigned to tagged regions (in Ohm.m):
+    rho  = [
+            1.0e+14, # 0: air     1.0e-30
+            25.0   , # 1: lyr1    0.04
+            10.0   , # 2: slab    0.1
+            2.5    , # 3: basin   0.4
+            1000.0 , # 4: lyr2    0.001
+            5.0      # 5: lyr3    0.2
+           ]
+
+    # Tags must match those in the file:
+    tags = ["air", "lyr1", "slab", "basin", "lyr2", "lyr3"]
+
+    # Optional user defined map of resistivity:
+    def f4(x,z,r): return escript.sqrt(escript.sqrt(x*x+z*z))/r
+    maps = [None, None, None, None, f4, None]
+
+
+    # ---
+    # Layer definitions for 1D response at boundaries.
+    # ---
+
+    # List with resistivity values for left and right boundary.
+    rho_1d_left  = [ rho[0], rho[1], rho[2], rho[4], rho[5] ]
+    rho_1d_rght  = [ rho[0], rho[1], rho[3], rho[4], rho[5] ]
+
+    # Associated interfaces for 1D response left and right (must match the mesh file).
+    ifc_1d_left = [ 50000, 0, -500, -2000, -25000, -50000]
+    ifc_1d_rght = [ 50000, 0, -500, -1000, -25000, -50000]
+
+    # Save in dictionary with layer interfaces and resistivities left and right:
+    ifc_1d = {"left":ifc_1d_left , "right":ifc_1d_rght}
+    rho_1d = {"left":rho_1d_left , "right":rho_1d_rght}
+
+
+    # ---
+    # Adjust parameters here for TM mode
+    # ---
+
+    # Simply delete first element from lists:
+    if mode.upper() == 'TM':
+      tags.pop(0)
+      rho.pop(0)
+      rho_1d['left'].pop(0)
+      rho_1d['right'].pop(0)
+      ifc_1d['left'].pop(0)
+      ifc_1d['right'].pop(0)
+      if maps is not None:
+        maps.pop(0)
+
+
+    # ---
+    # Run MT_2D
+    # ---
+
+    # Class options:
+    mt2d.MT_2D._solver = "DIRECT"
+    mt2d.MT_2D._debug   = False
+
+    if mt2d.MT_2D._solver == "DIRECT" and not escript.hasFeature('paso'):
+        print("Trilinos direct solvers cannot currently handle PDE systems. Please compile with Paso.")
+    elif mt2d.MT_2D._solver == "DIRECT" and not HAVE_DIRECT:
+        if escript.getMPISizeWorld() > 1:
+            print("Direct solvers and multiple MPI processes are not currently supported.")
+        else:
+            print("escript was not built with support for direct solvers, aborting.")
+    elif not escript.hasFeature('gmsh'):
+        print("This example requires gmsh, aborting.")
+    else:
+        # Instantiate an MT_2D object with required & optional parameters:
+        obj_mt2d = mt2d.MT_2D(domain, mode, freq_def, tags, rho, rho_1d, ifc_1d,
+                xstep=xstep, zstep=zstep, maps=None, plot=True)
+
+        # Solve for fields, apparent resistivity and phase:
+        mt2d_fields, arho_2d, aphi_2d = obj_mt2d.pdeSolve()
+
+        #
+        print("Runtime:", datetime.datetime.now()-startTime)
+        print("Done!")
+
+else: # no finley
+    print("Finley module not available.")
 
diff --git a/doc/examples/usersguide/voxet_reader.py b/doc/examples/usersguide/voxet_reader.py
index 2bff33a..6606f78 100644
--- a/doc/examples/usersguide/voxet_reader.py
+++ b/doc/examples/usersguide/voxet_reader.py
@@ -18,7 +18,6 @@ from __future__ import print_function, division
 import os
 from esys.downunder import CartesianReferenceSystem
 from esys.escript import ReducedFunction
-from esys.ripley import readBinaryGrid, BYTEORDER_BIG_ENDIAN, DATATYPE_FLOAT32, DATATYPE_FLOAT64
 
 def readVoxet(domain, filename, voproperty=1, origin=None, fillValue=0.,
               referenceSystem=CartesianReferenceSystem()):
@@ -50,6 +49,7 @@ def readVoxet(domain, filename, voproperty=1, origin=None, fillValue=0.,
                             axis accordingly
     :type referenceSystem: `ReferenceSystem`
     """
+    from esys.ripley import readBinaryGrid, BYTEORDER_BIG_ENDIAN, DATATYPE_FLOAT32, DATATYPE_FLOAT64
     header=open(filename).readlines()
     if not header[0].startswith('GOCAD Voxet'):
         raise ValueError("Voxet header not found. Invalid Voxet file?!")
@@ -198,23 +198,31 @@ def readVoxet(domain, filename, voproperty=1, origin=None, fillValue=0.,
 
 
 if __name__ == "__main__":
-    from esys.escript import *
-    from esys.escript.linearPDEs import Poisson
-    from esys.ripley import Brick
-    from esys.weipa import saveSilo, saveVoxet
-
-    dom = Brick(l0=1.,l1=1.,n0=9, n1=9, n2=9)
-    x = dom.getX()
-    gammaD = whereZero(x[0])+whereZero(x[1])
-    pde = Poisson(dom)
-    q = gammaD
-    pde.setValue(f=1, q=q)
-    u = pde.getSolution()
-    u=interpolate(u+dom.getX()[2], ReducedFunction(dom))
-    print(u)
-    saveVoxet('/tmp/poisson.vo', u=u)
-    print("-------")
-    dom = Brick(l0=1.,l1=1.,l2=4.,n0=18, n1=18, n2=36)
-    v=readVoxet(dom, '/tmp/poisson.vo', 'u', fillValue=0.5)
-    print(v)
-    #saveSilo('/tmp/poisson', v=v)
+    try:
+        from esys.ripley import Brick
+        HAVE_RIPLEY = True
+    except ImportError:
+        HAVE_RIPLEY = False
+        print("Ripley module not available")
+
+    if HAVE_RIPLEY:
+        from esys.escript import *
+        from esys.escript.linearPDEs import Poisson
+        from esys.weipa import saveSilo, saveVoxet
+
+        dom = Brick(l0=1.,l1=1.,n0=9, n1=9, n2=9)
+        x = dom.getX()
+        gammaD = whereZero(x[0])+whereZero(x[1])
+        pde = Poisson(dom)
+        q = gammaD
+        pde.setValue(f=1, q=q)
+        u = pde.getSolution()
+        u=interpolate(u+dom.getX()[2], ReducedFunction(dom))
+        print(u)
+        saveVoxet('/tmp/poisson.vo', u=u)
+        print("-------")
+        dom = Brick(l0=1.,l1=1.,l2=4.,n0=18, n1=18, n2=36)
+        v=readVoxet(dom, '/tmp/poisson.vo', 'u', fillValue=0.5)
+        print(v)
+        #saveSilo('/tmp/poisson', v=v)
+
diff --git a/doc/examples/usersguide/wave.py b/doc/examples/usersguide/wave.py
index 6f1ee85..0e57b28 100644
--- a/doc/examples/usersguide/wave.py
+++ b/doc/examples/usersguide/wave.py
@@ -26,6 +26,7 @@ matplotlib.use('agg')    #For interactive use, you can comment out this line
 #It's just here to make testing easier
 import matplotlib.pyplot as plt
 from numpy import zeros,ones
+import numpy
 from esys.escript import *
 from esys.escript.linearPDEs import LinearPDE, SolverOptions
 from esys.escript.pdetools import Locator
diff --git a/doc/install/cxx11.tex b/doc/install/cxx11.tex
new file mode 100644
index 0000000..0edce0e
--- /dev/null
+++ b/doc/install/cxx11.tex
@@ -0,0 +1,14 @@
+\chapter{Required compiler features}
+\label{app:cxxfeatures}
+
+Building escript from source requires that your C++ compiler supports at least the following features:
+\begin{itemize}
+ \item \texttt{std::complex<>}
+ \item Variables declared with type \texttt{auto}
+ \item Variables declared with type \texttt{decltype(T)}
+ \item \texttt{extern template class} to prevent instantiation of templates. 
+ \item \texttt{template class \textit{classname$<$type$>$};} to force instantiation of templates
+ \item \texttt{isnan()} is defined in the \texttt{std::} namespace
+\end{itemize}
+The above is not exhaustive and only lists language features which are more recent than our previous pre-C++11 baseline (or which
+we have recently begun to rely on).
diff --git a/doc/install/install.tex b/doc/install/install.tex
index 633fd81..d89ecde 100644
--- a/doc/install/install.tex
+++ b/doc/install/install.tex
@@ -53,8 +53,8 @@ Email: \email{esys at esscc.uq.edu.au}
 \tableofcontents
 
 \include{intro}
-% \include{debbin}
 \include{source}
 
 \esysappendix
+\include{cxx11}
 \end{document}
diff --git a/doc/install/intro.tex b/doc/install/intro.tex
index a4db23b..61489a7 100644
--- a/doc/install/intro.tex
+++ b/doc/install/intro.tex
@@ -15,25 +15,16 @@
 \chapter{Introduction}
 This document describes how to install \emph{esys-Escript}\footnote{For the rest of the document we will drop the \emph{esys-}} on to your computer.
 To learn how to use \esfinley please see the Cookbook, User's guide or the API documentation.
-If you use the Debian or Ubuntu and you have installed the \texttt{python-escript-doc} package then the documentation 
-will be available in the directory\\
-\file{/usr/share/doc/python-escript-doc}, otherwise (if you haven't done so already) you can download the documentation bundle 
-from launchpad.
-
-
 
 \esfinley is primarily developed on Linux desktop, SGI ICE and \macosx systems.
-This guide covers installing the packages from source.
-% It can be installed in two ways:
-% \begin{enumerate}
-%   \item Binary packages -- ready to run with no compilation required. These are available for recent Debian and Ubuntu distributions.
-%   \item From source -- that is, it must be compiled for your machine.
-%   This will be required if you are running anything other than Debian/Ubuntu 
-%   or if extra functionality is required.
-% \end{enumerate}
+It can be installed in two ways:
+\begin{enumerate}
+  \item Binary packages -- ready to run with no compilation required. These will hopefully be available in upcoming Debian and Ubuntu releases, in which case you can simply use your normal package manager (and do not need this guide).
+  \item From source -- that is, it must be compiled for your machine.
+This is the topic of this guide.
+\end{enumerate}
 
 See the site \url{https://answers.launchpad.net/escript-finley} for online help.
-% Chapter~\ref{chap:bin} describes how to install binary packages on Debian/Ubuntu systems.
 Chapter~\ref{chap:source} covers installing from source.
 
 
diff --git a/doc/install/source.tex b/doc/install/source.tex
index 8478837..99e9bc7 100644
--- a/doc/install/source.tex
+++ b/doc/install/source.tex
@@ -33,18 +33,21 @@ There are two technologies which \escript can employ here.
 
 Escript is primarily tested on recent versions of the GNU and Intel suites (``g++'' / ``icpc'').
 However, it also passes our tests when compiled using ``clang++''.
+Escript now requires compiler support for some features of the C++11 standard.
+See Appendix~\ref{app:cxxfeatures} for a list.
+
 
 Our current test compilers include:
 \begin{itemize}
- \item g++ 4.7.2, 4.9.2
- \item clang++ (OSX 10.10 default)
- \item intel icpc v15
+ \item g++ (5 and 6)
+ \item clang++ (OSX 10.11 default)
+ \item intel icpc v15, v16
 \end{itemize}
 
 Note that:
 \begin{itemize}
  \item OpenMP will not function correctly for g++ $\leq$ 4.2.1 (and is not currently supported by clang).
- \item icpc v11 has a subtle bug involving OpenMP and c++ exception handling, so this combination should not be used.
+ \item icpc v11 has a subtle bug involving OpenMP and C++ exception handling, so this combination should not be used.
 \end{itemize}
 
 \subsection{What parallel technology do I need?}\label{sec:needpar}
@@ -62,7 +65,8 @@ If in the future you find escript useful and your scripts take significant time
 
 Note that even if your version of \escript has support for \openmp or MPI, you will still need to tell the system to 
 use it when you run your scripts.
-If you are using the \texttt{run-escript} launcher, then this is controlled  by the \texttt{-t} and \texttt{-p} options.
+If you are using the \texttt{run-escript} launcher, then this is controlled by
+the \texttt{-t}, \texttt{-p}, and \texttt{-n} options.
 If not, then consult the documentation for your MPI libraries (or the compiler documentation in the case of OpenMP
 \footnote{It may be enough to set the \texttt{OMP\_NUM\_THREADS} environment variable.}).
 
@@ -73,18 +77,18 @@ This release of \escript has only been tested on OSX 10.11.
 For this section we assume you are using either \texttt{homebrew} or \texttt{MacPorts} as a package 
 manager\footnote{Note that package managers will make changes to your computer based on programs configured by other people from 
 various places around the internet. It is important to satisfy yourself as to the security of those systems.}.
-You can of course install prerequisite software in other other ways.
+You can of course install prerequisite software in other ways.
 For example, we have had \emph{some} success changing the default 
 compilers used by those systems. However this is more complicated and we do not provide a guide here.
-% Successful combinations of OSX and package managers are given in the table below.
-% 
-% \begin{center}
-% \begin{tabular}{|c|c|c|}\hline
-%  & \texttt{homebrew} & \texttt{MacPorts} \\\hline
-% OSX 10.9 & Yes & No\\\hline
-% OSX 10.10& Yes & Yes\\\hline
-% \end{tabular}
-% \end{center}
+Successful combinations of OSX and package managers are given in the table below.
+
+\begin{center}
+\begin{tabular}{|c|c|c|}\hline
+ & \texttt{homebrew} & \texttt{MacPorts} \\\hline
+OSX 10.9 & Yes & No\\\hline
+OSX 10.10& Yes & Yes\\\hline
+\end{tabular}
+\end{center}
 
 \noindent Both of those systems require the XCode command line tools to be installed\footnote{As of OSX10.9, the 
 command \texttt{xcode-select --install} will allow you to download and install the commandline tools.}.
@@ -94,9 +98,9 @@ command \texttt{xcode-select --install} will allow you to download and install t
 To simplify things for people, we have prepared \texttt{_options.py} files for a number of 
 systems\footnote{These are correct at the time of writing but later versions of those systems may require tweaks. 
 Also, these systems represent a cross section of possible platforms rather than meaning those systems get particular support.}.
-The \texttt{_options.py} files are located in the \texttt{scons/templates} directory. We suggest that the file most relevant to your os 
+The \texttt{_options.py} files are located in the \texttt{scons/templates} directory. We suggest that the file most relevant to your OS 
 be copied from the templates directory to the scons directory and renamed to the form XXXX_options.py where XXXX 
-should be replaced with your computer's name.
+should be replaced with your computer's (host-)name.
 If your particular system is not in the list below, or if you want a more customised 
 build, 
 see Section~\ref{sec:othersrc} for instructions.
@@ -118,12 +122,12 @@ All of these instructions assume that you have obtained the \escript source (and
 
 \begin{shellCode}
 sudo aptitude install python-dev python-numpy libboost-python-dev libnetcdf-dev 
-sudo aptitude install scons lsb-release  libboost-random-dev
+sudo aptitude install scons lsb-release libboost-random-dev
 sudo aptitude install python-sympy python-matplotlib python-scipy
 sudo aptitude install python-pyproj python-gdal 
 \end{shellCode}
 
-\noindent If you are running \textit{Jessie}, (or if \textit{wheezy-backports} is in your \texttt{apt} sources) you can use:
+\noindent If you are running \textit{Wheezy}, you can use:
 \begin{shellCode}
 sudo aptitude install gmsh 
 \end{shellCode}
@@ -137,7 +141,7 @@ sudo aptitude install zip texlive-latex-extra latex-xcolor
 \end{shellCode}
 \end{optionalstep}
 
-\noindent In the source directory execute the following (substitute wheezy for XXXX):
+\noindent In the source directory execute the following (substitute jessie for XXXX):
 \begin{shellCode}
 scons -j1 options_file=scons/templates/XXXX_options.py
 \end{shellCode}
@@ -184,10 +188,10 @@ These instructions were prepared using release $13.2$.
 
 \noindent Install packages from the main distribution:
 \begin{shellCode}
-sudo zypper install libboost_python1_54_0 libboost_random1_54_0 
-sudo zypper python-devel python-numpy libnetcdf_c++-devel
-sudo zypper install python-scipy python-sympy python-matplotlib 
-sudo zypper install gcc gcc-c++ scons boost-devel netcdf-devel
+sudo zypper in libboost_python1_54_0 libboost_random1_54_0 
+sudo zypper in python-devel python-numpy libnetcdf_c++-devel
+sudo zypper in python-scipy python-sympy python-matplotlib 
+sudo zypper in gcc gcc-c++ scons boost-devel netcdf-devel
 \end{shellCode}
 These will allow you to use most features except some parts of the \downunder inversion library.
 If you wish to use those, you will need some additional packages [python-pyproj, python-gdal].
@@ -212,9 +216,9 @@ scons -j1 py_tests options_file=scons/templates/opensuse13.1_options.py
 
 \noindent Now go to Section~\ref{sec:cleanup} for cleanup.
 
-\subsection{Centos}\label{sec:centossrc}
-These instructions were prepared using centos release $7.0$.
-The core of escript works, however some functionality is not availible because the default packages for some dependencies in Centos are too old.
+\subsection{CentOS}\label{sec:centossrc}
+These instructions were prepared using CentOS release $7.0$.
+The core of escript works, however some functionality is not available because the default packages for some dependencies in CentOS are too old.
 
 \noindent Add the \texttt{EPEL} repository.
 \begin{shellCode}
@@ -234,7 +238,7 @@ the \downunder inversion library.
 If you wish to use those, you will need to install some additional packages.
 
 \noindent For some coordinate transformations, \downunder can also make use of the python interface to a tool called \texttt{proj}.
-There does not seem to be an obvious centos repository for this though.
+There does not seem to be an obvious CentOS repository for this though.
 If it turns out to be necessary for your particular application, the source can be downloaded. 
 
 \noindent Now to build escript itself.
@@ -268,7 +272,7 @@ scons -j1 py_tests options_file=scons/templates/fedora21_5_options.py
 
 \noindent Now go to Section~\ref{sec:cleanup} for cleanup.
 
-\subsection{MacOS 10.10/10.11 (macports)}\label{sec:macportsrc}
+\subsection{MacOS 10.10 (macports)}\label{sec:macportsrc}
 
 The following will install the capabilities needed for the \texttt{macports_10.10_options.py} file.
 
@@ -291,10 +295,10 @@ scons -j1 options_file=scons/templates/macports_10.10options.py
 \end{shellCode}
 
 
-\subsection{10.10/10.11 (homebrew)}\label{sec:homebrewsrc}
+\subsection{MacOS 10.9, 10.10 (homebrew)}\label{sec:homebrewsrc}
 
 The following will install the capabilities needed for the \texttt{homebrew_10.10_options.py} file.
-For OS 10.11 use \texttt{homebrew_11_options.py} instead.
+OSX 10.9 can use the same file.
 
 \begin{shellCode}
 brew install scons
@@ -403,7 +407,7 @@ usepython3=True
 \end{verbatim}
 
 \begin{verbatim}
-pythonlibname='whateveryourpython3libraryiscalled'
+pythonlibname='whatever_your_python3_library_is_called'
 \end{verbatim}
 
 
@@ -423,10 +427,11 @@ Once the build (and optional testing) is complete, you can remove everything exc
  \item esys
  \item lib
  \item doc
- \item CREDITS.TXT
- \item README_LICENSE
+ \item CREDITS
+ \item LICENSE
+ \item README
 \end{itemize}
-The last two aren't strictly required for operation.
+The last three aren't strictly required for operation.
 The \texttt{doc} directory is not required either but does contain examples of escript scripts.
 
 You can run escript using \texttt{\textit{path_to_escript_files}/bin/run-escript}.
@@ -441,18 +446,24 @@ The launcher will then take care of the rest of the environment.
 
 Some other packages which might be useful include:
 \begin{itemize}
- \item support for silo format (install the relevant libraries and enable them in the options file).
- \item Visit --- visualisation package. Can be used independently but our \texttt{weipa} library can make a Visit 
-plug-in to allow direct visualisation of escript files.
+ \item LAPACK and UMFPACK --- direct solvers (install the relevant libraries and enable them in the options file).
+ \item support for the Silo file format (install the relevant libraries and enable them in the options file).
+ \item VisIt --- visualisation package. Can be used independently but our \texttt{weipa} library can make a VisIt
+plug-in to allow direct visualisation of escript simulations.
  \item gmsh --- meshing software used by our \texttt{pycad} library.
  \item Mayavi2 --- another visualisation tool.
 \end{itemize}
 
 
-%Need a better title but this is stuff like visit and silo (for non-debian distros)
-%Perhaps - optional extras
-
-
-
+\subsection{Trilinos}
+\escript now has some support for Trilinos\footnote{\url{https://trilinos.org/}}
+solvers and preconditioners.
+The most significant limitation is that the current Trilinos release does not
+support block matrices so \escript can only use Trilinos solvers for single
+PDEs (i.e. no PDE systems).
 
+If your distribution does not provide Trilinos packages you can build a working
+version from source.
+\ldots
+%provide cmake command here
 
diff --git a/doc/user/changes.tex b/doc/user/changes.tex
index c685ac7..3bca93e 100644
--- a/doc/user/changes.tex
+++ b/doc/user/changes.tex
@@ -16,17 +16,19 @@
 \chapter{Changes from previous releases}
 \label{app:changes}
 
-\subsection*{4.1 to 4.2}
+\subsection*{5.0}
 \begin{itemize}
- \item UMFPACK fixes.
- \item Work to support long indexes (very large numbers of elements)
- \item Improved decomposition for larger numbers of threads.
- \item Added build sanity check to build steps.
- \item Ripley now uses MPI-IO.
- \item Work to help with intended Debian packaging.
- \item Fixes to keep clang happy.
- \item Fixes to accomodate (OSX + clang)'s new library loading policies.
- \item Other bug fixes.
+  \item Minimum Python version is now 2.7.
+  \item Remaining code switched from C to C++.
+  \item Compilers are now required to support C++11 at minimum.
+  \item Some support for storing and manipulating complex values in Data objects.
+  \item Some solver configurations can solve PDEs with complex values.
+  \item Experimental support for Trilinos solvers and preconditioners.
+  \item \ripley by default no longer adjusts domain lengths when the number of MPI ranks is unsuitable. A new function controls this behaviour.
+  \item Some C++ exceptions are now translated to equivalent Python exceptions (e.g. ValueError).
+  \item New \escript feature test function to interrogate build features at runtime.
+  \item Support for 'long' indices for very large local domains.
+  \item \escript is now more modular. New options can be used to select what to build.
 \end{itemize}
 
 \subsection*{4.0 to 4.1}
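The feature test function mentioned in the 5.0 change list can be exercised directly at runtime; an illustrative sketch (the feature names below are the ones appearing elsewhere in this commit, other names depend on the build):

    import esys.escript as escript

    # Interrogate the build at runtime; hasFeature() returns True or False.
    for feature in ("gmsh", "paso", "trilinos", "PASO_DIRECT"):
        print(feature, "->", escript.hasFeature(feature))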
diff --git a/doc/user/esys.bib b/doc/user/esys.bib
index ccc48cc..7e7f363 100644
--- a/doc/user/esys.bib
+++ b/doc/user/esys.bib
@@ -266,17 +266,6 @@ Created by Kbib version 0.6.5
 	year = 2002
 }
 
-@article{PASTIX,
-	author = "H{\'e}non, P. and Ramet, P. and Roman, J.",
-	journal = "Parallel Computing",
-	month = jan,
-	number = "2",
-	pages = "301--321",
-	title = "{{PaStiX}: A High-Performance Parallel Direct Solver for Sparse Symmetric Definite Systems}",
-	volume = "28",
-	year = "2002"
-}
-
 @article{Trilinos,
 	address = "New York, NY, USA",
 	author = "Heroux, Michael A. and Bartlett, Roscoe A. and Howle, Vicki E. and Hoekstra, Robert J. and Hu, Jonathan J. and Kolda, Tamara G. and Lehoucq, Richard B. and Long, Kevin R. and Pawlowski, Roger P. and Phipps, Eric T. and Salinger, Andrew G. and Thornquist, Heidi K. and Tuminaro, Ray S. and Willenbring, James M. and Williams, Alan and Stanley, Kendall S.",
diff --git a/doc/user/linearPDE.tex b/doc/user/linearPDE.tex
index 245821c..a22da0d 100644
--- a/doc/user/linearPDE.tex
+++ b/doc/user/linearPDE.tex
@@ -565,8 +565,7 @@ The value of \var{package} must be one of the constants:\\
  \member{SolverOptions.CUSP} -- CUDA sparse linear algebra package\\
  \member{SolverOptions.MKL} -- Intel MKL direct solver\\
  \member{SolverOptions.PASO} -- built-in PASO solver library\\
- \member{SolverOptions.PASTIX} -- Pastix direct solver package\\
- \member{SolverOptions.SUPER_LU} -- Super LU solver package\\
+ \member{SolverOptions.TRILINOS} -- Trilinos solver package\\
  \member{SolverOptions.UMFPACK} -- direct solver from the UMFPACK library.\\
 Not all packages are supported on all implementations. An exception may be
 thrown on some platforms if a particular package is requested.
@@ -984,12 +983,8 @@ if this is supported. In any case, it is advisable to apply reordering on the
 mesh to minimize fill-in.
 \end{memberdesc}
 
-\begin{memberdesc}[SolverOptions]{SUPER_LU}
-the SuperLU library~\cite{SuperLU} is used as a solver.
-\end{memberdesc}
-
-\begin{memberdesc}[SolverOptions]{PASTIX}
-the Pastix library~\cite{PASTIX} is used as a solver.
+\begin{memberdesc}[SolverOptions]{TRILINOS}
+the Trilinos library~\cite{Trilinos} is used as a solver.
 \end{memberdesc}
 
 \begin{memberdesc}[SolverOptions]{NO_PRECONDITIONER}
diff --git a/doc/user/subworlds.tex b/doc/user/subworlds.tex
index bd794c9..86e6b1d 100644
--- a/doc/user/subworlds.tex
+++ b/doc/user/subworlds.tex
@@ -1,6 +1,6 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Copyright (c) 2015-2016 by The University of Queensland
+% Copyright (c)2015-2016 by The University of Queensland
 % http://www.uq.edu.au
 %
 % Primary Business: Queensland, Australia
diff --git a/doc/verinfo.tex b/doc/verinfo.tex
index 36aebb8..fb732d1 100644
--- a/doc/verinfo.tex
+++ b/doc/verinfo.tex
@@ -1,6 +1,6 @@
 
-\newcommand{\relver}{4.2}
-\newcommand{\reldate}{15 January 2016}
+\newcommand{\relver}{5.0}
+\newcommand{\reldate}{19 Sep 2016}
 
 %\newcommand{\relver}{development}
 %\newcommand{\reldate}{\today}
diff --git a/downunder/py_src/coordinates.py b/downunder/py_src/coordinates.py
index 2d36cac..f463116 100644
--- a/downunder/py_src/coordinates.py
+++ b/downunder/py_src/coordinates.py
@@ -29,8 +29,7 @@ __all__ = ['ReferenceSystem', 'CartesianReferenceSystem',
     'GeodeticReferenceSystem', 'SphericalReferenceSystem',
     'WGS84ReferenceSystem', 'GRS80ReferenceSystem',
     'SpatialCoordinateTransformation', 'GeodeticCoordinateTransformation',
-    'CartesianCoordinateTransformation', 'makeTranformation',
-    'makeTransformation']
+    'CartesianCoordinateTransformation', 'makeTransformation']
 
 from esys.escript import unitsSI as U
 import esys.escript as esc
@@ -423,25 +422,6 @@ class GeodeticCoordinateTransformation(SpatialCoordinateTransformation):
         self._volumefactor=v
         self._scaling_factors = s
 
-def makeTranformation(domain, coordinates=None):
-    """
-    DEPRECATED
-    
-    returns a `SpatialCoordinateTransformation` for the given domain
-    
-    :param domain: domain in the domain of the coordinate transformation
-    :type domain: `esys.escript.AbstractDomain`
-    :param coordinates: the reference system or spatial coordinate system.
-    :type coordinates: `ReferenceSystem` or `SpatialCoordinateTransformation`
-    :return: the spatial coordinate system for the given domain of the specified 
-             reference system ``coordinates``. If ``coordinates`` is already spatial coordinate system based on the 
-             riven domain ``coordinates`` is returned. Otherwise an appropriate spatial coordinate system 
-             is created.
-    :rtype: `SpatialCoordinateTransformation` 
-    """
-    print("WARNING: makeTranformation is deprecated, use makeTransformation")
-    return makeTransformation(domain, coordinates)
-
 def makeTransformation(domain, coordinates=None):
     """
     returns a `SpatialCoordinateTransformation` for the given domain
diff --git a/downunder/py_src/forwardmodels/acoustic.py b/downunder/py_src/forwardmodels/acoustic.py
index 2eca830..6197e7c 100644
--- a/downunder/py_src/forwardmodels/acoustic.py
+++ b/downunder/py_src/forwardmodels/acoustic.py
@@ -27,11 +27,12 @@ __all__ = ['AcousticWaveForm']
 
 from .base import ForwardModel
 from esys.downunder.coordinates import makeTransformation
-from esys.escript import Data, DiracDeltaFunctions, FunctionOnBoundary
+from esys.escript import Data, DiracDeltaFunctions, FunctionOnBoundary, hasFeature
 from esys.escript.linearPDEs import LinearPDE, SolverOptions
 from esys.escript.util import *
 import numpy as np
 
+HAVE_DIRECT = hasFeature("PASO_DIRECT") or hasFeature('trilinos')
 
 class AcousticWaveForm(ForwardModel):
     """
@@ -193,9 +194,8 @@ class AcousticWaveForm(ForwardModel):
 
         :rtype: `LinearPDE`
         """
-        from esys.escript import getEscriptParamInt
         if self.__pde is None:
-            if getEscriptParamInt("PASO_DIRECT")==0:
+            if not HAVE_DIRECT:
                 raise ValueError("Either this build of escript or the current MPI configuration does not support direct solvers.")
             pde=LinearPDE(self.__domain, numEquations=2)
             D=pde.createCoefficient('D')
diff --git a/downunder/py_src/magtel2d.py b/downunder/py_src/magtel2d.py
index aaaf46c..c944c1e 100644
--- a/downunder/py_src/magtel2d.py
+++ b/downunder/py_src/magtel2d.py
@@ -213,7 +213,7 @@ class MT_2D(object):
     # ---
 
     # Types:
-    if not isinstance(domain, finley.finleycpp.MeshAdapter ):
+    if not isinstance(domain, escript.Domain):
       raise ValueError("Input parameter DOMAIN must be an Escript mesh")
     if not isinstance(mode, str):
       raise ValueError("Input parameter MODE must be a string")
diff --git a/downunder/py_src/seismic.py b/downunder/py_src/seismic.py
index 57c5250..0422be1 100644
--- a/downunder/py_src/seismic.py
+++ b/downunder/py_src/seismic.py
@@ -39,7 +39,11 @@ from esys.escript.linearPDEs import LinearSinglePDE, LinearPDESystem, WavePDE, S
 OBSPY_AVAILABLE = False
 try:
     from obspy import Trace, Stream, UTCDateTime
-    from obspy.segy.segy import SEGYTraceHeader, SEGYBinaryFileHeader
+    try:
+        # new interface
+        from obspy.io.segy.segy import SEGYTraceHeader, SEGYBinaryFileHeader
+    except:
+        from obspy.segy.segy import SEGYTraceHeader, SEGYBinaryFileHeader
     from obspy.core import AttribDict
     OBSPY_AVAILABLE = True
 except:
diff --git a/downunder/test/python/SConscript b/downunder/test/python/SConscript
index 0bc76e7..ad9b94d 100644
--- a/downunder/test/python/SConscript
+++ b/downunder/test/python/SConscript
@@ -43,9 +43,8 @@ program = local_env.RunPyUnitTest(alltestruns)
 Depends(program, 'build_py_tests')
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("DOWNUNDER_TEST_DATA_ROOT","$BATCH_ROOT/downunder/test/python/ref_data"),('DOWNUNDER_WORKDIR','$BUILD_DIR/downunder/test/python')),"$BATCH_ROOT/escript/test/python:$BATCH_ROOT/downunder/test/python","$BATCH_ROOT/downunder/test/python",testruns)
-tgroup.makeDir("$BUILD_DIR/downunder/test")
+from grouptest import GroupTest
+tgroup=GroupTest("downunder", "$PYTHONRUNNER ", (("DOWNUNDER_TEST_DATA_ROOT","$BATCH_ROOT/downunder/test/python/ref_data"),('DOWNUNDER_WORKDIR','$BUILD_DIR/downunder/test/python')), "$BATCH_ROOT/escript/test/python:$BATCH_ROOT/downunder/test/python", "$BATCH_ROOT/downunder/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/downunder/test/python")
 TestGroups.append(tgroup)
 
diff --git a/downunder/test/python/run_comm1.py b/downunder/test/python/run_comm1.py
index c449bbc..3e9f5c3 100644
--- a/downunder/test/python/run_comm1.py
+++ b/downunder/test/python/run_comm1.py
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -18,7 +18,7 @@ from __future__ import print_function, division
 Test script to run test model COMMEMI-1
 """
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
@@ -51,7 +51,8 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-HAVE_GMSH = escript.getEscriptParamInt("GMSH_SUPPORT")
+HAVE_GMSH = escript.hasFeature("gmsh")
+HAVE_DIRECT = escript.hasFeature("PASO_DIRECT") or escript.hasFeature('trilinos')
 
 
 # this is mainly to avoid warning messages
@@ -375,11 +376,9 @@ def generateCommemi1Mesh():
 
 
 class Test_COMMEMI1(unittest.TestCase):
-    @unittest.skipIf(not HAVE_FINLEY, "Test requires finley to be available")
-    @unittest.skipIf(not HAVE_GMSH, "Test requires gmsh to be available")
-    @unittest.skipIf(not escript.getEscriptParamInt("PASO_DIRECT"), "Missing direct solvers")
-    @unittest.skipIf(escript.getMPISizeWorld() > 1,
-            "Direct solvers and multiple MPI processes are currently incompatible")
+    @unittest.skipUnless(HAVE_FINLEY, "Test requires finley to be available")
+    @unittest.skipUnless(HAVE_GMSH, "Test requires gmsh to be available")
+    @unittest.skipUnless(HAVE_DIRECT, "Missing direct solver")
     def test_comm1(self):
         # ---
         # Initialisations
@@ -517,7 +516,7 @@ class Test_COMMEMI1(unittest.TestCase):
 
         # Setup interpolation to get values at specified stations (for comparison):
         fi = InterpolatedUnivariateSpline(x, y0)
-        # Save esscript values at comparison points in text file:
+        # Save escript values at comparison points in text file:
         # re-enable to allow comparisons
         #numpy.savetxt("commemi1_"+mode.lower()+".dat", numpy.column_stack((xs,fi(xs))), fmt='%g')
 
@@ -583,24 +582,24 @@ class Test_COMMEMI1(unittest.TestCase):
             indices.append(mindex)
 
         # The following are very simple checks based on the visual shape of the correct result
-        maxdiff=0
+        maxdiff = 0
         for i in range(len(indices)):
             if abs(y0[indices[i]]-ra[i])>maxdiff:
-                maxdiff=abs(y0[indices[i]]-ra[i])
+                maxdiff = abs(y0[indices[i]]-ra[i])
 
-        if maxdiff>5:           #Threshold is pretty arbitrary
-            raise RuntimeError("Mismatch with reference data")
+        # Threshold is pretty arbitrary
+        self.assertLess(maxdiff, 5) # "Mismatch with reference data"
 
         c=0
         for y in y1:
-            if y<46:
+            if y < 46:
                 c+=1
 
-        if not (74 < escript.Lsup(y1) < 81):
-            raise RuntimeError("Peak of bottom plot is off.")
+        self.assertLess(74, escript.Lsup(y1)) # "Peak of bottom plot is off."
+        self.assertLess(escript.Lsup(y1), 81) # "Peak of bottom plot is off."
 
-        if not (0.78 < c/len(y1) < 0.80):
-            raise RuntimeError("Bottom plot has too many high points")
+        self.assertLess(0.78, c/len(y1)) # "Bottom plot has too many high points"
+        self.assertLess(c/len(y1), 0.8) # "Bottom plot has too many high points"
 
         #
         print (datetime.datetime.now()-startTime)
diff --git a/downunder/test/python/run_comm4.py b/downunder/test/python/run_comm4.py
index 8857743..a52b28e 100644
--- a/downunder/test/python/run_comm4.py
+++ b/downunder/test/python/run_comm4.py
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -43,7 +43,8 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-HAVE_GMSH = escript.getEscriptParamInt("GMSH_SUPPORT")
+HAVE_GMSH = escript.hasFeature("gmsh")
+HAVE_DIRECT = escript.hasFeature("PASO_DIRECT") or escript.hasFeature('trilinos')
 
 # Matplotlib uses outdated code -- ignore the warnings until an update is available:
 import warnings
@@ -451,10 +452,9 @@ def generateCommemi4Mesh():
 # ==========================================================
 
 class Test_COMMEMI4(unittest.TestCase):
-    @unittest.skipIf(not HAVE_FINLEY, "Test requires finley to be available")
-    @unittest.skipIf(not HAVE_GMSH, "Test requires gmsh to be available")
-    @unittest.skipIf(not escript.getEscriptParamInt("PASO_DIRECT"), "Missing direct solvers")
-    @unittest.skipIf(escript.getMPISizeWorld() > 1, "Direct solvers and MPI are currently incompatible")
+    @unittest.skipUnless(HAVE_FINLEY, "Test requires finley to be available")
+    @unittest.skipUnless(HAVE_GMSH, "Test requires gmsh to be available")
+    @unittest.skipUnless(HAVE_DIRECT, "Missing direct solver")
     def test_comm4(self):
         # ---
         # Initialisations
diff --git a/downunder/test/python/run_datasources.py b/downunder/test/python/run_datasources.py
index b9c9821..829b2d8 100644
--- a/downunder/test/python/run_datasources.py
+++ b/downunder/test/python/run_datasources.py
@@ -28,7 +28,7 @@ import os
 import sys
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from esys.escript import inf,sup,saveDataCSV,getMPISizeWorld,getEscriptParamInt
+from esys.escript import inf, sup, saveDataCSV, getMPISizeWorld
 from esys.downunder.datasources import *
 from esys.downunder.domainbuilder import DomainBuilder
 from esys.downunder.coordinates import WGS84ReferenceSystem
@@ -60,9 +60,6 @@ try:
 except KeyError:
     WORKDIR='.'
 
-
-haveNetcdf=(getEscriptParamInt("NETCDF_BUILD",0)==1)
-    
 ERS32_DATA = os.path.join(TEST_DATA_ROOT, 'ermapper32_test.ers')
 ERS64_DATA = os.path.join(TEST_DATA_ROOT, 'ermapper64_test.ers')
 ERS_REF = os.path.join(TEST_DATA_ROOT, 'ermapper_test.csv')
@@ -264,7 +261,6 @@ class TestErMapperData(unittest.TestCase):
 class TestNetCdfData(unittest.TestCase):
     @unittest.skipIf(not haveProj, 'pyproj not available')
     @unittest.skipIf(mpisize>1, "more than 1 MPI rank")
-    @unittest.skipIf(not haveNetcdf, "not a netcdf build")
     def test_cdf_with_padding(self):
         source = NetCdfData(DataSource.GRAVITY, NC_DATA, ALT, scale_factor=1e-6)
         domainbuilder=DomainBuilder()
@@ -322,7 +318,6 @@ class TestNetCdfData(unittest.TestCase):
                 msg="Wrong values in padding area")
 
     @unittest.skipIf(mpisize>1, "more than 1 MPI rank")
-    @unittest.skipIf(not haveNetcdf, "not a netcdf build")    
     def test_cdf_with_padding_ellipsoid(self):
         ref=WGS84ReferenceSystem()
 
diff --git a/downunder/test/python/run_dcforward.py b/downunder/test/python/run_dcforward.py
index 5053163..8cc172a 100644
--- a/downunder/test/python/run_dcforward.py
+++ b/downunder/test/python/run_dcforward.py
@@ -37,7 +37,7 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-HAVE_GMSH = getEscriptParamInt("GMSH_SUPPORT")
+HAVE_GMSH = hasFeature("gmsh")
 
 mpisize = getMPISizeWorld()
 mpirank = getMPIRankWorld()
diff --git a/downunder/test/python/run_forward.py b/downunder/test/python/run_forward.py
index 8c3f022..07af844 100644
--- a/downunder/test/python/run_forward.py
+++ b/downunder/test/python/run_forward.py
@@ -34,7 +34,6 @@ from esys.escript import unitsSI as U
 from esys.escript import *
 from esys.weipa import saveSilo
 from esys.escript.linearPDEs import LinearSinglePDE, LinearPDE
-from esys.escript import getEscriptParamInt
 from esys.escript.pdetools import Locator
 
 try:
@@ -53,25 +52,20 @@ mpisize = getMPISizeWorld()
 # this is mainly to avoid warning messages
 logging.basicConfig(format='%(name)s: %(message)s', level=logging.INFO)
 
-try:
-    TEST_DATA_ROOT=os.environ['DOWNUNDER_TEST_DATA_ROOT']
-except KeyError:
-    TEST_DATA_ROOT='ref_data'
-
-try:
-    WORKDIR=os.environ['DOWNUNDER_WORKDIR']
-except KeyError:
-    WORKDIR='.'
+HAVE_DIRECT = hasFeature("PASO_DIRECT") or hasFeature('trilinos')
 
-
-have_direct=getEscriptParamInt("PASO_DIRECT")
-
-@unittest.skipIf(not HAVE_RIPLEY, "Ripley module not available")
-@unittest.skipIf(mpisize>1 or have_direct!=1, "more than 1 MPI rank or missing direct solver")
+@unittest.skipUnless(HAVE_RIPLEY, "Ripley module not available")
+@unittest.skipUnless(HAVE_DIRECT, "more than 1 MPI rank or missing direct solver")
 class TestAcousticInversion(unittest.TestCase):
     def test_API(self):
+        NE=20
+        for x in [int(sqrt(mpisize)),2,3,5,7,1]:
+            NX=x
+            NY=mpisize//x
+            if NX*NY == mpisize:
+                break
+        domain=ripRectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY, diracPoints=[(0.5,1.)], diracTags=['sss'])
 
-        domain=ripRectangle(20,20, diracPoints=[(0.5,1.)], diracTags=['sss'])
         omega=2.
 
         data=Data([1,2], FunctionOnBoundary(domain))
@@ -94,7 +88,14 @@ class TestAcousticInversion(unittest.TestCase):
         self.assertEqual(pde.getDomain(),  domain)
 
     def test_numeric2DscaleF(self):
-        domain=ripRectangle(100,100, diracPoints=[(0.5,1.)], diracTags=['sss'])
+        NE=40
+        for x in [int(sqrt(mpisize)),2,3,5,7,1]:
+            NX=x
+            NY=mpisize//x
+            if NX*NY == mpisize:
+                break
+        domain=ripRectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY, diracPoints=[(0.5,1.)], diracTags=['sss'])
+        #domain=ripRectangle(100,100, diracPoints=[(0.5,1.)], diracTags=['sss'])
         omega=2.
 
         # test solution is u = a * z where a is complex
@@ -175,7 +176,12 @@ class TestAcousticInversion(unittest.TestCase):
         self.assertLess( abs(d2-d0-integrate(dg0[1]*p)), 1e-2*abs(d2-d0) )
 
     def test_numeric2DnoscaleF(self):
-        domain=ripRectangle(10,20, diracPoints=[(0.5,1.)], diracTags=['sss'])
+        for x in [int(sqrt(mpisize)),2,3,5,7,1]:
+            NX=x
+            NY=mpisize//x
+            if NX*NY == mpisize:
+                break
+        domain=ripRectangle(n0=10*NX-1, n1=20*NY-1, l0=1., l1=1., d0=NX, d1=NY, diracPoints=[(0.5,1.)], diracTags=['sss'])
         omega=1.5
 
         # test solution is u = a * z where a is complex
@@ -208,8 +214,8 @@ class TestAcousticInversion(unittest.TestCase):
         args=acw.getArguments(sigma0)
         d0=acw.getDefect(sigma0, *args)
         self.assertTrue(isinstance(d0, float))
-        self.assertTrue(d0 >= 0)
-        self.assertTrue(d0 > 1e-10)
+        self.assertGreaterEqual(d0, 0)
+        self.assertGreater(d0, 1e-10)
 
         dg0=acw.getGradient(sigma0, *args)
         self.assertTrue(isinstance(dg0, Data))
@@ -239,7 +245,7 @@ class TestSubsidence(unittest.TestCase):
         lam=2.
         mu=1.
 
-        domain=ripBrick(20,20,19, d2=mpisize)
+        domain=ripBrick(20,20,max(19,2*mpisize-1), d2=mpisize)
 
         xb=FunctionOnBoundary(domain).getX()
         m=whereZero(xb[2]-1)
@@ -263,7 +269,7 @@ class TestSubsidence(unittest.TestCase):
         lam=2.
         mu=1.
         INC=0.01
-        domain=ripBrick(20,20,20*mpisize-1 , d2=mpisize)
+        domain=ripBrick(20,20,min(99,20*mpisize-1) , d2=mpisize)
 
         xb=FunctionOnBoundary(domain).getX()
         m=whereZero(xb[2]-1)
@@ -291,7 +297,7 @@ class TestSubsidence(unittest.TestCase):
         ref=abs((d2-d0)/INC)
         self.assertLess(abs((d2-d0)/INC+integrate(grad_d* dP)), ref * 1.e-5)
 
-@unittest.skipIf(not HAVE_FINLEY, "Finley module not available")
+@unittest.skipUnless(HAVE_FINLEY, "Finley module not available")
 class TestDCResistivity(unittest.TestCase):
 
     def test_PDE2D(self):
@@ -424,8 +430,8 @@ class TestIsostaticPressure(unittest.TestCase):
         p_ref=-(1.-domain.getX()[2])*(981.+26700+1000)
         self.assertLess(Lsup(p0-p_ref), 1e-6 * Lsup(p_ref))
 
-@unittest.skipIf(not HAVE_RIPLEY, "Ripley module not available")
-@unittest.skipIf(mpisize>1 or have_direct!=1, "more than 1 MPI rank or missing direct solver")
+@unittest.skipUnless(HAVE_RIPLEY, "Ripley module not available")
+@unittest.skipUnless(HAVE_DIRECT, "more than 1 MPI rank or missing direct solver")
 class TestMT2DModelTEMode(unittest.TestCase):
     def test_API(self):
         domain=ripRectangle(25, 25, d1=mpisize)
@@ -460,7 +466,7 @@ class TestMT2DModelTEMode(unittest.TestCase):
         SIGMA=15.
         k=cmath.sqrt(1j*omega*mu0*SIGMA)  # Ex=exp(k*z)
         NE=101
-        domain=ripRectangle(NE,NE, d1=mpisize)
+        domain=ripRectangle(max(NE,30*mpisize-1),max(NE,30*mpisize-1), d1=mpisize)
 
         Z0=0.5
         H=1./NE
@@ -485,10 +491,10 @@ class TestMT2DModelTEMode(unittest.TestCase):
         args=model.getArguments(SIGMA)
         Ex=args[0]
         Exz=args[1]
-        self.assertTrue(Lsup(Ex[0]-Ex0_ex) <= 1e-4 * Lsup(Ex0_ex))
-        self.assertTrue(Lsup(Ex[1]-Ex1_ex) <= 1e-4 * Lsup(Ex1_ex))
-        self.assertTrue(Lsup(Exz[0]-Ex0_ex_z) <= 1e-2 * Lsup(Ex0_ex_z))
-        self.assertTrue(Lsup(Exz[1]-Ex1_ex_z) <= 1e-2 * Lsup(Ex1_ex_z))
+        self.assertLess(Lsup(Ex[0]-Ex0_ex), 1e-4 * Lsup(Ex0_ex))
+        self.assertLess(Lsup(Ex[1]-Ex1_ex), 1e-4 * Lsup(Ex1_ex))
+        self.assertLess(Lsup(Exz[0]-Ex0_ex_z), 1e-2 * Lsup(Ex0_ex_z))
+        self.assertLess(Lsup(Exz[1]-Ex1_ex_z), 1e-2 * Lsup(Ex1_ex_z))
 
         argsr=model.getArguments(0.)
         ref=model.getDefect(0., *argsr)
@@ -498,7 +504,7 @@ class TestMT2DModelTEMode(unittest.TestCase):
         d=model.getDefect(SIGMA, *args)
         self.assertTrue( d > 0.)
         self.assertTrue( ref > 0.)
-        self.assertTrue( d <= 3e-3 * ref ) # d should be zero (some sort of)
+        self.assertLess( d, 3e-3 * ref ) # d should be zero (some sort of)
 
         z=ReducedFunction(domain).getX()[1]
         Ex0_ex=cos(k.imag*(z-1))*exp(k.real*(z-1))
@@ -523,7 +529,7 @@ class TestMT2DModelTEMode(unittest.TestCase):
         k=cmath.sqrt(1j*omega*mu0*SIGMA)  # Ex=exp(k*z)
 
         NE=101
-        domain=ripRectangle(NE,NE, d1=mpisize)
+        domain=ripRectangle(max(NE,50*mpisize-1), max(NE,50*mpisize-1), d1=mpisize)
 
         Z0=0.5
         IMP=-(1j*omega*mu0)/k*(cmath.exp(k*Z0)-cmath.exp(-k*Z0))/(cmath.exp(k*Z0)+cmath.exp(-k*Z0))
@@ -575,8 +581,8 @@ class TestMT2DModelTEMode(unittest.TestCase):
         self.assertLess( abs( d1-d0-integrate(dg0*p) ), 1e-2*abs(d1-d0) )
 
 
-@unittest.skipIf(not HAVE_RIPLEY, "Ripley module not available")
-@unittest.skipIf(mpisize>1 or have_direct!=1, "more than 1 MPI rank or missing direct solver")
+@unittest.skipUnless(HAVE_RIPLEY, "Ripley module not available")
+@unittest.skipUnless(HAVE_DIRECT, "more than 1 MPI rank or missing direct solver")
 class TestMT2DModelTMMode(unittest.TestCase):
     def test_API(self):
         domain=ripRectangle(25, 25, d0=mpisize)
@@ -680,7 +686,7 @@ class TestMT2DModelTMMode(unittest.TestCase):
 
         L=1
         NE=101
-        domain=ripRectangle(NE,NE, d0=mpisize)
+        domain=ripRectangle(max(NE,50*mpisize-1), max(NE,50*mpisize-1), d1=mpisize)
 
         Z0=0.5
         IMP=RHO*k*(cmath.exp(k*(Z0-L))-cmath.exp(-k*(Z0-L)))/(cmath.exp(k*(Z0-L))+cmath.exp(-k*(Z0-L)))
diff --git a/downunder/test/python/run_inversion_gravmag_2d.py b/downunder/test/python/run_inversion_gravmag_2d.py
index 1d3258a..6b7d2f7 100644
--- a/downunder/test/python/run_inversion_gravmag_2d.py
+++ b/downunder/test/python/run_inversion_gravmag_2d.py
@@ -43,7 +43,7 @@ except KeyError:
     WORKDIR='.'
 
 @unittest.skipIf(not HAVE_RIPLEY, "Ripley module not available")
-class Test_MagneticInversion(unittest.TestCase):
+class Test_JoinInversion(unittest.TestCase):
     def test_2D_inversion(self):
         logging.getLogger('inv.MinimizerLBFGS').setLevel(logging.CRITICAL)
         logging.getLogger('inv.JointGravityMagneticInversion').setLevel(logging.CRITICAL)
@@ -86,7 +86,7 @@ class Test_MagneticInversion(unittest.TestCase):
         domainbuilder.setBackgroundMagneticFluxDensity(B_b)
 
         inv=JointGravityMagneticInversion()
-        inv.setSolverTolerance(1e-4)
+        inv.setSolverTolerance(1e-3)
         inv.setSolverMaxIterations(500)
         inv.setup(domainbuilder)
 
diff --git a/paso/profiling/SConscript b/dudley/SConscript
similarity index 56%
rename from paso/profiling/SConscript
rename to dudley/SConscript
index 32ac432..da397f2 100644
--- a/paso/profiling/SConscript
+++ b/dudley/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,17 +13,18 @@
 #
 ##############################################################################
 
-import os, re
-Import('*')
-local_env = env.Clone()
+Import('env')
+if 'dudley' in env['domains']:
+    if not env['paso'] and not env['trilinos']:
+        print("Dudley requires a solver library! Please either enable Paso or Trilinos.")
+        env.Exit(1)
 
-# get the test source file names
-sources = Glob('*.cpp')
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-# some of these test files are too large to optimize
-local_env['CCFLAGS'] = re.sub('-O[0-9]', '-g', str(local_env['CCFLAGS']))
-local_env.Append(LIBS=['escript', 'esysUtils', 'paso'])
+    # configure python module
+    env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# build the executable
-program = local_env.Program('PasoTests', sources)
+    # configure unit tests
+    env.SConscript('test/SConscript', duplicate=0)
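The guard above means a build that enables dudley must also enable a solver library; a hypothetical fragment of a scons/<hostname>_options.py file satisfying it (the option names domains, paso and trilinos are those the SConscript reads via env[...]; the exact accepted values depend on the top-level SConstruct):

    # Hypothetical options-file fragment; adjust to your build.
    domains = ['dudley', 'finley', 'ripley']
    paso = True        # keep Paso enabled so dudley has a solver library
    trilinos = False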
 
diff --git a/dudley/py_src/SConscript b/dudley/py_src/SConscript
index 61735e3..dc68306 100644
--- a/dudley/py_src/SConscript
+++ b/dudley/py_src/SConscript
@@ -14,10 +14,7 @@
 #
 ##############################################################################
 
-
-import os
 Import('*')
-
 local_env = env.Clone()
 
 # get the source file names
@@ -25,8 +22,9 @@ sources = Glob('*.py')
 
 # compile
 pyc = local_env.PyCompile(sources)
+env.Alias('build_dudley', pyc)
 
 # install
-py_inst = local_env.Install(local_env['pyinstall']+'/dudley', pyc)
-env.Alias('install_dudley_py', py_inst)
+py_inst = local_env.Install(Dir('dudley', local_env['pyinstall']), pyc)
+env.Alias('install_dudley', py_inst)
 
diff --git a/dudley/py_src/__init__.py b/dudley/py_src/__init__.py
index 37f12b4..dae5f38 100644
--- a/dudley/py_src/__init__.py
+++ b/dudley/py_src/__init__.py
@@ -27,7 +27,6 @@ http://www.apache.org/licenses/LICENSE-2.0"""
 __url__="https://launchpad.net/escript-finley"
 
 import esys.escript     # This is just to ensure required libraries are loaded
-import esys.pasowrap    #if you don't import this, you won't be able to see methods not in AbstractSystemmatrix
 from .dudleycpp import *
 from .factorywrappers import *
 from .readers import *
diff --git a/dudley/py_src/factorywrappers.py b/dudley/py_src/factorywrappers.py
index e96023c..e17dfd0 100644
--- a/dudley/py_src/factorywrappers.py
+++ b/dudley/py_src/factorywrappers.py
@@ -23,12 +23,38 @@ __license__="""Licensed under the Apache License, version 2.0
 http://www.apache.org/licenses/LICENSE-2.0"""
 __url__="https://launchpad.net/escript-finley"
 
-# This file copied and adapted from the equivalent factorywrappers.py in finley
-
 from .dudleycpp import __Brick_driver, __Rectangle_driver
 
-def Rectangle(n0=1, n1=1, order=1, l0=1.0, l1=1.0, periodic0=False, periodic1=False, integrationOrder=-1, 
-      reducedIntegrationOrder=-1, useElementsOnFace=0, useFullElementOrder=0, optimize=0, **kwargs):
+def Rectangle(n0=1, n1=1, order=1, l0=1.0, l1=1.0, periodic0=False,
+              periodic1=False, integrationOrder=-1, reducedIntegrationOrder=-1,
+              useElementsOnFace=False, useFullElementOrder=False,
+              optimize=False, **kwargs):
+    """
+    Creates a triangular mesh by subdividing n0 x n1 rectangular elements over
+    the rectangle [0,l0] x [0,l1].
+    The following keyword arguments are understood:
+      diracPoints  - coordinates of dirac points to add to domain
+      diracTags    - list of tags for the dirac points
+      escriptworld - MPI (sub)world to use
+
+    :param n0: number of elements for side 0
+    :type n0: ``int``
+    :param n1: number of elements for side 1
+    :type n1: ``int``
+    :param order: for compatibility with finley, always 1
+    :param l0: length of side 0
+    :type l0: ``float``
+    :param l1: length of side 1
+    :type l1: ``float``
+    :param periodic0: for compatibility with finley, always False
+    :param periodic1: for compatibility with finley, always False
+    :param integrationOrder: for compatibility with finley, always 2
+    :param reducedIntegrationOrder: for compatibility with finley, unused
+    :param useElementsOnFace: for compatibility with finley, always False
+    :param useFullElementOrder: for compatibility with finley, always False
+    :param optimize: Enable optimisation of node labels
+    :type optimize: ``bool``
+    """
     if 'diracPoints' in kwargs:
         points=kwargs['diracPoints']
     if 'diracTags' in kwargs:
@@ -44,9 +70,41 @@ def Rectangle(n0=1, n1=1, order=1, l0=1.0, l1=1.0, periodic0=False, periodic1=Fa
 
 Rectangle.__doc__=__Rectangle_driver.__doc__
 
-def Brick(n0=1, n1=1, n2=1, order=1, l0=1.0, l1=1.0, l2=1.0, periodic0=0, periodic1=0, periodic2=0,
-    integrationOrder=-1, reducedIntegrationOrder=-1, useElementsOnFace=0, useFullElementOrder=0,
-    optimize=0, **kwargs):
+def Brick(n0=1, n1=1, n2=1, order=1, l0=1.0, l1=1.0, l2=1.0, periodic0=False,
+          periodic1=False, periodic2=False, integrationOrder=-1,
+          reducedIntegrationOrder=-1, useElementsOnFace=False,
+          useFullElementOrder=False, optimize=False, **kwargs):
+    """
+    Creates a tetrahedral mesh by subdividing n0 x n1 x n2 rectangular elements
+    over the brick [0,l0] x [0,l1] x [0,l2].
+    The following keyword arguments are understood:
+      diracPoints  - coordinates of dirac points to add to domain
+      diracTags    - list of tags for the dirac points
+      escriptworld - MPI (sub)world to use
+
+    :param n0: number of elements for side 0
+    :type n0: ``int``
+    :param n1: number of elements for side 1
+    :type n1: ``int``
+    :param n2: number of elements for side 2
+    :type n2: ``int``
+    :param order: for compatibility with finley, always 1
+    :param l0: length of side 0
+    :type l0: ``float``
+    :param l1: length of side 1
+    :type l1: ``float``
+    :param l2: length of side 2
+    :type l2: ``float``
+    :param periodic0: for compatibility with finley, always False
+    :param periodic1: for compatibility with finley, always False
+    :param periodic2: for compatibility with finley, always False
+    :param integrationOrder: for compatibility with finley, always 2
+    :param reducedIntegrationOrder: for compatibility with finley, unused
+    :param useElementsOnFace: for compatibility with finley, always False
+    :param useFullElementOrder: for compatibility with finley, always False
+    :param optimize: Enable optimisation of node labels
+    :type optimize: ``bool``
+    """
     if 'diracPoints' in kwargs:
         points=kwargs['diracPoints']
     if 'diracTags' in kwargs:
@@ -62,3 +120,4 @@ def Brick(n0=1, n1=1, n2=1, order=1, l0=1.0, l1=1.0, l2=1.0, periodic0=0, period
     return __Brick_driver(args)
 
 Brick.__doc__=__Brick_driver.__doc__
+
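The two wrappers documented above are the main entry points for generating dudley meshes
directly. A short usage sketch (mesh sizes and tag names are arbitrary; the keyword arguments
follow the docstrings in this hunk):

    from esys.dudley import Rectangle, Brick

    # 2D triangular mesh over [0,1] x [0,1] with one tagged Dirac point
    dom2d = Rectangle(n0=10, n1=10, l0=1.0, l1=1.0,
                      diracPoints=[(0.5, 0.5)], diracTags=['src'])

    # 3D tetrahedral mesh over the unit cube with optimised node labelling
    dom3d = Brick(n0=10, n1=10, n2=10, optimize=True)
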
diff --git a/dudley/py_src/readers.py b/dudley/py_src/readers.py
index ce54531..541cf43 100644
--- a/dudley/py_src/readers.py
+++ b/dudley/py_src/readers.py
@@ -38,7 +38,8 @@ __author__="Lutz Gross, l.gross at uq.edu.au, Joel Fenwick"
 from esys.pycad.gmsh import Design as GMSHDesign
 from .dudleycpp import ReadGmsh
 
-def MakeDomain(design,integrationOrder=-1, reducedIntegrationOrder=-1, optimizeLabeling=True, useMacroElements=False):
+def MakeDomain(design, integrationOrder=-1, reducedIntegrationOrder=-1,
+               optimizeLabeling=True, useMacroElements=False):
     """
     Creates a Dudley `Domain` from a `esys.pycad.design.Design` object.
     Currently only gmsh is supported.
@@ -52,9 +53,9 @@ def MakeDomain(design,integrationOrder=-1, reducedIntegrationOrder=-1, optimizeL
     :type reducedIntegrationOrder: ``int``
     :param optimizeLabeling: if set the labeling of the mesh nodes is optimized
     :type optimizeLabeling: ``bool``
-    :param useMacroElements: uses macro elements.
+    :param useMacroElements: for compatibility with finley. Must be False
     :type useMacroElements: ``bool``
-    :return: the Finley domain defined by the design
+    :return: the Dudley domain defined by the design
     :rtype: `Domain`
     """
     if useMacroElements:
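MakeDomain is the gmsh-based alternative to the factory functions: a pycad Design is meshed by
gmsh and then read back as a Dudley domain. A sketch following the usual escript/pycad workflow
(geometry and element size are illustrative only):

    from esys.pycad import Point, Line, CurveLoop, PlaneSurface
    from esys.pycad.gmsh import Design
    from esys.dudley import MakeDomain

    # unit square described with pycad, meshed by gmsh, read back as a Dudley domain
    p0, p1 = Point(0., 0.), Point(1., 0.)
    p2, p3 = Point(1., 1.), Point(0., 1.)
    loop = CurveLoop(Line(p0, p1), Line(p1, p2), Line(p2, p3), Line(p3, p0))
    design = Design(dim=2, element_size=0.05)
    design.addItems(PlaneSurface(loop))
    domain = MakeDomain(design, optimizeLabeling=True)
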
diff --git a/dudley/src/Assemble.h b/dudley/src/Assemble.h
index 9fb4685..db6e0e9 100644
--- a/dudley/src/Assemble.h
+++ b/dudley/src/Assemble.h
@@ -14,95 +14,164 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/****************************************************************************
 
-/*    assemblage routines: header file */
+  Assemblage routines: header file
 
-/************************************************************************************/
-
-#ifndef INC_DUDLEY_ASSEMBLE
-#define INC_DUDLEY_ASSEMBLE
+*****************************************************************************/
 
-/************************************************************************************/
+#ifndef __DUDLEY_ASSEMBLE_H__
+#define __DUDLEY_ASSEMBLE_H__
 
 #include "Dudley.h"
 #include "ElementFile.h"
 #include "NodeFile.h"
-#include "escript/Data.h"
-#include "paso/SystemMatrix.h"
-
-struct Dudley_Assemble_Parameters {
-    dim_t numQuad;		/* number of quadrature nodes */
-    dim_t numDim;		/* spatial dimension */
-    dim_t NN;			/* leading dimension of element node table */
-    dim_t numElements;		/* number of elements */
-
-    dim_t numEqu;
-    index_t *row_DOF;
-    dim_t row_DOF_UpperBound;
-    Dudley_ElementFile_Jacobeans *row_jac;
-    dim_t numShapes;
-
-    dim_t numComp;
-    index_t *col_DOF;
-    dim_t col_DOF_UpperBound;
-
-    const double *shapeFns;
+#include <escript/AbstractSystemMatrix.h>
+
+namespace dudley {
+
+struct AssembleParameters
+{
+    AssembleParameters(const NodeFile* nodes, const ElementFile* ef,
+                       escript::ASM_ptr sm, escript::Data& rhs,
+                       bool reducedOrder);
+
+    /// element file these parameters apply to
+    const ElementFile* elements;
+    /// system matrix to be updated
+    escript::AbstractSystemMatrix* S;
+    /// right-hand side to be updated
+    escript::Data& F;
+    /// number of quadrature nodes
+    int numQuad;
+    /// number of spatial dimensions
+    int numDim;
+    /// leading dimension of element node table
+    int NN;
+    /// number of equations (= matrix row/column block size)
+    int numEqu;
+    /// row and column degrees of freedom
+    const index_t* DOF;
+    /// number of local degrees of freedom
+    dim_t DOF_UpperBound;
+    /// reference to jacobians
+    const ElementFile_Jacobians* jac;
+    int numShapes;
+    const double* shapeFns;
 };
 
-typedef struct Dudley_Assemble_Parameters Dudley_Assemble_Parameters;
-
-#define Dudley_Assemble_reducedIntegrationOrder(__in__) ( (getFunctionSpaceType(__in__) == DUDLEY_REDUCED_ELEMENTS) || (getFunctionSpaceType(__in__) == DUDLEY_REDUCED_FACE_ELEMENTS) )
-
-void Dudley_Assemble_PDE(Dudley_NodeFile *, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *,
-			 const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-			 const escript::Data *);
-
-
-void Dudley_Assemble_getAssembleParameters(Dudley_NodeFile *, Dudley_ElementFile *, paso::SystemMatrix_ptr, const escript::Data *,
-				    bool, Dudley_Assemble_Parameters *);
-void Dudley_Assemble_PDE_System2_3D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *,
-				    const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-void Dudley_Assemble_PDE_System2_2D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *,
-				    const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-void Dudley_Assemble_PDE_System2_1D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix, const escript::Data *,
-				    escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-
-void Dudley_Assemble_PDE_Single2_3D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *,
-				    const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-void Dudley_Assemble_PDE_Single2_2D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *,
-				    const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-void Dudley_Assemble_PDE_Single2_1D(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, const escript::Data *,
-				    const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *, const escript::Data *,
-				    const escript::Data *);
-void Dudley_Assemble_PDE_Points(Dudley_Assemble_Parameters, Dudley_ElementFile *, paso::SystemMatrix_ptr, escript::Data *, const escript::Data *, const escript::Data *);
-
-void Dudley_Assemble_NodeCoordinates(Dudley_NodeFile *, escript::Data *);
-void Dudley_Assemble_setNormal(Dudley_NodeFile *, Dudley_ElementFile *, escript::Data *);
-void Dudley_Assemble_interpolate(Dudley_NodeFile *, Dudley_ElementFile *, const escript::Data *, escript::Data *);
-void Dudley_Assemble_gradient(Dudley_NodeFile *, Dudley_ElementFile *, escript::Data *, const escript::Data *);
-void Dudley_Assemble_integrate(Dudley_NodeFile *, Dudley_ElementFile *, const escript::Data *, double *);
-void Dudley_Assemble_getSize(Dudley_NodeFile *, Dudley_ElementFile *, escript::Data *);
-void Dudley_Assemble_CopyNodalData(Dudley_NodeFile * nodes, escript::Data * out, const escript::Data * in);
-void Dudley_Assemble_CopyElementData(Dudley_ElementFile * elements, escript::Data * out, const escript::Data * in);
-void Dudley_Assemble_AverageElementData(Dudley_ElementFile * elements, escript::Data * out, const escript::Data * in);
-void Dudley_Assemble_addToSystemMatrix(paso::SystemMatrix_ptr in, const dim_t NN_Equa, const index_t * Nodes_Equa, const dim_t num_Equa,
-				       const dim_t NN_Sol, const index_t * Nodes_Sol, const dim_t num_Sol, const double *array);
-
-void Dudley_Assemble_jacobeans_2D(double *, dim_t, dim_t, dim_t, index_t *, double *, double *abs_D, double *quadweight,
-			   index_t *);
-void Dudley_Assemble_jacobeans_2D_M1D_E1D(double *, dim_t, dim_t, dim_t, index_t *, double *, double *abs_D,
-				   double *quadweight, index_t *);
-void Dudley_Assemble_jacobeans_3D(double *, dim_t, dim_t, dim_t, index_t *, double *, double *abs_D, double *quadweight,
-			   index_t *);
-void Dudley_Assemble_jacobeans_3D_M2D_E2D(double *, dim_t, dim_t, dim_t, index_t *, double *, double *abs_D,
-				   double *quadweight, index_t *);
-
-void Dudley_Assemble_LumpedSystem(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, escript::Data * lumpedMat,
-				  const escript::Data * D, const bool useHRZ);
-#endif				/* #ifndef INC_DUDLEY_ASSEMBLE */
+void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
+                  escript::ASM_ptr S, escript::Data& F,
+                  const escript::Data& A, const escript::Data& B,
+                  const escript::Data& C, const escript::Data& D,
+                  const escript::Data& X, const escript::Data& Y);
+
+template<typename Scalar = double>
+void Assemble_PDE_Points(const AssembleParameters& p,
+                         const escript::Data& d_dirac,
+                         const escript::Data& y_dirac);
+
+template<typename Scalar = double>
+void Assemble_PDE_Single_2D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+template<typename Scalar = double>
+void Assemble_PDE_Single_3D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+template<typename Scalar = double>
+void Assemble_PDE_System_2D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+template<typename Scalar = double>
+void Assemble_PDE_System_3D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+
+/// Adds the matrix array[Eq,Eq,NN,NN] onto the matrix S.
+/// The rows/columns are given by i_Eq + numEq*Nodes[j_Eq]
+/// (i_Eq=0:numEq; j_Eq=0:NN).
+/// The routine has to be called from a parallel region and assumes that
+/// array is fully packed.
+template<typename Scalar>
+void Assemble_addToSystemMatrix(escript::AbstractSystemMatrix* S,
+                                const std::vector<index_t>& Nodes, int numEq,
+                                const std::vector<Scalar>& array);
+
+/// Assembles the mass matrix in lumped form.
+/// The coefficient D has to be defined on the integration points or not
+/// present. `lumpedMat` has to be initialized before the routine is called.
+void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
+                           escript::Data& lumpedMat, const escript::Data& D,
+                           bool useHRZ);
+
+/// averages data
+void Assemble_AverageElementData(const ElementFile* elements,
+                                 escript::Data& out, const escript::Data& in);
+
+/// copies data between different types of elements
+void Assemble_CopyElementData(const ElementFile* elements, escript::Data& out,
+                              const escript::Data& in);
+
+/// copies data between different types of nodal representations
+void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
+                            const escript::Data& in);
+
+/// copies node coordinates into expanded Data object `x`
+void Assemble_NodeCoordinates(const NodeFile* nodes, escript::Data& x);
+
+/// calculates the normal vector at quadrature points on face elements
+void Assemble_getNormal(const NodeFile* nodes, const ElementFile* elements,
+                        escript::Data& normals);
+
+/// calculates the minimum distance between two vertices of elements and
+/// assigns the value to each quadrature point in `size`
+void Assemble_getSize(const NodeFile* nodes, const ElementFile* elements,
+                      escript::Data& size);
+
+/// Assemblage of Jacobians: calculates the gradient of nodal data at
+/// quadrature points
+void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
+                       escript::Data& gradient, const escript::Data& data);
+
+/// integrates data on quadrature points
+void Assemble_integrate(const NodeFile* nodes, const ElementFile* elements,
+                   const escript::Data& data, std::vector<double>& integrals);
+
+/// interpolates nodal data in a data array onto elements (=integration points)
+void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
+                          const escript::Data& data, escript::Data& output);
+
+void Assemble_jacobians_2D(const double* coordinates, int numQuad,
+                           dim_t numElements, int numNodes,
+                           const index_t* nodes, double* dTdX, double* absD,
+                           double* quadWeight, const index_t* elementId);
+
+void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
+                           dim_t numElements, int numNodes,
+                           const index_t* nodes, double* dTdX, double* absD,
+                           double* quadWeight, const index_t* elementId);
+
+void Assemble_jacobians_3D(const double* coordinates, int numQuad,
+                           dim_t numElements, int numNodes,
+                           const index_t* nodes, double* dTdX, double* abs_D,
+                           double* quadWeight, const index_t* elementId);
+
+void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
+                           dim_t numElements, int numNodes,
+                           const index_t* nodes, double* dTdX, double* absD,
+                           double* quadWeight, const index_t* elementId);
+
+
+} // namespace dudley
+
+#endif // __DUDLEY_ASSEMBLE_H__
+
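The Assemble_PDE declarations above sit directly behind escript's LinearPDE class: the
coefficients A, B, C, D, X and Y set on a LinearPDE are the Data objects handed to
Assemble_PDE(). A minimal sketch on a dudley domain (the particular PDE is arbitrary):

    from esys.escript import kronecker
    from esys.escript.linearPDEs import LinearPDE
    from esys.dudley import Rectangle

    dom = Rectangle(n0=20, n1=20)
    pde = LinearPDE(dom)
    # -div(A*grad(u)) + D*u = Y; A, D and Y become arguments of Assemble_PDE()
    pde.setValue(A=kronecker(dom), D=1.0, Y=1.0)
    u = pde.getSolution()
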
diff --git a/dudley/src/Assemble_AverageElementData.cpp b/dudley/src/Assemble_AverageElementData.cpp
index f401c4f..a1a34cf 100644
--- a/dudley/src/Assemble_AverageElementData.cpp
+++ b/dudley/src/Assemble_AverageElementData.cpp
@@ -14,129 +14,77 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: copies data between elements       */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-/****************************************************************************************************************************/
-
 #include "ShapeTable.h"
+#include "Util.h"
 
-void Dudley_Assemble_AverageElementData(Dudley_ElementFile * elements, escript::Data * out, const escript::Data * in)
-{
-    dim_t n, q, numElements, numQuad_in, numQuad_out, i;
-    __const double *in_array;
-    double *out_array, vol, volinv, wq;
-    register double rtmp;
-    dim_t numComps = getDataPointSize(out);
-    size_t numComps_size;
+#include <escript/index.h>
 
-    Dudley_resetError();
-    if (elements == NULL)
-    {
-	return;
-    }
+namespace dudley {
 
-    numElements = elements->numElements;
-    if (Dudley_Assemble_reducedIntegrationOrder(in))
-    {
-	numQuad_in = QuadNums[elements->numDim][0];
-	wq = QuadWeight[elements->numDim][0];
+void Assemble_AverageElementData(const ElementFile* elements,
+                                 escript::Data& out, const escript::Data& in)
+{
+    if (!elements)
+        return;
 
+    double wq;
+    int numQuad_in, numQuad_out;
+    if (hasReducedIntegrationOrder(in)) {
+        numQuad_in = QuadNums[elements->numDim][0];
+        wq = QuadWeight[elements->numDim][0];
+    } else {
+        numQuad_in = QuadNums[elements->numDim][1];
+        wq = QuadWeight[elements->numDim][1];
     }
-    else
-    {
-	numQuad_in = QuadNums[elements->numDim][1];
-	wq = QuadWeight[elements->numDim][1];
-    }
-    if (Dudley_Assemble_reducedIntegrationOrder(out))
-    {
-	numQuad_out = QuadNums[elements->numDim][0];
+    if (hasReducedIntegrationOrder(out)) {
+        numQuad_out = QuadNums[elements->numDim][0];
+    } else {
+        numQuad_out = QuadNums[elements->numDim][1];
     }
-    else
-    {
-	numQuad_out = QuadNums[elements->numDim][1];
 
-    }
+    // check out and in
+    const dim_t numElements = elements->numElements;
+    const int numComps = out.getDataPointSize();
 
-    /* check out and in */
-    if (numComps != getDataPointSize(in))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_AverageElementData: number of components of input and output Data do not match.");
-    }
-    else if (!numSamplesEqual(in, numQuad_in, numElements))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_AverageElementData: illegal number of samples of input Data object");
-    }
-    else if (!numSamplesEqual(out, numQuad_out, numElements))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_AverageElementData: illegal number of samples of output Data object");
-    }
-    else if (!isExpanded(out))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_AverageElementData: expanded Data object is expected for output data.");
+    if (numComps != in.getDataPointSize()) {
+        throw DudleyException("Assemble_AverageElementData: number of components of input and output Data do not match.");
+    } else if (!in.numSamplesEqual(numQuad_in, numElements)) {
+        throw DudleyException("Assemble_AverageElementData: illegal number of samples of input Data object");
+    } else if (!out.numSamplesEqual(numQuad_out, numElements)) {
+        throw DudleyException("Assemble_AverageElementData: illegal number of samples of output Data object");
+    } else if (!out.actsExpanded()) {
+        throw DudleyException("Assemble_AverageElementData: expanded Data object is expected for output data.");
+    } else {
+        out.requireWrite();
+        if (in.actsExpanded()) {
+            const double vol = wq * numQuad_in;
+            const double volinv = 1. / vol;
+#pragma omp parallel for
+            for (index_t n = 0; n < numElements; n++) {
+                const double* in_array = in.getSampleDataRO(n);
+                double* out_array = out.getSampleDataRW(n);
+                for (int i = 0; i < numComps; ++i) {
+                    double rtmp = 0.;
+                    for (int q = 0; q < numQuad_in; ++q)
+                        rtmp += in_array[INDEX2(i, q, numComps)] * wq;
+                    rtmp *= volinv;
+                    for (int q = 0; q < numQuad_out; ++q)
+                        out_array[INDEX2(i, q, numComps)] = rtmp;
+                }
+            }
+        } else { // constant data
+            const size_t numComps_size = numComps * sizeof(double);
+#pragma omp parallel for
+            for (index_t n = 0; n < numElements; n++) {
+                const double* in_array = in.getSampleDataRO(n);
+                double* out_array = out.getSampleDataRW(n);
+                for (int q = 0; q < numQuad_out; q++)
+                    memcpy(out_array + q * numComps, in_array, numComps_size);
+            }
+        }
     }
+}
 
-    /* now we can start */
+} // namespace dudley
 
-    if (Dudley_noError())
-    {
-	if (isExpanded(in))
-	{
-	    vol = 0;
-	    for (q = 0; q < numQuad_in; ++q)
-		vol += wq;
-	    volinv = 1. / vol;
-	    requireWrite(out);
-#pragma omp parallel private(n, i, rtmp, q, in_array, out_array)
-	    {
-# pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		{
-		    in_array = getSampleDataRO(in, n);
-		    out_array = getSampleDataRW(out, n);
-		    for (i = 0; i < numComps; ++i)
-		    {
-			rtmp = 0;
-			for (q = 0; q < numQuad_in; ++q)
-			    rtmp += in_array[INDEX2(i, q, numComps)] * wq;
-			rtmp *= volinv;
-			for (q = 0; q < numQuad_out; ++q)
-			    out_array[INDEX2(i, q, numComps)] = rtmp;
-		    }
-		}
-	    }
-	}
-	else
-	{
-	    numComps_size = numComps * sizeof(double);
-	    requireWrite(out);
-#pragma omp parallel private(q,n,out_array,in_array)
-	    {
-# pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		{
-		    in_array = getSampleDataRO(in, n);
-		    out_array = getSampleDataRW(out, n);
-		    for (q = 0; q < numQuad_out; q++)
-			memcpy(out_array + q * numComps, in_array, numComps_size);
-		}
-	    }
-	}
-    }
-    return;
-}
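On the Python side, Assemble_AverageElementData backs interpolation from the full to the
reduced quadrature scheme on a dudley domain, as far as can be told from the function spaces
it checks. A sketch:

    from esys.escript import Function, ReducedFunction, interpolate
    from esys.dudley import Rectangle

    dom = Rectangle(n0=10, n1=10)
    x = Function(dom).getX()                      # values at all quadrature points
    x_red = interpolate(x, ReducedFunction(dom))  # averaged to one value per element
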
diff --git a/dudley/src/Assemble_CopyElementData.cpp b/dudley/src/Assemble_CopyElementData.cpp
index 3349f39..174951d 100644
--- a/dudley/src/Assemble_CopyElementData.cpp
+++ b/dudley/src/Assemble_CopyElementData.cpp
@@ -14,99 +14,52 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: copies data between elements       */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-/****************************************************************************************************************************/
 #include "ShapeTable.h"
+#include "Util.h"
+
+namespace dudley {
 
-void Dudley_Assemble_CopyElementData(Dudley_ElementFile * elements, escript::Data * out, const escript::Data * in)
+void Assemble_CopyElementData(const ElementFile* elements, escript::Data& out,
+                              const escript::Data& in)
 {
-    dim_t n, q, numElements, numQuad;
-    __const double *in_array;
-    double *out_array;
-    dim_t numComps = getDataPointSize(out);
-    size_t len_size;
+    if (!elements)
+        return;
 
-    Dudley_resetError();
-    if (elements == NULL)
-    {
-	return;
-    }
+    dim_t numQuad = (hasReducedIntegrationOrder(in) ?
+            QuadNums[elements->numDim][0] : QuadNums[elements->numDim][1]);
 
-    numElements = elements->numElements;
-    if (Dudley_Assemble_reducedIntegrationOrder(in))
-    {
-	numQuad = QuadNums[elements->numDim][0];
-    }
-    else
-    {
-	numQuad = QuadNums[elements->numDim][1];
+    // check out and in
+    const dim_t numElements = elements->numElements;
+    const int numComps = out.getDataPointSize();
 
+    if (numComps != in.getDataPointSize()) {
+        throw DudleyException("Assemble_CopyElementData: number of components of input and output Data do not match.");
+    } else if (!in.numSamplesEqual(numQuad, numElements)) {
+        throw DudleyException("Assemble_CopyElementData: illegal number of samples of input Data object");
+    } else if (!out.numSamplesEqual(numQuad, numElements)) {
+        throw DudleyException("Assemble_CopyElementData: illegal number of samples of output Data object");
+    } else if (!out.actsExpanded()) {
+        throw DudleyException("Assemble_CopyElementData: expanded Data object is expected for output data.");
+    } else {
+        out.requireWrite();
+        if (in.actsExpanded()) {
+            const size_t len_size = numComps * numQuad * sizeof(double);
+#pragma omp parallel for
+            for (index_t n = 0; n < numElements; n++)
+                memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n), len_size);
+        } else {
+            const size_t len_size = numComps * sizeof(double);
+#pragma omp parallel for
+            for (index_t n = 0; n < numElements; n++) {
+                const double* in_array = in.getSampleDataRO(n);
+                double* out_array = out.getSampleDataRW(n);
+                for (int q = 0; q < numQuad; q++)
+                    memcpy(out_array + q * numComps, in_array, len_size);
+            }
+        }
     }
+}
 
-    /* check out and in */
-    if (numComps != getDataPointSize(in))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_CopyElementData: number of components of input and output Data do not match.");
-    }
-    else if (!numSamplesEqual(in, numQuad, numElements))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyElementData: illegal number of samples of input Data object");
-    }
-    else if (!numSamplesEqual(out, numQuad, numElements))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyElementData: illegal number of samples of output Data object");
-    }
-    else if (!isExpanded(out))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_CopyElementData: expanded Data object is expected for output data.");
-    }
-
-    /* now we can start */
+} // namespace dudley
 
-    if (Dudley_noError())
-    {
-	if (isExpanded(in))
-	{
-	    len_size = numComps * numQuad * sizeof(double);
-	    requireWrite(out);
-#pragma omp parallel private(n)
-	    {
-# pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		    memcpy(getSampleDataRW(out, n), getSampleDataRO(in, n), len_size);
-	    }
-	}
-	else
-	{
-	    len_size = numComps * sizeof(double);
-	    requireWrite(out);
-#pragma omp parallel private(q,n,out_array,in_array)
-	    {
-# pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		{
-		    in_array = getSampleDataRO(in, n);
-		    out_array = getSampleDataRW(out, n);
-		    for (q = 0; q < numQuad; q++)
-			memcpy(out_array + q * numComps, in_array, len_size);
-		}
-	    }
-	}
-    }
-    return;
-}
diff --git a/dudley/src/Assemble_CopyNodalData.cpp b/dudley/src/Assemble_CopyNodalData.cpp
index 300e45a..0a20a71 100644
--- a/dudley/src/Assemble_CopyNodalData.cpp
+++ b/dudley/src/Assemble_CopyNodalData.cpp
@@ -14,390 +14,149 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: copies data between different types nodal representation   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Util.h"
 #include "Assemble.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
+#include "Util.h"
 
-void Dudley_Assemble_CopyNodalData(Dudley_NodeFile * nodes, escript::Data * out, const escript::Data * in)
-{
-    dim_t n, k, l, mpiSize;
-    dim_t numComps = getDataPointSize(out);
-    paso::Coupler_ptr coupler;
-    type_t in_data_type = getFunctionSpaceType(in);
-    type_t out_data_type = getFunctionSpaceType(out);
-    index_t upperBound;
-    double *recv_buffer;
-    size_t numComps_size = 0;
-    Dudley_resetError();
-    if (nodes == NULL)
-	return;
-    mpiSize = nodes->MPIInfo->size;
+namespace dudley {
 
-    /* check out and in */
-    if (numComps != getDataPointSize(in))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_CopyNodalData: number of components of input and output Data do not match.");
-    }
-    else if (!isExpanded(out))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyNodalData: expanded Data object is expected for output data.");
+void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
+                            const escript::Data& in)
+{
+    if (!nodes)
+        return;
+
+    const int mpiSize = nodes->MPIInfo->size;
+    const int numComps = out.getDataPointSize();
+    const int in_data_type = in.getFunctionSpace().getTypeCode();
+    const int out_data_type = out.getFunctionSpace().getTypeCode();
+
+    // check out and in
+    if (numComps != in.getDataPointSize()) {
+        throw escript::ValueError("Assemble_CopyNodalData: number of components of input and output Data do not match.");
+    } else if (!out.actsExpanded()) {
+        throw escript::ValueError("Assemble_CopyNodalData: expanded Data object is expected for output data.");
     }
 
-    /* more sophisticated test needed for overlapping node/DOF counts */
-    if (in_data_type == DUDLEY_NODES)
-    {
-	if (!numSamplesEqual(in, 1, Dudley_NodeFile_getNumNodes(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of input Data object");
-	}
-    }
-    else if (in_data_type == DUDLEY_REDUCED_NODES)
-    {
-	if (!numSamplesEqual(in, 1, Dudley_NodeFile_getNumReducedNodes(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of input Data object");
-	}
+    // more sophisticated test needed for overlapping node/DOF counts
+    if (in_data_type == DUDLEY_NODES) {
+        if (!in.numSamplesEqual(1, nodes->getNumNodes())) {
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
+        }
+    } else if (in_data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+        if (!in.numSamplesEqual(1, nodes->getNumDegreesOfFreedom())) {
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
+        }
+        if ((out_data_type == DUDLEY_NODES || out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
+                && !in.actsExpanded() && mpiSize > 1) {
+            throw DudleyException("Assemble_CopyNodalData: DUDLEY_DEGREES_OF_FREEDOM to DUDLEY_NODES or DUDLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
+        }
+    } else {
+        throw escript::ValueError( "Assemble_CopyNodalData: illegal function space type for target object");
     }
-    else if (in_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-    {
-	if (!numSamplesEqual(in, 1, Dudley_NodeFile_getNumDegreesOfFreedom(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of input Data object");
-	}
-	if ((((out_data_type == DUDLEY_NODES) || (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)) && !isExpanded(in)
-	     && (mpiSize > 1)))
-	{
 
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: DUDLEY_DEGREES_OF_FREEDOM to DUDLEY_NODES or DUDLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
-	}
-    }
-    else if (in_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-    {
-	if (!numSamplesEqual(in, 1, Dudley_NodeFile_getNumReducedDegreesOfFreedom(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of input Data object");
-	}
-	if ((out_data_type == DUDLEY_DEGREES_OF_FREEDOM) && !isExpanded(in) && (mpiSize > 1))
-	{
+    dim_t numOut = 0;
+    switch (out_data_type) {
+        case DUDLEY_NODES:
+            numOut = nodes->getNumNodes();
+            break;
 
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: DUDLEY_REDUCED_DEGREES_OF_FREEDOM to DUDLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
-	}
-    }
-    else
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyNodalData: illegal function space type for target object");
-    }
+        case DUDLEY_DEGREES_OF_FREEDOM:
+            numOut = nodes->getNumDegreesOfFreedom();
+            break;
 
-    if (out_data_type == DUDLEY_NODES)
-    {
-	if (!numSamplesEqual(out, 1, Dudley_NodeFile_getNumNodes(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of output Data object");
-	}
+        default:
+            throw escript::ValueError("Assemble_CopyNodalData: illegal function space type for source object");
     }
-    else if (out_data_type == DUDLEY_REDUCED_NODES)
-    {
-	if (!numSamplesEqual(out, 1, Dudley_NodeFile_getNumReducedNodes(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of output Data object");
-	}
-    }
-    else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-    {
-	if (!numSamplesEqual(out, 1, Dudley_NodeFile_getNumDegreesOfFreedom(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of output Data object");
-	}
-    }
-    else if (out_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-    {
-	if (!numSamplesEqual(out, 1, Dudley_NodeFile_getNumReducedDegreesOfFreedom(nodes)))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_CopyNodalData: illegal number of samples of output Data object");
-	}
-    }
-    else
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyNodalData: illegal function space type for source object");
-    }
-
-    /* now we can start */
-
-    if (Dudley_noError())
-    {
-	/*********************** DUDLEY_NODES **************************************************/
-	numComps_size = (size_t) numComps *sizeof(double);
-	if (in_data_type == DUDLEY_NODES)
-	{
-	    requireWrite(out);
-	    if (out_data_type == DUDLEY_NODES)
-	    {
-#pragma omp parallel private(n)
-		{
-
-#pragma omp parallel for private(n) schedule(static)
-		    for (n = 0; n < nodes->nodesMapping->numNodes; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, n), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_NODES)
-	    {
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nodes->reducedNodesMapping->numTargets; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n),
-			       getSampleDataROFast(in, nodes->reducedNodesMapping->map[n]), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n),
-			       getSampleDataROFast(in, nodes->degreesOfFreedomMapping->map[n]), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n),
-			       getSampleDataROFast(in, nodes->reducedDegreesOfFreedomMapping->map[n]), numComps_size);
-		    }
-		}
-	    }
-	/*********************** DUDLEY_REDUCED_NODES **************************************************/
-	}
-	else if (in_data_type == DUDLEY_REDUCED_NODES)
-	{
-	    requireWrite(out);
-	    if (out_data_type == DUDLEY_NODES)
-	    {
-		Dudley_setError(TYPE_ERROR, "Dudley_Assemble_CopyNodalData: cannot copy from reduced nodes to nodes.");
 
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_NODES)
-	    {
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nodes->reducedNodesMapping->numNodes; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, n), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	    {
-		Dudley_setError(TYPE_ERROR,
-				"Dudley_Assemble_CopyNodalData: cannot copy from reduced nodes to degrees of freedom.");
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-#pragma omp parallel private(n,k)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			k = nodes->reducedDegreesOfFreedomMapping->map[n];
-			memcpy(getSampleDataRWFast(out, n),
-			       getSampleDataROFast(in, nodes->reducedNodesMapping->target[k]), numComps_size);
-		    }
-		}
-	    }
-
-	/*********************** DUDLEY_DEGREES_OF_FREEDOM **************************************************/
-	}
-	else if (in_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	{
-	    requireWrite(out);
-	    if (out_data_type == DUDLEY_NODES)
-	    {
-		coupler.reset(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps));
-		if (Esys_noError())
-		{
-		    /* safe provided coupler->copyAll is called before the pointer in "in" is invalidated */
-		    const_cast<escript::Data*>(in)->resolve();
-		    coupler->startCollect(in->getDataRO());  
-		    recv_buffer = coupler->finishCollect();
-		    upperBound = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-#pragma omp parallel private(n,k)
-		    {
-#pragma omp for schedule(static)
-			for (n = 0; n < nodes->numNodes; n++)
-			{
-			    k = nodes->degreesOfFreedomMapping->target[n];
-			    if (k < upperBound)
-			    {
-				memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, k), numComps_size);
-			    }
-			    else
-			    {
-				memcpy(getSampleDataRWFast(out, n),
-				       &recv_buffer[(k - upperBound) * numComps], numComps_size);
-			    }
-			}
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_NODES)
-	    {
-		coupler.reset(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps));
-		if (Esys_noError())
-		{
-		    /* safe provided coupler->copyAll is called before the pointer in "in" is invalidated */
-		    const_cast<escript::Data*>(in)->resolve();
-		    coupler->startCollect(in->getDataRO());  
-		    recv_buffer = coupler->finishCollect();
-		    upperBound = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-		    requireWrite(out);
+    if (!out.numSamplesEqual(1, numOut)) {
+        throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of output Data object");
+    }
 
-#pragma omp parallel private(n,k,l)
-		    {
-#pragma omp for schedule(static)
-			for (n = 0; n < nodes->reducedNodesMapping->numTargets; n++)
-			{
-			    l = nodes->reducedNodesMapping->map[n];
-			    k = nodes->degreesOfFreedomMapping->target[l];
-			    if (k < upperBound)
-			    {
-				memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, k), numComps_size);
-			    }
-			    else
-			    {
-				memcpy(getSampleDataRWFast(out, n),
-				       &recv_buffer[(k - upperBound) * numComps], numComps_size);
-			    }
-			}
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-		requireWrite(out);
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, n), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-		requireWrite(out);
-#pragma omp parallel private(n,k)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			k = nodes->reducedDegreesOfFreedomMapping->map[n];
-			memcpy(getSampleDataRWFast(out, n),
-			       getSampleDataROFast(in, nodes->degreesOfFreedomMapping->target[k]), numComps_size);
-		    }
-		}
-	    }
+    const size_t numComps_size = numComps * sizeof(double);
+
+    /**************************** DUDLEY_NODES ******************************/
+    if (in_data_type == DUDLEY_NODES) {
+        out.requireWrite();
+        if (out_data_type == DUDLEY_NODES) {
+#pragma omp parallel for
+            for (index_t n = 0; n < numOut; n++) {
+                memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n), numComps_size);
+            }
+        } else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+            const index_t* map = nodes->borrowDegreesOfFreedomTarget();
+#pragma omp parallel for
+            for (index_t n = 0; n < numOut; n++) {
+                memcpy(out.getSampleDataRW(n), in.getSampleDataRO(map[n]),
+                       numComps_size);
+            }
+        }
+    /********************** DUDLEY_DEGREES_OF_FREEDOM ***********************/
+    } else if (in_data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+        out.requireWrite();
+        if (out_data_type == DUDLEY_NODES) {
+            const_cast<escript::Data*>(&in)->resolve();
+            const index_t* target = nodes->borrowTargetDegreesOfFreedom();
+#ifdef ESYS_HAVE_PASO
+            paso::Coupler_ptr coupler(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps, nodes->MPIInfo));
+            coupler->startCollect(in.getDataRO());
+            const double* recv_buffer = coupler->finishCollect();
+            const index_t upperBound = nodes->getNumDegreesOfFreedom();
+#pragma omp parallel for
+            for (index_t n = 0; n < numOut; n++) {
+                const index_t k = target[n];
+                if (k < upperBound) {
+                    memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
+                           numComps_size);
+                } else {
+                    memcpy(out.getSampleDataRW(n),
+                           &recv_buffer[(k - upperBound) * numComps],
+                           numComps_size);
+                }
+            }
+#elif defined(ESYS_HAVE_TRILINOS)
+            using namespace esys_trilinos;
+
+            const_TrilinosGraph_ptr graph(nodes->getTrilinosGraph());
+            Teuchos::RCP<const MapType> colMap;
+            Teuchos::RCP<const MapType> rowMap;
+            MapType colPointMap;
+            MapType rowPointMap;
+            if (numComps > 1) {
+                colPointMap = RealBlockVector::makePointMap(*graph->getColMap(),
+                                                            numComps);
+                rowPointMap = RealBlockVector::makePointMap(*graph->getRowMap(),
+                                                            numComps);
+                colMap = Teuchos::rcpFromRef(colPointMap);
+                rowMap = Teuchos::rcpFromRef(rowPointMap);
+            } else {
+                colMap = graph->getColMap();
+                rowMap = graph->getRowMap();
+            }
+
+            const ImportType importer(rowMap, colMap);
+            const Teuchos::ArrayView<const real_t> localIn(
+                                               in.getSampleDataRO(0),
+                                               in.getNumDataPoints()*numComps);
+            Teuchos::RCP<RealVector> lclData = rcp(new RealVector(rowMap,
+                                                  localIn, localIn.size(), 1));
+            Teuchos::RCP<RealVector> gblData = rcp(new RealVector(colMap, 1));
+            gblData->doImport(*lclData, importer, Tpetra::INSERT);
+            Teuchos::ArrayRCP<const real_t> gblArray(gblData->getData(0));
+#pragma omp parallel for
+            for (index_t i = 0; i < numOut; i++) {
+                const real_t* src = &gblArray[target[i] * numComps];
+                std::copy(src, src+numComps, out.getSampleDataRW(i));
+            }
+#endif
+        } else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+#pragma omp parallel for
+            for (index_t n = 0; n < numOut; n++) {
+                memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n),
+                       numComps_size);
+            }
+        }
+    } // in_data_type
+}
 
-	/*********************** DUDLEY_REDUCED_DEGREES_OF_FREEDOM **************************************************/
-	}
-	else if (in_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	{
+} // namespace dudley
 
-	    if (out_data_type == DUDLEY_NODES)
-	    {
-		Dudley_setError(TYPE_ERROR,
-				"Dudley_Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to nodes.");
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_NODES)
-	    {
-		coupler.reset(new paso::Coupler(nodes->reducedDegreesOfFreedomConnector, numComps));
-		if (Esys_noError())
-		{
-		    upperBound = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-		    /* safe provided coupler->copyAll is called before the pointer in "in" is invalidated */
-		    const_cast<escript::Data*>(in)->resolve();
-		    coupler->startCollect(in->getDataRO());  
-		    recv_buffer = coupler->finishCollect();
-		    requireWrite(out);
-#pragma omp parallel private(n,k,l)
-		    {
-#pragma omp for schedule(static)
-			for (n = 0; n < nodes->reducedNodesMapping->numTargets; n++)
-			{
-			    l = nodes->reducedNodesMapping->map[n];
-			    k = nodes->reducedDegreesOfFreedomMapping->target[l];
-			    if (k < upperBound)
-			    {
-				memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, k), numComps_size);
-			    }
-			    else
-			    {
-				memcpy(getSampleDataRWFast(out, n),
-				       &recv_buffer[(k - upperBound) * numComps], numComps_size);
-			    }
-			}
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	    {
-		int nComps = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-		requireWrite(out);
-#pragma omp parallel private(n)
-		{
-#pragma omp for schedule(static)
-		    for (n = 0; n < nComps; n++)
-		    {
-			memcpy(getSampleDataRWFast(out, n), getSampleDataROFast(in, n), numComps_size);
-		    }
-		}
-	    }
-	    else if (out_data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	    {
-		Dudley_setError(TYPE_ERROR,
-				"Dudley_Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to degrees of freedom.");
-	    }
-	}
-    }
-    return;
-}
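Assemble_CopyNodalData handles the node/degrees-of-freedom conversions behind interpolation
between Solution and ContinuousFunction on the Python side; in MPI runs the DOF-to-nodes
direction goes through the Paso coupler or the Trilinos import shown in the new code. A sketch,
assuming that mapping of function spaces:

    from esys.escript import ContinuousFunction, Solution, interpolate
    from esys.dudley import Rectangle

    dom = Rectangle(n0=10, n1=10)
    u = Solution(dom).getX()[0]                    # data held per degree of freedom
    u_n = interpolate(u, ContinuousFunction(dom))  # copied onto the mesh nodes
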
diff --git a/dudley/src/Assemble_LumpedSystem.cpp b/dudley/src/Assemble_LumpedSystem.cpp
index 366bc4a..f816172 100644
--- a/dudley/src/Assemble_LumpedSystem.cpp
+++ b/dudley/src/Assemble_LumpedSystem.cpp
@@ -14,386 +14,289 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assembles the mass matrix in lumped form                */
-
-/*    The coefficient D has to be defined on the integration points or not present. */
-
-/*    lumpedMat has to be initialized before the routine is called. */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
+#include "ShapeTable.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-#include "ShapeTable.h"
+#include <escript/index.h>
 
-/************************************************************************************/
+namespace dudley {
 
-void Dudley_Assemble_LumpedSystem(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, escript::Data * lumpedMat,
-                                  const escript::Data * D, const bool useHRZ)
+void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
+                           escript::Data& lumpedMat, const escript::Data& D,
+                           bool useHRZ)
 {
-
-    bool reducedIntegrationOrder = FALSE, expandedD;
-    Dudley_Assemble_Parameters p;
-    int dimensions[ESCRIPT_MAX_DATA_RANK];
-    dim_t k, e, len_EM_lumpedMat, q, s;
-    type_t funcspace;
-    index_t color, *row_index = NULL;
-    __const double *D_p = NULL;
-    const double *S = NULL;
-    double *EM_lumpedMat = NULL, *lumpedMat_p = NULL;
-    register double rtmp;
-    register double m_t = 0., diagS = 0.;
-
-    Dudley_resetError();
-
-    if (nodes == NULL || elements == NULL)
-        return;
-    if (lumpedMat->isEmpty() || D->isEmpty())
-        return;
-    if (lumpedMat->isEmpty() && !D->isEmpty())
-    {
-        Dudley_setError(TYPE_ERROR, "Dudley_Assemble_LumpedSystem: coefficients are non-zero but no lumped matrix is given.");
+    if (!nodes || !elements || lumpedMat.isEmpty() || D.isEmpty())
         return;
+
+    const int funcspace = D.getFunctionSpace().getTypeCode();
+    bool reducedIntegrationOrder;
+    // check function space of D
+    if (funcspace == DUDLEY_ELEMENTS) {
+        reducedIntegrationOrder = false;
+    } else if (funcspace == DUDLEY_FACE_ELEMENTS) {
+        reducedIntegrationOrder = false;
+    } else if (funcspace == DUDLEY_REDUCED_ELEMENTS) {
+        reducedIntegrationOrder = true;
+    } else if (funcspace == DUDLEY_REDUCED_FACE_ELEMENTS) {
+        reducedIntegrationOrder = true;
+    } else {
+        throw escript::ValueError("Assemble_LumpedSystem: assemblage failed because of illegal function space.");
     }
-    funcspace = D->getFunctionSpace().getTypeCode();
-    /* check if all function spaces are the same */
-    if (funcspace == DUDLEY_ELEMENTS)
-    {
-        reducedIntegrationOrder = FALSE;
-    }
-    else if (funcspace == DUDLEY_FACE_ELEMENTS)
-    {
-        reducedIntegrationOrder = FALSE;
-    }
-    else if (funcspace == DUDLEY_REDUCED_ELEMENTS)
-    {
-        reducedIntegrationOrder = TRUE;
-    }
-    else if (funcspace == DUDLEY_REDUCED_FACE_ELEMENTS)
-    {
-        reducedIntegrationOrder = TRUE;
-    }
-    else
-    {
-        Dudley_setError(TYPE_ERROR, "Dudley_Assemble_LumpedSystem: assemblage failed because of illegal function space.");
-    }
-    if (!Dudley_noError())
-        return;
 
-    /* set all parameters in p */
-    Dudley_Assemble_getAssembleParameters(nodes, elements, paso::SystemMatrix_ptr(), lumpedMat, reducedIntegrationOrder, &p);
-    if (!Dudley_noError())
-        return;
+    // initialize parameters
+    AssembleParameters p(nodes, elements, escript::ASM_ptr(),
+                         lumpedMat, reducedIntegrationOrder);
 
-    /* check if all function spaces are the same */
-    if (!numSamplesEqual(D, p.numQuad, elements->numElements))
-    {
+    // check if all function spaces are the same
+    if (!D.numSamplesEqual(p.numQuad, elements->numElements)) {
         std::stringstream ss;
-        ss << "Dudley_Assemble_LumpedSystem: sample points of coefficient D "
+        ss << "Assemble_LumpedSystem: sample points of coefficient D "
               "don't match (" << p.numQuad << ","
            << elements->numElements << ")";
-        std::string error_msg(ss.str());
-        Dudley_setError(TYPE_ERROR, error_msg.c_str());
+        throw escript::ValueError(ss.str());
     }
 
-    /*  check the dimensions: */
-    if (p.numEqu == 1)
-    {
-        if (!D->isEmpty())
-        {
-            if (!isDataPointShapeEqual(D, 0, dimensions))
-            {
-                Dudley_setError(TYPE_ERROR, "Dudley_Assemble_LumpedSystem: coefficient D, rank 0 expected.");
-            }
-
+    // check the dimensions
+    if (p.numEqu == 1) {
+        const escript::DataTypes::ShapeType dimensions; //dummy
+        if (D.getDataPointShape() != dimensions) {
+            throw escript::ValueError("Assemble_LumpedSystem: coefficient D, rank 0 expected.");
         }
-    }
-    else
-    {
-        if (!D->isEmpty())
-        {
-            dimensions[0] = p.numEqu;
-            if (!isDataPointShapeEqual(D, 1, dimensions))
-            {
-                std::stringstream ss;
-                ss << "Dudley_Assemble_LumpedSystem: coefficient D, expected "
-                      "shape (" << dimensions[0] << ",)";
-                std::string error_msg(ss.str());
-                Dudley_setError(TYPE_ERROR, error_msg.c_str());
-            }
+    } else {
+        const escript::DataTypes::ShapeType dimensions(1, p.numEqu);
+        if (D.getDataPointShape() != dimensions) {
+            std::stringstream ss;
+            ss << "Assemble_LumpedSystem: coefficient D, expected "
+                  "shape (" << p.numEqu << ",)";
+            throw escript::ValueError(ss.str());
         }
     }
-    if (Dudley_noError())
-    {
-        requireWrite(lumpedMat);
-        lumpedMat_p = getSampleDataRW(lumpedMat, 0);
-        
-        if (funcspace==DUDLEY_POINTS) {
-              #pragma omp parallel private(color, D_p)
-              {
-                    for (color=elements->minColor;color<=elements->maxColor;color++) {
-                      /*  open loop over all elements: */
-                      #pragma omp for private(e) schedule(static)
-                      for(e=0;e<elements->numElements;e++){
-                          if (elements->Color[e]==color) {
-                            D_p=getSampleDataRO(D, e);
-                            if (NULL!=D_p)  Dudley_Util_AddScatter(1,
-                                                        &(p.row_DOF[elements->Nodes[INDEX2(0,e,p.NN)]]),
-                                                        p.numEqu,
-                                                        D_p,
-                                                        lumpedMat_p, 
-                                                        p.row_DOF_UpperBound);
-                          } /* end color check */
-                      } /* end element loop */
-                  } /* end color loop */
-            } /* end parallel region */
-        } else {  
-              
-              len_EM_lumpedMat = p.numShapes * p.numEqu;
 
-              expandedD = D->isExpanded();
-              if (!getQuadShape(elements->numDim, reducedIntegrationOrder, &S))
-              {
-                  Dudley_setError(TYPE_ERROR, "Dudley_Assemble_LumpedSystem: Unable to locate shape function.");
-              }
-              #pragma omp parallel private(color, EM_lumpedMat, row_index, D_p, s, q, k, rtmp, diagS, m_t)
-              {
-                  EM_lumpedMat = new double[len_EM_lumpedMat];
-                  row_index = new index_t[p.numShapes];
-                  if (!Dudley_checkPtr(EM_lumpedMat) && !Dudley_checkPtr(row_index))
-                  {
-                      if (p.numEqu == 1)
-                      {         /* single equation */
-                          if (expandedD)
-                          {             /* with expanded D */
-                              for (color = elements->minColor; color <= elements->maxColor; color++)
-                              {
-                                  /*  open loop over all elements: */
-      #pragma omp for private(e) schedule(static)
-                                  for (e = 0; e < elements->numElements; e++)
-                                  {
-                                      if (elements->Color[e] == color)
-                                      {
-                                          double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-                                          D_p = getSampleDataRO(D, e);
-                                          if (useHRZ)   {
-                                            m_t = 0;    /* mass of the element: m_t */
-                                            for (q = 0; q < p.numQuad; q++)
-                                                m_t += vol * D_p[INDEX2(q, 0, p.numQuad)];
-                                            diagS = 0;  /* diagonal sum: S */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                rtmp = 0;
-                                                for (q = 0; q < p.numQuad; q++)
-                                                  rtmp +=
-                                                        vol * D_p[INDEX2(q, 0, p.numQuad)] * S[INDEX2(s, q, p.numShapes)] *
-                                                        S[INDEX2(s, q, p.numShapes)];
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
-                                                diagS += rtmp;
-                                            }
-                                            /* rescale diagonals by m_t/diagS to ensure consistent mass over element */
-                                            rtmp = m_t / diagS;
-                                            for (s = 0; s < p.numShapes; s++)
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
+    lumpedMat.requireWrite();
+    double* lumpedMat_p = lumpedMat.getSampleDataRW(0);
 
-                                          } else {/* row-sum lumping */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                rtmp = 0;
-                                                for (q = 0; q < p.numQuad; q++)
-                                                  rtmp += vol * S[INDEX2(s, q, p.numShapes)] * D_p[INDEX2(q, 0, p.numQuad)];
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
-                                            }
-                                          }
-                                          for (q = 0; q < p.numShapes; q++)
-                                          {
-                                              row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-                                          }
-                                          Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_lumpedMat, lumpedMat_p,
-                                                                p.row_DOF_UpperBound);
-                                      } /* end color check */
-                                  }     /* end element loop */
-                              } /* end color loop */
-                          }
-                          else
-                          {             /* with constant D */
+    if (funcspace==DUDLEY_POINTS) {
+#pragma omp parallel
+        {
+            for (int color=elements->minColor; color<=elements->maxColor; color++) {
+                // loop over all elements
+#pragma omp for
+                for (index_t e=0; e<elements->numElements; e++) {
+                    if (elements->Color[e]==color) {
+                        const double* D_p = D.getSampleDataRO(e);
+                        util::addScatter(1,
+                                      &p.DOF[elements->Nodes[INDEX2(0,e,p.NN)]],
+                                      p.numEqu, D_p, lumpedMat_p,
+                                      p.DOF_UpperBound);
+                    } // end color check
+                } // end element loop
+            } // end color loop
+        } // end parallel region
+    } else {
+        bool expandedD = D.actsExpanded();
+        const double *S = NULL;
+        if (!getQuadShape(elements->numDim, reducedIntegrationOrder, &S)) {
+            throw DudleyException("Assemble_LumpedSystem: Unable to locate shape function.");
+        }
+#pragma omp parallel
+        {
+            std::vector<double> EM_lumpedMat(p.numShapes * p.numEqu);
+            IndexVector row_index(p.numShapes);
 
-                              for (color = elements->minColor; color <= elements->maxColor; color++)
-                              {
-                                  /*  open loop over all elements: */
-      #pragma omp for private(e) schedule(static)
-                                  for (e = 0; e < elements->numElements; e++)
-                                  {
-                                      if (elements->Color[e] == color)
-                                      {
-                                          double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-                                          D_p = getSampleDataRO(D, e);
-                                          if (useHRZ)   {       /* HRZ lumping */
-                                            m_t = 0;    /* mass of the element: m_t */
-                                            for (q = 0; q < p.numQuad; q++)
-                                                m_t += vol;
-                                            diagS = 0;  /* diagonal sum: S */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                rtmp = 0;
-                                                for (q = 0; q < p.numQuad; q++)
-                                                {
-                                                  rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
-                                                }
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
-                                                diagS += rtmp;
-                                            }
-                                            /* rescale diagonals by m_t/diagS to ensure consistent mass over element */
-                                            rtmp = m_t / diagS * D_p[0];
-                                            for (s = 0; s < p.numShapes; s++)
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
-                                          } else {                      /* row-sum lumping */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                rtmp = 0;
-                                                for (q = 0; q < p.numQuad; q++)
-                                                  rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-                                                EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp * D_p[0];
-                                            }
-                                          }
-                                          for (q = 0; q < p.numShapes; q++)
-                                              row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-                                          Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_lumpedMat, lumpedMat_p,
-                                                                p.row_DOF_UpperBound);
-                                      } /* end color check */
-                                  }     /* end element loop */
-                              } /* end color loop */
+            if (p.numEqu == 1) { // single equation
+                if (expandedD) { // with expanded D
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
+#pragma omp for
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                const double vol = p.jac->absD[e] * p.jac->quadweight;
+                                const double* D_p = D.getSampleDataRO(e);
+                                if (useHRZ) {
+                                    double m_t = 0; // mass of the element
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        m_t += vol * D_p[INDEX2(q, 0, p.numQuad)];
+                                    double diagS = 0;  // diagonal sum
+                                    double rtmp;
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        rtmp = 0.;
+                                        for (int q = 0; q < p.numQuad; q++)
+                                            rtmp +=
+                                                vol * D_p[INDEX2(q, 0, p.numQuad)] * S[INDEX2(s, q, p.numShapes)] *
+                                                S[INDEX2(s, q, p.numShapes)];
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
+                                        diagS += rtmp;
+                                    }
+                                    // rescale diagonals by m_t/diagS to ensure
+                                    // consistent mass over element
+                                    rtmp = m_t / diagS;
+                                    for (int s = 0; s < p.numShapes; s++)
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
+                                } else { // row-sum lumping
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        double rtmp = 0.;
+                                        for (int q = 0; q < p.numQuad; q++)
+                                            rtmp += vol * S[INDEX2(s, q, p.numShapes)] * D_p[INDEX2(q, 0, p.numQuad)];
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
+                                    }
+                                }
+                                for (int q = 0; q < p.numShapes; q++)
+                                    row_index[q] = p.DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
+                                util::addScatter(p.numShapes, &row_index[0],
+                                       p.numEqu, &EM_lumpedMat[0], lumpedMat_p,
+                                       p.DOF_UpperBound);
+                            } // end color check
+                        } // end element loop
+                    } // end color loop
+                } else { // with constant D
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
+#pragma omp for
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                const double vol = p.jac->absD[e] * p.jac->quadweight;
+                                const double* D_p = D.getSampleDataRO(e);
+                                if (useHRZ) { // HRZ lumping
+                                    // mass of the element
+                                    const double m_t = vol*p.numQuad;
+                                    double diagS = 0; // diagonal sum
+                                    double rtmp;
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        rtmp = 0.;
+                                        for (int q = 0; q < p.numQuad; q++) {
+                                            rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
+                                        }
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
+                                        diagS += rtmp;
+                                    }
+                                    // rescale diagonals by m_t/diagS to ensure
+                                    // consistent mass over element
+                                    rtmp = m_t / diagS * D_p[0];
+                                    for (int s = 0; s < p.numShapes; s++)
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
+                                } else { // row-sum lumping
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        double rtmp = 0.;
+                                        for (int q = 0; q < p.numQuad; q++)
+                                            rtmp += vol * S[INDEX2(s, q, p.numShapes)];
+                                        EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp * D_p[0];
+                                    }
+                                }
+                                for (int q = 0; q < p.numShapes; q++)
+                                    row_index[q] = p.DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
+                                util::addScatter(p.numShapes, &row_index[0],
+                                       p.numEqu, &EM_lumpedMat[0], lumpedMat_p,
+                                       p.DOF_UpperBound);
+                            } // end color check
+                        } // end element loop
+                    } // end color loop
+                }
 
-                          }
-                      }
-                      else
-                      {         /* system of  equation */
-                          if (expandedD)
-                          {             /* with expanded D */
-                              for (color = elements->minColor; color <= elements->maxColor; color++)
-                              {
-                                  /*  open loop over all elements: */
-      #pragma omp for private(e) schedule(static)
-                                  for (e = 0; e < elements->numElements; e++)
-                                  {
-                                      if (elements->Color[e] == color)
-                                      {
-                                          double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-                                          D_p = getSampleDataRO(D, e);
+            } else { // system of equations
+                if (expandedD) { // with expanded D
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
+#pragma omp for
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                const double vol = p.jac->absD[e] * p.jac->quadweight;
+                                const double* D_p = D.getSampleDataRO(e);
 
-                                          if (useHRZ)   {       /* HRZ lumping */
-                                            for (k = 0; k < p.numEqu; k++)
-                                            {
-                                                m_t = 0;        /* mass of the element: m_t */
-                                                for (q = 0; q < p.numQuad; q++)
-                                                  m_t += vol * D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)];
+                                if (useHRZ) { // HRZ lumping
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        double m_t = 0; // mass of the element
+                                        for (int q = 0; q < p.numQuad; q++)
+                                            m_t += vol * D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)];
 
-                                                diagS = 0;      /* diagonal sum: S */
-                                                for (s = 0; s < p.numShapes; s++)
-                                                {
-                                                  rtmp = 0;
-                                                  for (q = 0; q < p.numQuad; q++)
-                                                        rtmp +=
-                                                            vol * D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)] *
-                                                            S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
-                                                  diagS += rtmp;
-                                                }
-                                                /* rescale diagonals by m_t/diagS to ensure consistent mass over element */
-                                                rtmp = m_t / diagS;
-                                                for (s = 0; s < p.numShapes; s++)
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] *= rtmp;
-                                            }
-                                          } else {                              /* row-sum lumping */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                for (k = 0; k < p.numEqu; k++)
-                                                {
-                                                  rtmp = 0.;
-                                                  for (q = 0; q < p.numQuad; q++)
-                                                        rtmp +=
-                                                            vol * S[INDEX2(s, q, p.numShapes)] *
-                                                            D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)];
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
-                                                }
-                                            }
-                                          }
-                                          for (q = 0; q < p.numShapes; q++)
-                                              row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-                                          Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_lumpedMat, lumpedMat_p,
-                                                                p.row_DOF_UpperBound);
-                                      } /* end color check */
-                                  }     /* end element loop */
-                              } /* end color loop */
-                          }
-                          else
-                          {             /* with constant D */
-                              for (color = elements->minColor; color <= elements->maxColor; color++)
-                              {
-                                  /*  open loop over all elements: */
-      #pragma omp for private(e) schedule(static)
-                                  for (e = 0; e < elements->numElements; e++)
-                                  {
-                                      if (elements->Color[e] == color)
-                                      {
-                                          double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-                                          D_p = getSampleDataRO(D, e);
+                                        double diagS = 0; // diagonal sum
+                                        double rtmp;
+                                        for (int s = 0; s < p.numShapes; s++) {
+                                            rtmp = 0.;
+                                            for (int q = 0; q < p.numQuad; q++)
+                                                rtmp +=
+                                                    vol * D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)] *
+                                                    S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
+                                            diagS += rtmp;
+                                        }
+                                        // rescale diagonals by m_t/diagS to
+                                        // ensure consistent mass over element
+                                        rtmp = m_t / diagS;
+                                        for (int s = 0; s < p.numShapes; s++)
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] *= rtmp;
+                                    }
+                                } else { // row-sum lumping
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            double rtmp = 0.;
+                                            for (int q = 0; q < p.numQuad; q++)
+                                                rtmp +=
+                                                    vol * S[INDEX2(s, q, p.numShapes)] *
+                                                    D_p[INDEX3(k, q, 0, p.numEqu, p.numQuad)];
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
+                                        }
+                                    }
+                                }
+                                for (int q = 0; q < p.numShapes; q++)
+                                    row_index[q] = p.DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
+                                util::addScatter(p.numShapes, &row_index[0],
+                                       p.numEqu, &EM_lumpedMat[0], lumpedMat_p,
+                                       p.DOF_UpperBound);
+                            } // end color check
+                        } // end element loop
+                    } // end color loop
+                } else { // with constant D
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
+#pragma omp for
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                const double vol = p.jac->absD[e] * p.jac->quadweight;
+                                const double* D_p = D.getSampleDataRO(e);
 
-                                          if (useHRZ)           { /* HRZ lumping */
-                                            m_t = 0;    /* mass of the element: m_t */
-                                            for (q = 0; q < p.numQuad; q++)
-                                                m_t += vol;
-                                            diagS = 0;  /* diagonal sum: S */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                rtmp = 0;
-                                                for (q = 0; q < p.numQuad; q++)
-                                                  rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
-                                                for (k = 0; k < p.numEqu; k++)
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
-                                                diagS += rtmp;
-                                            }
-                                            /* rescale diagonals by m_t/diagS to ensure consistent mass over element */
-                                            rtmp = m_t / diagS;
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                for (k = 0; k < p.numEqu; k++)
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] *= rtmp * D_p[k];
-                                            }
-                                          } else {                              /* row-sum lumping */
-                                            for (s = 0; s < p.numShapes; s++)
-                                            {
-                                                for (k = 0; k < p.numEqu; k++)
-                                                {
-                                                  rtmp = 0.;
-                                                  for (q = 0; q < p.numQuad; q++)
-                                                      rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-                                                  EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp * D_p[k];
-                                                }
-                                            }
-                                          }
-                                          for (q = 0; q < p.numShapes; q++)
-                                              row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-                                          Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_lumpedMat, lumpedMat_p,
-                                                                p.row_DOF_UpperBound);
-                                      } /* end color check */
-                                  }     /* end element loop */
-                              } /* end color loop */
-                          }
-                      }
-                  }                     /* end of pointer check */
-                  delete[] EM_lumpedMat;
-                  delete[] row_index;
-              }                 /* end parallel region */
-        }
+                                if (useHRZ) { // HRZ lumping
+                                    double m_t = vol * p.numQuad; // mass of the element
+                                    double diagS = 0; // diagonal sum
+                                    double rtmp;
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        rtmp = 0.;
+                                        for (int q = 0; q < p.numQuad; q++)
+                                            rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(s, q, p.numShapes)];
+                                        for (int k = 0; k < p.numEqu; k++)
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
+                                        diagS += rtmp;
+                                    }
+                                    // rescale diagonals by m_t/diagS to ensure
+                                    // consistent mass over element
+                                    rtmp = m_t / diagS;
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        for (int k = 0; k < p.numEqu; k++)
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] *= rtmp * D_p[k];
+                                    }
+                                } else { // row-sum lumping
+                                    for (int s = 0; s < p.numShapes; s++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            double rtmp = 0.;
+                                            for (int q = 0; q < p.numQuad; q++)
+                                                rtmp += vol * S[INDEX2(s, q, p.numShapes)];
+                                            EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp * D_p[k];
+                                        }
+                                    }
+                                }
+                                for (int q = 0; q < p.numShapes; q++)
+                                    row_index[q] = p.DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
+                                util::addScatter(p.numShapes, &row_index[0],
+                                       p.numEqu, &EM_lumpedMat[0], lumpedMat_p,
+                                       p.DOF_UpperBound);
+                            } // end color check
+                        } // end element loop
+                    } // end color loop
+                }
+            }
+        } // end parallel region
     }
 }
+
+} // namespace dudley
+
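The HRZ and row-sum branches above implement the two usual mass-lumping
schemes: row-sum lumping integrates each shape function against D, while HRZ
lumping uses the squared shape functions and rescales the diagonal so the
total element mass is preserved. A minimal standalone sketch of both schemes
(independent of the Dudley sources; the shape-function values, quadrature
weight and coefficient below are invented for illustration):

    // Sketch of row-sum and HRZ lumping for a single element.
    // All numbers are made up; this is not Dudley code.
    #include <iostream>
    #include <vector>

    int main()
    {
        const int numShapes = 3;            // e.g. a linear triangle
        const int numQuad = 3;
        const double vol = 0.5;             // |J| * quadrature weight
        // S[s][q]: shape function s at quadrature point q (invented values)
        const double S[numShapes][numQuad] = {
            {0.6, 0.2, 0.2}, {0.2, 0.6, 0.2}, {0.2, 0.2, 0.6}
        };
        const std::vector<double> D(numQuad, 1.0); // coefficient D at the
                                                   // quadrature points

        // row-sum lumping: diag[s] = sum_q vol * S[s][q] * D[q]
        std::vector<double> rowSum(numShapes, 0.0);
        for (int s = 0; s < numShapes; s++)
            for (int q = 0; q < numQuad; q++)
                rowSum[s] += vol * S[s][q] * D[q];

        // HRZ lumping: diag[s] ~ sum_q vol * D[q] * S[s][q]^2, then rescale so
        // the diagonals sum to the element mass m_t = sum_q vol * D[q]
        double m_t = 0.0, diagS = 0.0;
        std::vector<double> hrz(numShapes, 0.0);
        for (int q = 0; q < numQuad; q++)
            m_t += vol * D[q];
        for (int s = 0; s < numShapes; s++) {
            for (int q = 0; q < numQuad; q++)
                hrz[s] += vol * D[q] * S[s][q] * S[s][q];
            diagS += hrz[s];
        }
        for (int s = 0; s < numShapes; s++)
            hrz[s] *= m_t / diagS;

        for (int s = 0; s < numShapes; s++)
            std::cout << "shape " << s << ": row-sum " << rowSum[s]
                      << "  HRZ " << hrz[s] << "\n";
        return 0;
    }

Both variants reproduce the total element mass; they differ only in how that
mass is distributed over the diagonal entries that Assemble_LumpedSystem
scatters into lumpedMat.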
diff --git a/dudley/src/Assemble_NodeCoordinates.cpp b/dudley/src/Assemble_NodeCoordinates.cpp
index 062ede2..f1f7baa 100644
--- a/dudley/src/Assemble_NodeCoordinates.cpp
+++ b/dudley/src/Assemble_NodeCoordinates.cpp
@@ -14,54 +14,42 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: copies node coordinates into an expanded Data Object */
-
-/************************************************************************************/
+#include "Assemble.h"
+#include "Util.h"
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <escript/index.h>
 
-#include "Util.h"
-#include "Assemble.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
+#include <sstream>
 
-/************************************************************************************/
+namespace dudley {
 
-void Dudley_Assemble_NodeCoordinates(Dudley_NodeFile * nodes, escript::Data * x)
+void Assemble_NodeCoordinates(const NodeFile* nodes, escript::Data& x)
 {
-    char error_msg[LenErrorMsg_MAX];
-    dim_t n;
-    size_t dim_size;
-    Dudley_resetError();
     if (nodes == NULL)
-	return;
-    if (!numSamplesEqual(x, 1, nodes->numNodes))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_NodeCoordinates: illegal number of samples of Data object");
-    }
-    else if (getFunctionSpaceType(x) != DUDLEY_NODES)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_NodeCoordinates: Data object is not defined on nodes.");
-    }
-    else if (!isExpanded(x))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_NodeCoordinates: expanded Data object expected");
-    }
-    else if (!isDataPointShapeEqual(x, 1, &(nodes->numDim)))
-    {
-	sprintf(error_msg, "Dudley_Assemble_NodeCoordinates: Data object of shape (%d,) expected", nodes->numDim);
-	Dudley_setError(TYPE_ERROR, error_msg);
-    }
-    else
-    {
-	dim_size = nodes->numDim * sizeof(double);
-	requireWrite(x);
-#pragma omp parallel for private(n)
-	for (n = 0; n < nodes->numNodes; n++)
-	    memcpy(getSampleDataRWFast(x, n), &(nodes->Coordinates[INDEX2(0, n, nodes->numDim)]), dim_size);
+        return;
+
+    const escript::DataTypes::ShapeType expectedShape(1, nodes->numDim);
+
+    if (!x.numSamplesEqual(1, nodes->getNumNodes())) {
+        throw escript::ValueError("Assemble_NodeCoordinates: illegal number of samples of Data object");
+    } else if (x.getFunctionSpace().getTypeCode() != DUDLEY_NODES) {
+        throw escript::ValueError("Assemble_NodeCoordinates: Data object is not defined on nodes.");
+    } else if (!x.actsExpanded()) {
+        throw escript::ValueError("Assemble_NodeCoordinates: expanded Data object expected");
+    } else if (x.getDataPointShape() != expectedShape) {
+        std::stringstream ss;
+        ss << "Assemble_NodeCoordinates: Data object of shape ("
+            << nodes->numDim << ",) expected.";
+        throw escript::ValueError(ss.str());
+    } else {
+        const size_t dim_size = nodes->numDim * sizeof(double);
+        x.requireWrite();
+#pragma omp parallel for
+        for (dim_t n = 0; n < nodes->getNumNodes(); n++)
+            memcpy(x.getSampleDataRW(n),
+                    &nodes->Coordinates[INDEX2(0, n, nodes->numDim)], dim_size);
     }
 }
+
+} // namespace dudley
+
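The new Assemble_NodeCoordinates simply copies the node-major coordinate array
into the expanded Data object, one sample per node. A plain-array sketch of
the same copy follows; the INDEX2 expansion used here, i + N*j, is an
assumption matching the usual escript convention, and the coordinate values
are invented:

    // Sketch of the coordinate copy in Assemble_NodeCoordinates, without
    // escript::Data. INDEX2 is written out under the assumption that it
    // expands to i + N*j; the node coordinates are made up.
    #include <cstring>
    #include <iostream>
    #include <vector>

    #define INDEX2(i, j, N) ((i) + (N) * (j))

    int main()
    {
        const int numDim = 2, numNodes = 3;
        // node-major layout: the numDim components of node n are contiguous
        const double Coordinates[numDim * numNodes] = { 0., 0.,  1., 0.,  0., 1. };

        std::vector<double> x(numDim * numNodes);   // stand-in for the Data object
    #pragma omp parallel for
        for (int n = 0; n < numNodes; n++)
            std::memcpy(&x[n * numDim],
                        &Coordinates[INDEX2(0, n, numDim)],
                        numDim * sizeof(double));

        for (int n = 0; n < numNodes; n++)
            std::cout << "node " << n << ": (" << x[n * numDim] << ", "
                      << x[n * numDim + 1] << ")\n";
        return 0;
    }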
diff --git a/dudley/src/Assemble_PDE.cpp b/dudley/src/Assemble_PDE.cpp
index 8e7fb61..f961da2 100644
--- a/dudley/src/Assemble_PDE.cpp
+++ b/dudley/src/Assemble_PDE.cpp
@@ -14,370 +14,239 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/****************************************************************************
 
-/*    assembles the system of numEq PDEs into the stiffness matrix S and right hand side F */
+  Assembles the system of numEqu PDEs into the stiffness matrix S and right
+  hand side F:
 
-/*     -div(A*grad u)-div(B*u)+C*grad u + D*u= -div X + Y */
+      -div(A*grad u)-div(B*u)+C*grad u + D*u = -div X + Y
 
-/*      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m = -(X_{k,i})_i + Y_k */
+      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j+D_{k,m} u_m = -(X_{k,i})_i + Y_k
 
-/*    u has numComp components. */
+  u has numEqu components.
+  Shape of the coefficients:
 
-/*    Shape of the coefficients: */
+      A = numEqu x numDim x numEqu x numDim
+      B = numEqu x numDim x numEqu
+      C = numEqu x numEqu x numDim
+      D = numEqu x numEqu
+      X = numEqu x numDim
+      Y = numEqu
 
-/*      A = numEqu x numDim x numComp x numDim */
-/*      B = numDim x numEqu x numComp  */
-/*      C = numEqu x numDim x numComp  */
-/*      D = numEqu x numComp  */
-/*      X = numEqu x numDim   */
-/*      Y = numEqu */
+  The coefficients A,B,C,D,X and Y have to be defined on the integration points
+  or not present (i.e. empty).
 
-/*    The coefficients A,B,C,D,X and Y have to be defined on the integration points or not present (=NULL). */
+  S and F have to be initialised before the routine is called. S or F can be
+  NULL. In this case the left or the right hand side of the PDE is not
+  processed.
 
-/*    S and F have to be initialised before the routine is called. S or F can be NULL. In this case the left or */
-/*    the right hand side of the PDE is not processed.  */
+  The routine does not consider any boundary conditions.
 
-/*    The routine does not consider any boundary conditions. */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+*****************************************************************************/
 
 #include "Assemble.h"
 #include "Util.h"
-#include "esysUtils/blocktimer.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE(Dudley_NodeFile* nodes, Dudley_ElementFile* elements,
-                         paso::SystemMatrix_ptr S, escript::Data* F,
-                         const escript::Data* A, const escript::Data* B, const escript::Data* C,
-                         const escript::Data* D, const escript::Data* X, const escript::Data* Y)
-{
-    bool reducedIntegrationOrder = false;
-    char error_msg[LenErrorMsg_MAX];
-    Dudley_Assemble_Parameters p;
-    dim_t dimensions[ESCRIPT_MAX_DATA_RANK];
-    type_t funcspace;
-    double blocktimer_start = blocktimer_time();
 
-    Dudley_resetError();
+namespace dudley {
 
-    if (nodes == NULL || elements == NULL)
-	return;
-    if (S == NULL && isEmpty(F))
-	return;
-    if (isEmpty(F) && ( !isEmpty(X) || !isEmpty(Y) ) ) 
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_PDE: right hand side coefficients are non-zero but no right hand side vector given.");
-    }
-
-    if (S == NULL && !isEmpty(A) && !isEmpty(B) && !isEmpty(C) && !isEmpty(D))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: coefficients are non-zero but no matrix is given.");
-    }
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
 
-    /*  get the functionspace for this assemblage call */
-    funcspace = UNKNOWN;
-    updateFunctionSpaceType(funcspace, A);
-    updateFunctionSpaceType(funcspace, B);
-    updateFunctionSpaceType(funcspace, C);
-    updateFunctionSpaceType(funcspace, D);
-    updateFunctionSpaceType(funcspace, X);
-    updateFunctionSpaceType(funcspace, Y);
-    if (funcspace == UNKNOWN)
-	return;			/* all  data are empty */
-
-    /* check if all function spaces are the same */
-    if (!functionSpaceTypeEqual(funcspace, A))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient A");
-    }
-    if (!functionSpaceTypeEqual(funcspace, B))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient B");
-    }
-    if (!functionSpaceTypeEqual(funcspace, C))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient C");
-    }
-    if (!functionSpaceTypeEqual(funcspace, D))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient D");
-    }
-    if (!functionSpaceTypeEqual(funcspace, X))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient X");
-    }
-    if (!functionSpaceTypeEqual(funcspace, Y))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: unexpected function space type for coefficient Y");
-    }
-    if (!Dudley_noError())
-	return;
-
-    /* check if all function spaces are the same */
-    if (funcspace == DUDLEY_ELEMENTS)
-    {
-	reducedIntegrationOrder = false;
-    }
-    else if (funcspace == DUDLEY_FACE_ELEMENTS)
-    {
-	reducedIntegrationOrder = false;
-    }
-    else if (funcspace == DUDLEY_REDUCED_ELEMENTS)
-    {
-	reducedIntegrationOrder = true;
-    }
-    else if (funcspace == DUDLEY_REDUCED_FACE_ELEMENTS)
-    {
-	reducedIntegrationOrder = true;
-    }
-    else if (funcspace == DUDLEY_POINTS)
-    {
-	reducedIntegrationOrder = true;
-    }
-    else
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: assemblage failed because of illegal function space.");
-    }
-    if (!Dudley_noError())
-	return;
-
-    /* set all parameters in p */
-    Dudley_Assemble_getAssembleParameters(nodes, elements, S, F, reducedIntegrationOrder, &p);
-    if (!Dudley_noError())
-	return;
-
-    /* check if all function spaces are the same */
-
-    if (!numSamplesEqual(A, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient A don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
-    }
-
-    if (!numSamplesEqual(B, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient B don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
-    }
-
-    if (!numSamplesEqual(C, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient C don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
-    }
-
-    if (!numSamplesEqual(D, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient D don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
-    }
+inline void setNumSamplesError(const char* c, int n0, int n1)
+{
+    std::stringstream ss;
+    ss << "Assemble_PDE: number of sample points of coefficient " << c
+        << " don't match (" << n0 << "," << n1 << ").";
+    const std::string errorMsg(ss.str());
+    throw DudleyException(errorMsg);
+}
 
-    if (!numSamplesEqual(X, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient X don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
+inline void setShapeError(const char* c, int num, const int* dims)
+{
+    std::stringstream ss;
+    ss << "Assemble_PDE: shape of coefficient " << c
+        << " does not match (" << dims[0] << ",";
+    if (num > 1) {
+       ss << dims[1];
+       if (num > 2) {
+           ss << "," << dims[2];
+           if (num > 3) {
+               ss << "," << dims[3];
+           }
+       }
     }
+    ss << ").";
+    const std::string errorMsg(ss.str());
+    throw DudleyException(errorMsg);
+}
 
-    if (!numSamplesEqual(Y, p.numQuad, elements->numElements))
-    {
-	sprintf(error_msg, "Dudley_Assemble_PDE: sample points of coefficient Y don't match (%d,%d)", p.numQuad,
-		elements->numElements);
-	Dudley_setError(TYPE_ERROR, error_msg);
+void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
+                  escript::ASM_ptr S, escript::Data& F,
+                  const escript::Data& A, const escript::Data& B,
+                  const escript::Data& C, const escript::Data& D,
+                  const escript::Data& X, const escript::Data& Y)
+{
+    if (!nodes || !elements || (S.get()==NULL && F.isEmpty()))
+        return;
+
+    if (F.isEmpty() && (!X.isEmpty() || !Y.isEmpty())) {
+        throw DudleyException("Assemble_PDE: right hand side coefficients are non-zero but no right hand side vector given.");
+    }
+
+    if (S.get()==NULL && !A.isEmpty() && !B.isEmpty() && !C.isEmpty() && !D.isEmpty()) {
+        throw DudleyException("Assemble_PDE: coefficients are non-zero but no matrix is given.");
+    }
+
+    // get the functionspace for this assemblage call
+    int funcspace = -1;
+    if (!A.isEmpty()) funcspace=A.getFunctionSpace().getTypeCode();
+    if (!B.isEmpty()) funcspace=B.getFunctionSpace().getTypeCode();
+    if (!C.isEmpty()) funcspace=C.getFunctionSpace().getTypeCode();
+    if (!D.isEmpty()) funcspace=D.getFunctionSpace().getTypeCode();
+    if (!X.isEmpty()) funcspace=X.getFunctionSpace().getTypeCode();
+    if (!Y.isEmpty()) funcspace=Y.getFunctionSpace().getTypeCode();
+    if (funcspace == -1)
+        return; // all data are empty
+
+    // check if all function spaces are the same
+    if (!A.isEmpty() && A.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient A");
+    } else if (!B.isEmpty() && B.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient B");
+    } else if (!C.isEmpty() && C.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient C");
+    } else if (!D.isEmpty() && D.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient D");
+    } else if (!X.isEmpty() && X.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient X");
+    } else if (!Y.isEmpty() && Y.getFunctionSpace().getTypeCode()!=funcspace) {
+        throw DudleyException("Assemble_PDE: unexpected function space type for coefficient Y");
+    }
+
+    // get value type
+    bool isComplex = false;
+    isComplex = isComplex || (!A.isEmpty() && A.isComplex());
+    isComplex = isComplex || (!B.isEmpty() && B.isComplex());
+    isComplex = isComplex || (!C.isEmpty() && C.isComplex());
+    isComplex = isComplex || (!D.isEmpty() && D.isComplex());
+    isComplex = isComplex || (!X.isEmpty() && X.isComplex());
+    isComplex = isComplex || (!Y.isEmpty() && Y.isComplex());
+
+    bool reducedIntegrationOrder;
+    if (funcspace == DUDLEY_ELEMENTS) {
+        reducedIntegrationOrder = false;
+    } else if (funcspace == DUDLEY_FACE_ELEMENTS) {
+        reducedIntegrationOrder = false;
+    } else if (funcspace == DUDLEY_REDUCED_ELEMENTS) {
+        reducedIntegrationOrder = true;
+    } else if (funcspace == DUDLEY_REDUCED_FACE_ELEMENTS) {
+        reducedIntegrationOrder = true;
+    } else if (funcspace == DUDLEY_POINTS) {
+        reducedIntegrationOrder = true;
+    } else {
+        throw DudleyException("Assemble_PDE: assemblage failed because of illegal function space.");
+    }
+
+    // get assemblage parameters
+    AssembleParameters p(nodes, elements, S, F, reducedIntegrationOrder);
+
+    // check if sample numbers are the same
+    if (!A.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("A", p.numQuad, elements->numElements);
+    } else if (!B.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("B", p.numQuad, elements->numElements);
+    } else if (!C.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("C", p.numQuad, elements->numElements);
+    } else if (!D.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("D", p.numQuad, elements->numElements);
+    } else if (!X.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("X", p.numQuad, elements->numElements);
+    } else if (!Y.numSamplesEqual(p.numQuad, elements->numElements)) {
+        setNumSamplesError("Y", p.numQuad, elements->numElements);
+    }
+
+    // check the dimensions
+    if (p.numEqu == 1) {
+        const int dimensions[2] = { p.numDim, p.numDim };
+        if (!A.isDataPointShapeEqual(2, dimensions)) {
+            setShapeError("A", 2, dimensions);
+        } else if (!B.isDataPointShapeEqual(1, dimensions)) {
+            setShapeError("B", 1, dimensions);
+        } else if (!C.isDataPointShapeEqual(1, dimensions)) {
+            setShapeError("C", 1, dimensions);
+        } else if (!D.isDataPointShapeEqual(0, dimensions)) {
+            throw DudleyException("Assemble_PDE: coefficient D must be rank 0.");
+        } else if (!X.isDataPointShapeEqual(1, dimensions)) {
+            setShapeError("X", 1, dimensions);
+        } else if (!Y.isDataPointShapeEqual(0, dimensions)) {
+            throw DudleyException("Assemble_PDE: coefficient Y must be rank 0.");
+        }
+    } else {
+        const int dimAB[4] = { p.numEqu, p.numDim, p.numEqu, p.numDim };
+        const int dimCD[3] = { p.numEqu, p.numEqu, p.numDim };
+        if (!A.isDataPointShapeEqual(4, dimAB)) {
+            setShapeError("A", 4, dimAB);
+        } else if (!B.isDataPointShapeEqual(3, dimAB)) {
+            setShapeError("B", 3, dimAB);
+        } else if (!C.isDataPointShapeEqual(3, dimCD)) {
+            setShapeError("C", 3, dimCD);
+        } else if (!D.isDataPointShapeEqual(2, dimCD)) {
+            setShapeError("D", 2, dimCD);
+        } else if (!X.isDataPointShapeEqual(2, dimAB)) {
+            setShapeError("X", 2, dimAB);
+        } else if (!Y.isDataPointShapeEqual(1, dimAB)) {
+            setShapeError("Y", 1, dimAB);
+        }
+    }
+
+    if (funcspace==DUDLEY_POINTS) {
+        if (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !X.isEmpty()) {
+            throw DudleyException("Dudley_Assemble_PDE: Point elements require A, B, C and X to be empty.");
+        } else {
+            if (isComplex) {
+                Assemble_PDE_Points<cplx_t>(p, D, Y);
+            } else {
+                Assemble_PDE_Points<real_t>(p, D, Y);
+            }
+        }
+    } else {
+        if (p.numEqu > 1) {
+            // system of PDEs
+            if (p.numDim == 3) {
+                if (isComplex) {
+                    Assemble_PDE_System_3D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_System_3D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 2) {
+                if (isComplex) {
+                    Assemble_PDE_System_2D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_System_2D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else {
+                throw DudleyException("Assemble_PDE supports spatial dimensions 2 and 3 only.");
+            }
+        } else {
+            // single PDE
+            if (p.numDim == 3) {
+                if (isComplex) {
+                    Assemble_PDE_Single_3D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_Single_3D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 2) {
+                if (isComplex) {
+                    Assemble_PDE_Single_2D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_Single_2D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else {
+                throw DudleyException("Assemble_PDE supports spatial dimensions 2 and 3 only.");
+            }
+        }
     }
+}
 
-    /*  check the dimensions: */
+} // namespace dudley
 
-    if (p.numEqu == 1 && p.numComp == 1)
-    {
-	if (!isEmpty(A))
-	{
-	    dimensions[0] = p.numDim;
-	    dimensions[1] = p.numDim;
-	    if (!isDataPointShapeEqual(A, 2, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient A: illegal shape, expected shape (%d,%d)",
-			dimensions[0], dimensions[1]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(B))
-	{
-	    dimensions[0] = p.numDim;
-	    if (!isDataPointShapeEqual(B, 1, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient B: illegal shape (%d,)", dimensions[0]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(C))
-	{
-	    dimensions[0] = p.numDim;
-	    if (!isDataPointShapeEqual(C, 1, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient C, expected shape (%d,)", dimensions[0]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(D))
-	{
-	    if (!isDataPointShapeEqual(D, 0, dimensions))
-	    {
-		Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: coefficient D, rank 0 expected.");
-	    }
-	}
-	if (!isEmpty(X))
-	{
-	    dimensions[0] = p.numDim;
-	    if (!isDataPointShapeEqual(X, 1, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient X, expected shape (%d,", dimensions[0]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(Y))
-	{
-	    if (!isDataPointShapeEqual(Y, 0, dimensions))
-	    {
-		Dudley_setError(TYPE_ERROR, "Dudley_Assemble_PDE: coefficient Y, rank 0 expected.");
-	    }
-	}
-    }
-    else
-    {
-	if (!isEmpty(A))
-	{
-	    dimensions[0] = p.numEqu;
-	    dimensions[1] = p.numDim;
-	    dimensions[2] = p.numComp;
-	    dimensions[3] = p.numDim;
-	    if (!isDataPointShapeEqual(A, 4, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient A, expected shape (%d,%d,%d,%d)", dimensions[0],
-			dimensions[1], dimensions[2], dimensions[3]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(B))
-	{
-	    dimensions[0] = p.numEqu;
-	    dimensions[1] = p.numDim;
-	    dimensions[2] = p.numComp;
-	    if (!isDataPointShapeEqual(B, 3, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient B, expected shape (%d,%d,%d)", dimensions[0],
-			dimensions[1], dimensions[2]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(C))
-	{
-	    dimensions[0] = p.numEqu;
-	    dimensions[1] = p.numComp;
-	    dimensions[2] = p.numDim;
-	    if (!isDataPointShapeEqual(C, 3, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient C, expected shape (%d,%d,%d)", dimensions[0],
-			dimensions[1], dimensions[2]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(D))
-	{
-	    dimensions[0] = p.numEqu;
-	    dimensions[1] = p.numComp;
-	    if (!isDataPointShapeEqual(D, 2, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient D, expected shape (%d,%d)", dimensions[0],
-			dimensions[1]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(X))
-	{
-	    dimensions[0] = p.numEqu;
-	    dimensions[1] = p.numDim;
-	    if (!isDataPointShapeEqual(X, 2, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient X, expected shape (%d,%d)", dimensions[0],
-			dimensions[1]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-	if (!isEmpty(Y))
-	{
-	    dimensions[0] = p.numEqu;
-	    if (!isDataPointShapeEqual(Y, 1, dimensions))
-	    {
-		sprintf(error_msg, "Dudley_Assemble_PDE: coefficient Y, expected shape (%d,)", dimensions[0]);
-		Dudley_setError(TYPE_ERROR, error_msg);
-	    }
-	}
-    }
-    if (Dudley_noError())
-    {      
-            if (funcspace==DUDLEY_POINTS) {
-	         if ( !isEmpty(A) || !isEmpty(B) || !isEmpty(C) || !isEmpty(X) ) {
-                         Dudley_setError(TYPE_ERROR,"Finley_Assemble_PDE: Point elements require A, B, C and X to be empty.");
-                  } else {
-	              Dudley_Assemble_PDE_Points(p, elements,S,F, D, Y);
-		  }
-           }
-           else
-	   {
-	      if (p.numEqu == p.numComp)
-	      {
-		  if (p.numEqu > 1)
-		  {
-		      /* system of PDESs */
-		      if (p.numDim == 3)
-		      {
-			  Dudley_Assemble_PDE_System2_3D(p, elements, S, F, A, B, C, D, X, Y);
-		      }
-		      else if (p.numDim == 2)
-		      {
-			  Dudley_Assemble_PDE_System2_2D(p, elements, S, F, A, B, C, D, X, Y);
-		      }
-		      else
-		      {
-			  Dudley_setError(VALUE_ERROR, "Dudley_Assemble_PDE supports spatial dimensions 2 and 3 only.");
-		      }
-		  }
-		  else
-		  {
-		      /* single PDES */
-		      if (p.numDim == 3)
-		      {
-			  Dudley_Assemble_PDE_Single2_3D(p, elements, S, F, A, B, C, D, X, Y);
-		      }
-		      else if (p.numDim == 2)
-		      {
-			  Dudley_Assemble_PDE_Single2_2D(p, elements, S, F, A, B, C, D, X, Y);
-		      }
-		      else
-		      {
-			  Dudley_setError(VALUE_ERROR, "Dudley_Assemble_PDE supports spatial dimensions 2 and 3 only.");
-		      }
-		  }
-	    }
-	    else
-	    {
-		Dudley_setError(VALUE_ERROR, "Dudley_Assemble_PDE requires number of equations == number of solutions  .");
-	    }
-	  }
-    }
-    blocktimer_increment("Dudley_Assemble_PDE()", blocktimer_start);
-}
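The reworked shape checks above compare every coefficient against an expected
shape built from numEqu and numDim (with numComp == numEqu in Dudley). A small
cross-check of that table, independent of escript and using example values
only:

    // Prints the expected data-point shape per PDE coefficient, matching the
    // checks in Assemble_PDE. numEqu/numDim are example values; this is an
    // illustration, not escript code.
    #include <iostream>
    #include <string>
    #include <vector>

    static void show(const std::string& name, const std::vector<int>& shape)
    {
        std::cout << name << ": (";
        for (size_t i = 0; i < shape.size(); i++)
            std::cout << shape[i] << (i + 1 < shape.size() ? "," : "");
        std::cout << ")\n";
    }

    int main()
    {
        const int numEqu = 2, numDim = 3;   // example values only

        if (numEqu == 1) {                  // single PDE: drop the equation index
            show("A", {numDim, numDim});
            show("B", {numDim});
            show("C", {numDim});
            show("D", {});
            show("X", {numDim});
            show("Y", {});
        } else {                            // system of PDEs
            show("A", {numEqu, numDim, numEqu, numDim});
            show("B", {numEqu, numDim, numEqu});
            show("C", {numEqu, numEqu, numDim});
            show("D", {numEqu, numEqu});
            show("X", {numEqu, numDim});
            show("Y", {numEqu});
        }
        return 0;
    }

For numEqu > 1 the printed shapes correspond to the dimAB/dimCD arrays used in
the checks above; for numEqu == 1 every coefficient simply drops the equation
index.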
diff --git a/dudley/src/Assemble_PDE_Points.cpp b/dudley/src/Assemble_PDE_Points.cpp
index 6535432..7de8caf 100644
--- a/dudley/src/Assemble_PDE_Points.cpp
+++ b/dudley/src/Assemble_PDE_Points.cpp
@@ -15,77 +15,80 @@
 *****************************************************************************/
 
 
-/************************************************************************************/
+/****************************************************************************
 
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
+  Assembles the system of numEqu PDEs into the stiffness matrix S and right
+  hand side F
 
+      d_dirac_{k,m} u_m and y_dirac_k
 
-/*      d_dirac_{k,m} u_m yand _dirac_k */
+  u has p.numEqu components in a 3D domain.
+  The shape functions for test and solution must be identical and
+  row_NS == row_NN.
 
-/*    u has p.numComp components in a 3D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                                                  */
+  Shape of the coefficients:
 
-/*    Shape of the coefficients: */
+      d_dirac = p.numEqu x p.numEqu
+      y_dirac = p.numEqu
 
-/*      d_dirac = p.numEqu x p.numComp  */
-/*      y_dirac = p.numEqu   */
 
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+*****************************************************************************/
 
 #include "Assemble.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
 
-/************************************************************************************/
+#include <escript/index.h>
 
-void  Dudley_Assemble_PDE_Points(Dudley_Assemble_Parameters p,
-                                 Dudley_ElementFile* elements,
-                                 paso::SystemMatrix_ptr Mat, escript::Data* F,
-                                 const escript::Data* d_dirac, const escript::Data* y_dirac) {
+namespace dudley {
 
-    index_t color, e, row_index;
-    __const double  *d_dirac_p, *y_dirac_p;
-    
-    double *F_p=(requireWrite(F), getSampleDataRW(F,0));	/* use comma, to get around the mixed code and declarations thing */
+template<typename Scalar>
+void Assemble_PDE_Points(const AssembleParameters& p,
+                         const escript::Data& d_dirac,
+                         const escript::Data& y_dirac)
+{
+    Scalar* F_p = NULL;
+    const Scalar zero = static_cast<Scalar>(0);
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
+    }
 
-    #pragma omp parallel private(color, d_dirac_p, y_dirac_p)
+#pragma omp parallel
     {
-          for (color=elements->minColor;color<=elements->maxColor;color++) {
-             /*  open loop over all elements: */
-             #pragma omp for private(e) schedule(static)
-             for(e=0;e<elements->numElements;e++){
-                if (elements->Color[e]==color) {
+        std::vector<index_t> rowIndex(1);
+        std::vector<Scalar> values(p.numEqu*p.numEqu);
+
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
+#pragma omp for
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    rowIndex[0] = p.DOF[p.elements->Nodes[INDEX2(0,e,p.NN)]];
+                    if (!y_dirac.isEmpty()) {
+                        const Scalar* y_dirac_p = y_dirac.getSampleDataRO(e, zero);
+                        util::addScatter(1, &rowIndex[0], p.numEqu,
+                                         y_dirac_p, F_p, p.DOF_UpperBound);
+                    }
                    
-		   d_dirac_p=getSampleDataRO(d_dirac, e);
-                   y_dirac_p=getSampleDataRO(y_dirac, e);
-		   
-                   row_index=p.row_DOF[elements->Nodes[INDEX2(0,e,p.NN)]];
-		   
-		   if (NULL!=y_dirac_p)  Dudley_Util_AddScatter(1,
-                                                        &row_index,
-                                                        p.numEqu,
-                                                        y_dirac_p,
-                                                        F_p, 
-                                                        p.row_DOF_UpperBound);
-		   
-                   if (NULL!=d_dirac_p) Dudley_Assemble_addToSystemMatrix(Mat,
-                                                                   1,
-                                                                   &row_index,
-                                                                   p.numEqu,
-                                                                   1,
-                                                                   &row_index,
-                                                                   p.numComp,
-                                                                   d_dirac_p);
-                } /* end color check */
-             } /* end element loop */
-         } /* end color loop */
-   } /* end parallel region */
+                    if (!d_dirac.isEmpty()) {
+                        const Scalar* EM_S = d_dirac.getSampleDataRO(e, zero);
+                        values.assign(EM_S, EM_S+p.numEqu*p.numEqu);
+                        Assemble_addToSystemMatrix(p.S, rowIndex, p.numEqu,
+                                                   values);
+                    }
+                } // end color check
+            } // end element loop
+        } // end color loop
+    } // end parallel region
 }
+
+// instantiate our two supported versions
+template void Assemble_PDE_Points<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& d, const escript::Data& y);
+template void Assemble_PDE_Points<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& d, const escript::Data& y);
+
+} // namespace dudley
+
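
The templated Assemble_PDE_Points above handles Dirac (point) elements only: for each coloured point element it looks up the single degree of freedom, scatters y_dirac into the right-hand side F and adds d_dirac as a numEqu x numEqu block to the system matrix. As a minimal, self-contained sketch of what the util::addScatter call amounts to for the right-hand side -- not the dudley implementation itself, which also handles several nodes per element -- assuming F stores numEqu components per degree of freedom:

    // Editorial sketch only: accumulate one point contribution y (numEqu
    // values) into the global RHS F at row 'dof', skipping rows at or
    // beyond the given upper bound.
    template<typename Scalar>
    void scatterPointSource(index_t dof, int numEqu, const Scalar* y,
                            Scalar* F, index_t upperBound)
    {
        if (dof < upperBound) {
            for (int k = 0; k < numEqu; k++)
                F[dof * numEqu + k] += y[k];
        }
    }
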
diff --git a/dudley/src/Assemble_PDE_Single2_1D.cpp b/dudley/src/Assemble_PDE_Single2_1D.cpp
deleted file mode 100644
index b7cf147..0000000
--- a/dudley/src/Assemble_PDE_Single2_1D.cpp
+++ /dev/null
@@ -1,356 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{i,j} u_,j)_i-(B_{i} u)_i+C_{j} u_,j-D u_m  and -(X_,i)_i + Y */
-
-/*    in a 1D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                         */
-
-/*    Shape of the coefficients: */
-
-/*      A = 1 x 1 */
-/*      B = 1   */
-/*      C = 1   */
-/*      D = scalar  */
-/*      X = 1  */
-/*      Y = scalar   */
-
-/************************************************************************************/
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_Single2_1D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escriptDataC * F,
-				    escriptDataC * A, escriptDataC * B, escriptDataC * C, escriptDataC * D,
-				    escriptDataC * X, escriptDataC * Y)
-{
-
-#define DIM 1
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r;
-    register double rtmp;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    double *S = p.row_jac->BasisFunctions->S;
-    dim_t len_EM_S = p.row_numShapesTotal * p.row_numShapesTotal;
-    dim_t len_EM_F = p.row_numShapesTotal;
-
-#pragma omp parallel private(color, EM_S, EM_F, Vol, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q, row_index, q, s,r,rtmp,add_EM_F, add_EM_S)
-    {
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.row_numShapesTotal];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-
-			A_p = getSampleDataRO(A, e);
-			C_p = getSampleDataRO(C, e);
-			B_p = getSampleDataRO(B, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.row_numShapesTotal, DIM, p.numQuadTotal, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-		      /************************************************************************************/
-			/*   process A: */
-		      /************************************************************************************/
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX4(0, 0, 0, 0, DIM, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					{
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)] *
-						A_q[INDEX3(0, 0, q, DIM, DIM)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapesTotal, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapesTotal, DIM)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] +=
-					    rtmp * A_p[INDEX2(0, 0, DIM)];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process B: */
-		      /************************************************************************************/
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX3(0, 0, 0, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					{
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)] *
-						B_q[INDEX2(0, q, DIM)] * S[INDEX2(r, q, p.row_numShapes)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)] *
-						S[INDEX2(r, q, p.row_numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] +=
-					    rtmp * B_p[0];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process C: */
-		      /************************************************************************************/
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX3(0, 0, 0, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					{
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] * C_q[INDEX2(0, q, DIM)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapesTotal, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapesTotal, DIM)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] +=
-					    rtmp * C_p[0];
-				    }
-				}
-			    }
-			}
-		      /*********************************************************************************** */
-			/* process D */
-		      /************************************************************************************/
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX2(0, 0, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					{
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] * D_q[q] *
-						S[INDEX2(r, q, p.row_numShapes)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] *
-						S[INDEX2(r, q, p.row_numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.row_numShapesTotal)] +=
-					    rtmp * D_p[0];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process X: */
-		      /************************************************************************************/
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX3(0, 0, 0, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp +=
-					    vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)] *
-					    X_q[INDEX2(0, q, DIM)];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp += vol * DSDX[INDEX3(s, 0, q, p.row_numShapesTotal, DIM)];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp * X_p[0];
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process Y: */
-		     /************************************************************************************/
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX2(0, 0, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp += vol * S[INDEX2(s, q, p.row_numShapes)] * Y_q[q];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp += vol * S[INDEX2(s, q, p.row_numShapes)];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp * Y_p[0];
-				}
-			    }
-			}
-		       /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		       /*********************************************************************************************************************/
-			for (q = 0; q < p.row_numShapesTotal; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.row_numShapesTotal, row_index, p.numEqu, EM_F, F_p,
-						   p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.row_numShapesTotal, row_index, p.numEqu,
-							      p.row_numShapesTotal, row_index, p.numComp, EM_S);
-
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;	/* these FREEs appear to be inside the if because if any of the allocs */
-	    delete[] EM_F;	/* failed it means an out of memory (which is not recoverable anyway) */
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
-
-/*
- * $Log$
- */
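
For reference, the quantity these Single2 kernels accumulate can be read straight off the loops in the deleted file above (and is unchanged in the replacement kernels further down): per element, with quadrature weight vol, shape functions S_s and their derivatives \partial_i S_s taken from DSDX, and summation over the quadrature points q and the repeated spatial indices i, j,

    \[
      \mathrm{EM\_S}_{sr} \mathrel{+}= \mathrm{vol}\sum_{q}\Bigl(
          \partial_i S_s\,A_{ij}\,\partial_j S_r
        + \partial_i S_s\,B_i\,S_r
        + S_s\,C_j\,\partial_j S_r
        + S_s\,D\,S_r\Bigr),
      \qquad
      \mathrm{EM\_F}_{s} \mathrel{+}= \mathrm{vol}\sum_{q}\bigl(
          \partial_i S_s\,X_i + S_s\,Y\bigr),
    \]

with the coefficient shapes listed in the header comments (A is DIM x DIM; B, C and X have DIM entries; D and Y are scalars). Constant coefficients simply factor out of the q-sum, which is the non-expanded branch of the code.
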
diff --git a/dudley/src/Assemble_PDE_Single2_2D.cpp b/dudley/src/Assemble_PDE_Single2_2D.cpp
deleted file mode 100644
index 1bbb523..0000000
--- a/dudley/src/Assemble_PDE_Single2_2D.cpp
+++ /dev/null
@@ -1,393 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{i,j} u_,j)_i-(B_{i} u)_i+C_{j} u_,j-D u_m  and -(X_,i)_i + Y */
-
-/*    in a 2D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                         */
-
-/*    Shape of the coefficients: */
-
-/*      A = 2 x 2 */
-/*      B = 2   */
-/*      C = 2   */
-/*      D = scalar  */
-/*      X = 2  */
-/*      Y = scalar   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "ShapeTable.h"
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_Single2_2D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escript::Data* F,
-				    const escript::Data* A, const escript::Data* B, const escript::Data* C, const escript::Data* D,
-				    const escript::Data* X, const escript::Data* Y)
-{
-
-#define DIM 2
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r;
-    register double rtmp00, rtmp01, rtmp10, rtmp11, rtmp, rtmp0, rtmp1;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    const double *S = p.shapeFns;
-    dim_t len_EM_S = p.numShapes * p.numShapes;
-    dim_t len_EM_F = p.numShapes;
-
-#pragma omp parallel private(color,EM_S, EM_F, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q,row_index,q, s,r,rtmp00, rtmp01, rtmp10, rtmp11, rtmp, rtmp0, rtmp1,add_EM_F, add_EM_S)
-    {
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.numShapes];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-
-			A_p = getSampleDataRO(A, e);
-			B_p = getSampleDataRO(B, e);
-			C_p = getSampleDataRO(C, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-
-
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-		     /************************************************************************************/
-			/*   process A: */
-		     /************************************************************************************/
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX4(0, 0, 0, 0, DIM, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(0, 0, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(0, 1, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(1, 0, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(1, 1, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
-
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp00 = 0;
-					rtmp01 = 0;
-					rtmp10 = 0;
-					rtmp11 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp00 += rtmp0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp01 += rtmp0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp10 += rtmp1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp11 += rtmp1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp00 * A_p[INDEX2(0, 0, DIM)] + rtmp01 * A_p[INDEX2(0, 1, DIM)] +
-					    rtmp10 * A_p[INDEX2(1, 0, DIM)] + rtmp11 * A_p[INDEX2(1, 1, DIM)];
-				    }
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process B: */
-		     /************************************************************************************/
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0.;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * S[INDEX2(r, q, p.numShapes)] *
-						(DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						 B_q[INDEX2(0, q, DIM)] +
-						 DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] * B_q[INDEX2(1, q, DIM)]);
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(r, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp0 * B_p[0] + rtmp1 * B_p[1];
-				    }
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process C: */
-		     /************************************************************************************/
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.numShapes)] * (C_q[INDEX2(0, q, DIM)] *
-										      DSDX[INDEX3
-											   (r, 0, q,
-											    p.numShapes,
-											    DIM)] + C_q[INDEX2(1, q,
-													       DIM)]
-										      *
-										      DSDX[INDEX3
-											   (r, 1, q,
-											    p.numShapes, DIM)]);
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(s, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp0 * C_p[0] + rtmp1 * C_p[1];
-				    }
-				}
-			    }
-			}
-		     /*********************************************************************************** */
-			/* process D */
-		     /************************************************************************************/
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX2(0, 0, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.numShapes)] * D_q[q] *
-						S[INDEX2(r, q, p.numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp * D_p[0];
-				    }
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process X: */
-		     /************************************************************************************/
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0.;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp +=
-					    vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						   X_q[INDEX2(0, q, DIM)] +
-						   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] * X_q[INDEX2(1, q, DIM)]);
-				    }
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp0 = 0.;
-				    rtmp1 = 0.;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					rtmp1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-				    }
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp0 * X_p[0] + rtmp1 * X_p[1];
-				}
-			    }
-			}
-		    /************************************************************************************/
-			/*   process Y: */
-		    /************************************************************************************/
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX2(0, 0, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[q];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp * Y_p[0];
-				}
-			    }
-			}
-		      /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		      /*********************************************************************************************************************/
-
-			for (q = 0; q < p.numShapes; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_F, F_p, p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.numShapes, row_index, p.numEqu,
-							      p.numShapes, row_index, p.numComp, EM_S);
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;
-	    delete[] EM_F;
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
diff --git a/dudley/src/Assemble_PDE_Single2_3D.cpp b/dudley/src/Assemble_PDE_Single2_3D.cpp
deleted file mode 100644
index 82daca1..0000000
--- a/dudley/src/Assemble_PDE_Single2_3D.cpp
+++ /dev/null
@@ -1,428 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{i,j} u_,j)_i-(B_{i} u)_i+C_{j} u_,j-D u_m  and -(X_,i)_i + Y */
-
-/*    in a 3D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                         */
-
-/*    Shape of the coefficients: */
-
-/*      A = 3 x 3 */
-/*      B = 3   */
-/*      C = 3   */
-/*      D = scalar  */
-/*      X = 3  */
-/*      Y = scalar   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_Single2_3D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escript::Data* F,
-				    const escript::Data* A, const escript::Data* B, const escript::Data* C, const escript::Data* D,
-				    const escript::Data* X, const escript::Data* Y)
-{
-
-#define DIM 3
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r;
-    register double rtmp, rtmp00, rtmp01, rtmp02, rtmp10, rtmp11, rtmp12, rtmp20, rtmp21, rtmp22, rtmp0, rtmp1, rtmp2;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    const double *S = p.shapeFns;
-    dim_t len_EM_S = p.numShapes * p.numShapes;
-    dim_t len_EM_F = p.numShapes;
-
-#pragma omp parallel private(color,EM_S, EM_F, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q,row_index,q, s,r,rtmp, rtmp00, rtmp01, rtmp02, rtmp10, rtmp11, rtmp12, rtmp20, rtmp21, rtmp22, rtmp0, rtmp1, rtmp2,add_EM_F, add_EM_S)
-    {
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.numShapes];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-
-			A_p = getSampleDataRO(A, e);
-			B_p = getSampleDataRO(B, e);
-			C_p = getSampleDataRO(C, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-
-		      /************************************************************************************/
-			/*   process A: */
-		      /************************************************************************************/
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX4(0, 0, 0, 0, DIM, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(0, 0, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(0, 1, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(0, 2, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(1, 0, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(1, 1, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(1, 2, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(2, 0, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(2, 1, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-						       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-						       A_q[INDEX3(2, 2, q, DIM, DIM)] *
-						       DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp00 = 0;
-					rtmp01 = 0;
-					rtmp02 = 0;
-					rtmp10 = 0;
-					rtmp11 = 0;
-					rtmp12 = 0;
-					rtmp20 = 0;
-					rtmp21 = 0;
-					rtmp22 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-
-					    rtmp0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp00 += rtmp0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp01 += rtmp0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp02 += rtmp0 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-
-					    rtmp1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp10 += rtmp1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp11 += rtmp1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp12 += rtmp1 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-
-					    rtmp2 = vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-					    rtmp20 += rtmp2 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp21 += rtmp2 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp22 += rtmp2 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp00 * A_p[INDEX2(0, 0, DIM)] + rtmp01 * A_p[INDEX2(0, 1, DIM)] +
-					    rtmp02 * A_p[INDEX2(0, 2, DIM)] + rtmp10 * A_p[INDEX2(1, 0, DIM)] +
-					    rtmp11 * A_p[INDEX2(1, 1, DIM)] + rtmp12 * A_p[INDEX2(1, 2, DIM)] +
-					    rtmp20 * A_p[INDEX2(2, 0, DIM)] + rtmp21 * A_p[INDEX2(2, 1, DIM)] +
-					    rtmp22 * A_p[INDEX2(2, 2, DIM)];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process B: */
-		      /************************************************************************************/
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp += vol * S[INDEX2(r, q, p.numShapes)] *
-						(DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						 B_q[INDEX2(0, q, DIM)] +
-						 DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						 B_q[INDEX2(1, q, DIM)] +
-						 DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] * B_q[INDEX2(2, q, DIM)]);
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					rtmp2 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(r, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp2 += rtmp * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp0 * B_p[0] + rtmp1 * B_p[1] + rtmp2 * B_p[2];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process C: */
-		      /************************************************************************************/
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] *
-						(C_q[INDEX2(0, q, DIM)] *
-						 DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-						 C_q[INDEX2(1, q, DIM)] *
-						 DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-						 C_q[INDEX2(2, q, DIM)] * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					rtmp2 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(s, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp2 += rtmp * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-					}
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-					    rtmp0 * C_p[0] + rtmp1 * C_p[1] + rtmp2 * C_p[2];
-				    }
-				}
-			    }
-			}
-		      /*********************************************************************************** */
-			/* process D */
-		      /************************************************************************************/
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX2(0, 0, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.numShapes)] * D_q[q] *
-						S[INDEX2(r, q, p.numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
-					EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp * D_p[0];
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process X: */
-		      /************************************************************************************/
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX3(0, 0, 0, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp +=
-					    vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						   X_q[INDEX2(0, q, DIM)] +
-						   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						   X_q[INDEX2(1, q, DIM)] +
-						   DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] * X_q[INDEX2(2, q, DIM)]);
-				    }
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp0 = 0;
-				    rtmp1 = 0;
-				    rtmp2 = 0;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					rtmp1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					rtmp2 += vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-				    }
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp0 * X_p[0] + rtmp1 * X_p[1] + rtmp2 * X_p[2];
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process Y: */
-		     /************************************************************************************/
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX2(0, 0, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[q];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp;
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-				    EM_F[INDEX2(0, s, p.numEqu)] += rtmp * Y_p[0];
-				}
-			    }
-			}
-		       /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		       /*********************************************************************************************************************/
-
-			for (q = 0; q < p.numShapes; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_F, F_p, p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.numShapes, row_index, p.numEqu,
-							      p.numShapes, row_index, p.numComp, EM_S);
-
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;
-	    delete[] EM_F;
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
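
All of these kernels, deleted and replacement alike, address the flattened coefficient, shape-function and derivative arrays through the INDEXn macros, now pulled in from <escript/index.h>. As a hedged reminder of the convention -- the expansions below are assumptions for illustration; the authoritative definitions live in escript/index.h -- the first argument varies fastest:

    // Assumed expansions, for illustration only; see escript/index.h.
    #define INDEX2(i, j, n1)                ((i) + (n1) * (j))
    #define INDEX3(i, j, k, n1, n2)         ((i) + (n1) * ((j) + (n2) * (k)))
    #define INDEX4(i, j, k, l, n1, n2, n3)  ((i) + (n1) * ((j) + (n2) * ((k) + (n3) * (l))))

    // Example: DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] is the derivative of
    // shape function s with respect to x_1 at quadrature point q.
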
diff --git a/dudley/src/Assemble_PDE_Single_2D.cpp b/dudley/src/Assemble_PDE_Single_2D.cpp
new file mode 100644
index 0000000..4772613
--- /dev/null
+++ b/dudley/src/Assemble_PDE_Single_2D.cpp
@@ -0,0 +1,321 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+/****************************************************************************
+
+  Assembles a single PDE into the stiffness matrix S and right hand side F
+
+      -(A_{i,j} u_,j)_i-(B_{i} u)_i+C_{j} u_,j-D u_m  and -(X_,i)_i + Y
+
+  in a 2D domain. The shape functions for test and solution must be identical
+  and row_NS == row_NN.
+
+  Shape of the coefficients:
+
+      A = 2 x 2
+      B = 2
+      C = 2
+      D = scalar
+      X = 2
+      Y = scalar
+
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+namespace dudley {
+
+template<typename Scalar>
+void Assemble_PDE_Single_2D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y)
+{
+    const int DIM = 2;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
+    }
+    const double* S = p.shapeFns;
+    const int len_EM_S = p.numShapes * p.numShapes;
+    const int len_EM_F = p.numShapes;
+
+#pragma omp parallel
+    {
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        std::vector<index_t> row_index(len_EM_F);
+
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
+#pragma omp for
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    const double vol = p.jac->absD[e] * p.jac->quadweight;
+                    const double* DSDX = &p.jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)];
+                    std::fill(EM_S.begin(), EM_S.end(), zero);
+                    std::fill(EM_F.begin(), EM_F.end(), zero);
+                    bool add_EM_F = false;
+                    bool add_EM_S = false;
+                    /////////////////
+                    //  process A  //
+                    /////////////////
+                    if (!A.isEmpty()) {
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedA) {
+                            const Scalar* A_q = &A_p[INDEX4(0, 0, 0, 0, DIM, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f += vol *
+                                            (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                             A_q[INDEX3(0, 0, q, DIM, DIM)] *
+                                             DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                             DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                             A_q[INDEX3(0, 1, q, DIM, DIM)] *
+                                             DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                             DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                             A_q[INDEX3(1, 0, q, DIM, DIM)] *
+                                             DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                             DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                             A_q[INDEX3(1, 1, q, DIM, DIM)] *
+                                             DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f00 = zero;
+                                    Scalar f01 = zero;
+                                    Scalar f10 = zero;
+                                    Scalar f11 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        const Scalar f1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f00 += f0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f01 += f0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f10 += f1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f11 += f1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f00 * A_p[INDEX2(0, 0, DIM)] + f01 * A_p[INDEX2(0, 1, DIM)] +
+                                        f10 * A_p[INDEX2(1, 0, DIM)] + f11 * A_p[INDEX2(1, 1, DIM)];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process B //
+                    ///////////////
+                    if (!B.isEmpty()) {
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedB) {
+                            const Scalar* B_q = &B_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = 0.;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f +=
+                                            vol * S[INDEX2(r, q, p.numShapes)] *
+                                            (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                             B_q[INDEX2(0, q, DIM)] +
+                                             DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] * B_q[INDEX2(1, q, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(r, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f0 * B_p[0] + f1 * B_p[1];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process C //
+                    ///////////////
+                    if (!C.isEmpty())
+                    {
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedC) {
+                            const Scalar* C_q = &C_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f += vol * S[INDEX2(s, q, p.numShapes)]*
+                                            (C_q[INDEX2(0, q, DIM)] *
+                                            DSDX[INDEX3(r, 0, q, p.numShapes, DIM)]
+                                            + C_q[INDEX2(1, q, DIM)] *
+                                            DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(s, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f0 * C_p[0] + f1 * C_p[1];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process D //
+                    ///////////////
+                    if (!D.isEmpty())
+                    {
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedD) {
+                            const Scalar* D_q = &D_p[INDEX2(0, 0, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f +=
+                                            vol * S[INDEX2(s, q, p.numShapes)] * D_q[q] *
+                                            S[INDEX2(r, q, p.numShapes)];
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f * D_p[0];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process X //
+                    ///////////////
+                    if (!X.isEmpty()) {
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedX) {
+                            const Scalar* X_q = &X_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f += vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                               X_q[INDEX2(0, q, DIM)] +
+                                               DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] * X_q[INDEX2(1, q, DIM)]);
+                                }
+                                EM_F[INDEX2(0, s, p.numEqu)] += f;
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f0 = zero;
+                                Scalar f1 = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                    f1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                }
+                                EM_F[INDEX2(0, s, p.numEqu)] += f0*X_p[0] + f1*X_p[1];
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process Y //
+                    ///////////////
+                    if (!Y.isEmpty()) {
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedY) {
+                            const Scalar* Y_q = &Y_p[INDEX2(0, 0, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[q];
+                                EM_F[INDEX2(0, s, p.numEqu)] += f;
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)];
+                                EM_F[INDEX2(0, s, p.numEqu)] += f * Y_p[0];
+                            }
+                        }
+                    }
+                    // add the element matrices onto the matrix and
+                    // right hand side
+                    for (int q = 0; q < p.numShapes; q++)
+                        row_index[q] = p.DOF[p.elements->Nodes[INDEX2(q, e, p.NN)]];
+                    if (add_EM_F)
+                        util::addScatter(p.numShapes, &row_index[0],
+                                    p.numEqu, &EM_F[0], F_p, p.DOF_UpperBound);
+                    if (add_EM_S)
+                        Assemble_addToSystemMatrix(p.S, row_index, p.numEqu,
+                                                   EM_S);
+                } // end color check
+            } // end element loop
+        } // end color loop
+    } // end parallel region
+}
+
+// instantiate our two supported versions
+template void Assemble_PDE_Single_2D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_Single_2D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+} // namespace dudley
+
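
The serial loop over element colours wrapped around the parallel element loop is what makes the
nodal scatter above thread safe: elements of one colour share no nodes, so their contributions can
be added concurrently. The following self-contained sketch only illustrates that pattern; the tiny
mesh, its colouring and the constant element vector are invented for this example and are not part
of the patch.

#include <cstdio>
#include <vector>

struct Element {
    int color;     // colours chosen so that same-coloured elements share no nodes
    int nodes[3];  // global node numbers of a triangular element
};

int main()
{
    // invented 4-element mesh on 6 nodes
    std::vector<Element> elems = {
        {0, {0, 1, 2}}, {0, {3, 4, 5}},   // colour 0: node sets are disjoint
        {1, {1, 2, 3}}, {1, {0, 4, 5}}    // colour 1: node sets are disjoint
    };
    std::vector<double> F(6, 0.0);        // global right hand side

    for (int color = 0; color <= 1; color++) {   // colours are processed one after another
#pragma omp parallel for
        for (int e = 0; e < (int)elems.size(); e++) {
            if (elems[e].color != color)
                continue;                        // same check as "Color[e] == color" above
            const double EM_F[3] = {1.0, 1.0, 1.0};  // stand-in element vector
            for (int q = 0; q < 3; q++)
                F[elems[e].nodes[q]] += EM_F[q]; // race free within one colour
        }
    }
    for (int i = 0; i < 6; i++)
        std::printf("F[%d] = %g\n", i, F[i]);
    return 0;
}
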
diff --git a/dudley/src/Assemble_PDE_Single_3D.cpp b/dudley/src/Assemble_PDE_Single_3D.cpp
new file mode 100644
index 0000000..cd2cbe6
--- /dev/null
+++ b/dudley/src/Assemble_PDE_Single_3D.cpp
@@ -0,0 +1,365 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+/****************************************************************************
+
+  Assembles a single PDE into the stiffness matrix S and right hand side F
+
+      -(A_{i,j} u_{,j})_{,i} - (B_i u)_{,i} + C_j u_{,j} + D u = -(X_i)_{,i} + Y
+
+  in a 3D domain. The shape functions for test and solution must be identical
+  and row_NS == row_NN.
+
+  Shape of the coefficients:
+
+      A = 3 x 3
+      B = 3
+      C = 3
+      D = scalar
+      X = 3
+      Y = scalar
+
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+namespace dudley {
+
+template<typename Scalar>
+void Assemble_PDE_Single_3D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y)
+{
+    const int DIM = 3;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
+    }
+    const double* S = p.shapeFns;
+    const int len_EM_S = p.numShapes * p.numShapes;
+    const int len_EM_F = p.numShapes;
+
+#pragma omp parallel
+    {
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        std::vector<index_t> row_index(len_EM_F);
+
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
+#pragma omp for
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    const double vol = p.jac->absD[e] * p.jac->quadweight;
+                    const double* DSDX = &p.jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)];
+                    std::fill(EM_S.begin(), EM_S.end(), zero);
+                    std::fill(EM_F.begin(), EM_F.end(), zero);
+                    bool add_EM_F = false;
+                    bool add_EM_S = false;
+
+                    ///////////////
+                    // process A //
+                    ///////////////
+                    if (!A.isEmpty()) {
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedA) {
+                            const Scalar* A_q = &A_p[INDEX4(0, 0, 0, 0, DIM, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f +=
+                                            vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(0, 0, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(0, 1, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(0, 2, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(1, 0, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(1, 1, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(1, 2, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(2, 0, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(2, 1, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                   DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                   A_q[INDEX3(2, 2, q, DIM, DIM)] *
+                                                   DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f00 = zero;
+                                    Scalar f01 = zero;
+                                    Scalar f02 = zero;
+                                    Scalar f10 = zero;
+                                    Scalar f11 = zero;
+                                    Scalar f12 = zero;
+                                    Scalar f20 = zero;
+                                    Scalar f21 = zero;
+                                    Scalar f22 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+
+                                        const Scalar f0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f00 += f0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f01 += f0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f02 += f0 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+
+                                        const Scalar f1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f10 += f1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f11 += f1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f12 += f1 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+
+                                        const Scalar f2 = vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                        f20 += f2 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f21 += f2 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f22 += f2 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f00 * A_p[INDEX2(0, 0, DIM)] + f01 * A_p[INDEX2(0, 1, DIM)] +
+                                        f02 * A_p[INDEX2(0, 2, DIM)] + f10 * A_p[INDEX2(1, 0, DIM)] +
+                                        f11 * A_p[INDEX2(1, 1, DIM)] + f12 * A_p[INDEX2(1, 2, DIM)] +
+                                        f20 * A_p[INDEX2(2, 0, DIM)] + f21 * A_p[INDEX2(2, 1, DIM)] +
+                                        f22 * A_p[INDEX2(2, 2, DIM)];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process B //
+                    ///////////////
+                    if (!B.isEmpty()) {
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedB) {
+                            const Scalar* B_q = &B_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f += vol * S[INDEX2(r, q, p.numShapes)] *
+                                            (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                             B_q[INDEX2(0, q, DIM)] +
+                                             DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                             B_q[INDEX2(1, q, DIM)] +
+                                             DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] * B_q[INDEX2(2, q, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(r, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f2 += f * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f0 * B_p[0] + f1 * B_p[1] + f2 * B_p[2];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process C //
+                    ///////////////
+                    if (!C.isEmpty()) {
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedC) {
+                            const Scalar* C_q = &C_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] *
+                                            (C_q[INDEX2(0, q, DIM)] *
+                                             DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                             C_q[INDEX2(1, q, DIM)] *
+                                             DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                             C_q[INDEX2(2, q, DIM)] * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(s, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f2 += f * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+                                    }
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                        f0 * C_p[0] + f1 * C_p[1] + f2 * C_p[2];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process D //
+                    ///////////////
+                    if (!D.isEmpty()) {
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedD) {
+                            const Scalar* D_q = &D_p[INDEX2(0, 0, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f +=
+                                            vol * S[INDEX2(s, q, p.numShapes)] * D_q[q] *
+                                            S[INDEX2(r, q, p.numShapes)];
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
+                                    EM_S[INDEX4(0, 0, s, r, p.numEqu, p.numEqu, p.numShapes)] += f * D_p[0];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process X //
+                    ///////////////
+                    if (!X.isEmpty()) {
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedX) {
+                            const Scalar* X_q = &X_p[INDEX3(0, 0, 0, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f +=
+                                        vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                               X_q[INDEX2(0, q, DIM)] +
+                                               DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                               X_q[INDEX2(1, q, DIM)] +
+                                               DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] * X_q[INDEX2(2, q, DIM)]);
+                                }
+                                EM_F[INDEX2(0, s, p.numEqu)] += f;
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f0 = zero;
+                                Scalar f1 = zero;
+                                Scalar f2 = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                    f1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                    f2 += vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                }
+                                EM_F[INDEX2(0, s, p.numEqu)] += f0 * X_p[0] + f1 * X_p[1] + f2 * X_p[2];
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process Y //
+                    ///////////////
+                    if (!Y.isEmpty()) {
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedY) {
+                            const Scalar* Y_q = &Y_p[INDEX2(0, 0, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[q];
+                                EM_F[INDEX2(0, s, p.numEqu)] += f;
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)];
+                                EM_F[INDEX2(0, s, p.numEqu)] += f * Y_p[0];
+                            }
+                        }
+                    }
+                    // add the element matrices onto the matrix and right
+                    // hand side
+                    for (int q = 0; q < p.numShapes; q++)
+                        row_index[q] = p.DOF[p.elements->Nodes[INDEX2(q, e, p.NN)]];
+
+                    if (add_EM_F)
+                        util::addScatter(p.numShapes, &row_index[0], p.numEqu,
+                                         &EM_F[0], F_p, p.DOF_UpperBound);
+                    if (add_EM_S)
+                        Assemble_addToSystemMatrix(p.S, row_index, p.numEqu,
+                                                   EM_S);
+
+                } // end color check
+            } // end element loop
+        } // end color loop
+    } // end parallel region
+}
+
+// instantiate our two supported versions
+template void Assemble_PDE_Single_3D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_Single_3D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+} // namespace dudley
+
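
Both new assemblers address their flat arrays exclusively through the INDEX2/INDEX3/... macros from
escript/index.h. The sketch below spells out the first-index-fastest layout the loops rely on; the
macro definitions are local stand-ins written for this example (assumed to match the upstream
convention), not copies of the header.

#include <cassert>
#include <cstdio>

// local stand-ins for the escript INDEX* macros, assuming the usual
// first-index-fastest layout
#define IDX2(i, j, n1)        ((i) + (n1) * (j))
#define IDX3(i, j, k, n1, n2) ((i) + (n1) * ((j) + (n2) * (k)))

int main()
{
    const int DIM = 3, numShapes = 4, numQuad = 2;

    // S[s, q]: shape index s runs fastest, so S(s=3, q=1) sits at 3 + 4*1
    assert(IDX2(3, 1, numShapes) == 7);

    // DSDX[s, i, q]: s fastest, then direction i, then quadrature point q --
    // the ordering the assembly loops above rely on
    double DSDX[numShapes * DIM * numQuad];
    for (int q = 0; q < numQuad; q++)
        for (int i = 0; i < DIM; i++)
            for (int s = 0; s < numShapes; s++)
                DSDX[IDX3(s, i, q, numShapes, DIM)] = 100.0 * q + 10.0 * i + s;

    // (s=2, i=1, q=1) lives at flat offset 2 + 4*(1 + 3*1) = 18
    std::printf("DSDX(s=2,i=1,q=1) = %g (flat offset %d)\n",
                DSDX[IDX3(2, 1, 1, numShapes, DIM)],
                IDX3(2, 1, 1, numShapes, DIM));
    return 0;
}
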
diff --git a/dudley/src/Assemble_PDE_System2_1D.cpp b/dudley/src/Assemble_PDE_System2_1D.cpp
deleted file mode 100644
index c119fd2..0000000
--- a/dudley/src/Assemble_PDE_System2_1D.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m  and -(X_{k,i})_i + Y_k */
-
-/*    u has p.numComp components in a 1D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                                                  */
-
-/*    Shape of the coefficients: */
-
-/*      A = p.numEqu x 1 x p.numComp x 1 */
-/*      B = 1 x numEqu x p.numComp  */
-/*      C = p.numEqu x 1 x p.numComp  */
-/*      D = p.numEqu x p.numComp  */
-/*      X = p.numEqu x 1  */
-/*      Y = p.numEqu   */
-
-/************************************************************************************/
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_System2_1D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escriptDataC * F,
-				    escriptDataC * A, escriptDataC * B, escriptDataC * C, escriptDataC * D,
-				    escriptDataC * X, escriptDataC * Y)
-{
-
-#define DIM 1
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r, k, m;
-    register double rtmp;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    double *S = p.row_jac->BasisFunctions->S;
-    dim_t len_EM_S = p.row_numShapes * p.row_numShapes * p.numEqu * p.numComp;
-    dim_t len_EM_F = p.row_numShapes * p.numEqu;
-
-#pragma omp parallel private(color, EM_S, EM_F, Vol, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q,row_index, q, s,r,k,m,rtmp,add_EM_F, add_EM_S)
-    {
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.row_numShapes];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-
-			A_p = getSampleDataRO(A, e);
-			B_p = getSampleDataRO(B, e);
-			C_p = getSampleDataRO(C, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-			Vol = &(p.row_jac->volume[INDEX3(0, 0, e, p.numQuadTotal, 1)]);
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.row_numShapes, DIM, p.numQuadTotal, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-
-		      /************************************************************************************/
-			/*   process A: */
-		      /************************************************************************************/
-
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX6(0, 0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0.;
-						for (q = 0; q < p.numQuadTotal; q++)
-						{
-						    rtmp += vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)] *
-							A_q[INDEX5(k, 0, m, 0, q, p.numEqu, DIM, p.numComp, DIM)] *
-							DSDX[INDEX3(r, 0, q, p.row_numShapes, DIM)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapes, DIM)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] +=
-						    rtmp * A_p[INDEX4(k, 0, m, 0, p.numEqu, DIM, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process B: */
-		      /************************************************************************************/
-
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0.;
-						for (q = 0; q < p.numQuadTotal; q++)
-						{
-						    rtmp += vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)] *
-							B_q[INDEX4(k, 0, m, q, p.numEqu, DIM, p.numComp)] *
-							S[INDEX2(r, q, p.row_numShapes)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)] *
-						S[INDEX2(r, q, p.row_numShapes)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] +=
-						    rtmp * B_p[INDEX3(k, 0, m, p.numEqu, DIM)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process C: */
-		      /************************************************************************************/
-
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, p.numComp, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuadTotal; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(s, q, p.row_numShapes)] *
-							C_q[INDEX4(k, m, 0, q, p.numEqu, p.numComp, DIM)] *
-							DSDX[INDEX3(r, 0, q, p.row_numShapes, DIM)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] *
-						DSDX[INDEX3(r, 0, q, p.row_numShapes, DIM)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] +=
-						    rtmp * C_p[INDEX3(k, m, 0, p.numEqu, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /*********************************************************************************** */
-			/* process D */
-		      /************************************************************************************/
-
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX4(0, 0, 0, 0, p.numEqu, p.numComp, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuadTotal; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(s, q, p.row_numShapes)] *
-							D_q[INDEX3(k, m, q, p.numEqu, p.numComp)] *
-							S[INDEX2(r, q, p.row_numShapes)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] += rtmp;
-
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (r = 0; r < p.row_numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] *
-						S[INDEX2(r, q, p.row_numShapes)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.row_numShapes)] +=
-						    rtmp * D_p[INDEX2(k, m, p.numEqu)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process X: */
-		      /************************************************************************************/
-
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX4(0, 0, 0, 0, p.numEqu, DIM, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)] *
-						X_q[INDEX3(k, 0, q, p.numEqu, DIM)];
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp += vol * DSDX[INDEX3(s, 0, q, p.row_numShapes, DIM)];
-				    for (k = 0; k < p.numEqu; k++)
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp * X_p[INDEX2(k, 0, p.numEqu)];
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process Y: */
-		     /************************************************************************************/
-
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX3(0, 0, 0, p.numEqu, p.numQuadTotal)]);
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuadTotal; q++)
-					    rtmp +=
-						vol * S[INDEX2(s, q, p.row_numShapes)] * Y_q[INDEX2(k, q, p.numEqu)];
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.row_numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuadTotal; q++)
-					rtmp += vol * S[INDEX2(s, q, p.row_numShapes)];
-				    for (k = 0; k < p.numEqu; k++)
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp * Y_p[k];
-				}
-			    }
-			}
-		       /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		       /*********************************************************************************************************************/
-
-			for (q = 0; q < p.row_numShapes; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.row_numShapes, row_index, p.numEqu, EM_F, F_p,
-						   p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.row_numShapes, row_index, p.numEqu,
-							      p.row_numShapes, row_index, p.numComp, EM_S);
-
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;
-	    delete[] EM_F;
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
-
-/*
- * $Log$
- */
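
The deleted file above allocated its per-thread workspace with new[] inside the parallel region and
guarded the raw pointers with Dudley_checkPtr; the replacement assemblers keep the same structure
but hold the workspace in std::vector so it is released automatically. A minimal sketch of that
pattern follows (sizes and values invented for illustration):

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    const int numElements = 8;
    const int len_EM_F = 3;                  // stand-in element vector length
    std::vector<double> F(numElements, 0.0); // shared result

#pragma omp parallel
    {
        // thread-local scratch, no new[]/delete[] and no pointer check needed
        std::vector<double> EM_F(len_EM_F);

#pragma omp for
        for (int e = 0; e < numElements; e++) {
            std::fill(EM_F.begin(), EM_F.end(), 0.0);
            for (int s = 0; s < len_EM_F; s++)
                EM_F[s] += 1.0;              // stand-in for the quadrature loops
            F[e] = EM_F[0] + EM_F[1] + EM_F[2];
        }
    } // per-thread vectors are destroyed here

    for (int e = 0; e < numElements; e++)
        std::printf("F[%d] = %g\n", e, F[e]);
    return 0;
}
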
diff --git a/dudley/src/Assemble_PDE_System2_2D.cpp b/dudley/src/Assemble_PDE_System2_2D.cpp
deleted file mode 100644
index a089101..0000000
--- a/dudley/src/Assemble_PDE_System2_2D.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m  and -(X_{k,i})_i + Y_k */
-
-/*    u has p.numComp components in a 2D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                                                  */
-
-/*    Shape of the coefficients: */
-
-/*      A = p.numEqu x 2 x p.numComp x 2 */
-/*      B = 2 x p.numEqu x p.numComp  */
-/*      C = p.numEqu x 2 x p.numComp  */
-/*      D = p.numEqu x p.numComp  */
-/*      X = p.numEqu x 2  */
-/*      Y = p.numEqu   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_System2_2D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escript::Data* F,
-				    const escript::Data* A, const escript::Data* B, const escript::Data* C, const escript::Data* D,
-				    const escript::Data* X, const escript::Data* Y)
-{
-
-#define DIM 2
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r, k, m;
-    register double rtmp, rtmp0, rtmp1, rtmp00, rtmp10, rtmp01, rtmp11;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    const double *S = p.shapeFns;
-    dim_t len_EM_S = p.numShapes * p.numShapes * p.numEqu * p.numComp;
-    dim_t len_EM_F = p.numShapes * p.numEqu;
-
-#pragma omp parallel private(color,EM_S, EM_F, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q,row_index,q, s,r,k,m,rtmp, rtmp0, rtmp1, rtmp00, rtmp10, rtmp01, rtmp11,add_EM_F, add_EM_S)
-    {
-
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.numShapes];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-
-			A_p = getSampleDataRO(A, e);
-			B_p = getSampleDataRO(B, e);
-			C_p = getSampleDataRO(C, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-
-		      /************************************************************************************/
-			/*   process A: */
-		      /************************************************************************************/
-			A_p = getSampleDataRO(A, e);
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX6(0, 0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 0, m, 0, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 0, m, 1, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 1, m, 0, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 1, m, 1, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp00 = 0;
-					rtmp01 = 0;
-					rtmp10 = 0;
-					rtmp11 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp00 += rtmp0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp01 += rtmp0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp10 += rtmp1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp11 += rtmp1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp00 * A_p[INDEX4(k, 0, m, 0, p.numEqu, DIM, p.numComp)]
-						    + rtmp01 * A_p[INDEX4(k, 0, m, 1, p.numEqu, DIM, p.numComp)]
-						    + rtmp10 * A_p[INDEX4(k, 1, m, 0, p.numEqu, DIM, p.numComp)]
-						    + rtmp11 * A_p[INDEX4(k, 1, m, 1, p.numEqu, DIM, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process B: */
-		      /************************************************************************************/
-			B_p = getSampleDataRO(B, e);
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp += vol * S[INDEX2(r, q, p.numShapes)] *
-							(DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							 B_q[INDEX4(k, 0, m, q, p.numEqu, DIM, p.numComp)] +
-							 DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							 B_q[INDEX4(k, 1, m, q, p.numEqu, DIM, p.numComp)]);
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(r, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp0 * B_p[INDEX3(k, 0, m, p.numEqu, DIM)] +
-						    rtmp1 * B_p[INDEX3(k, 1, m, p.numEqu, DIM)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process C: */
-		      /************************************************************************************/
-			C_p = getSampleDataRO(C, e);
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, p.numComp, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp += vol * S[INDEX2(s, q, p.numShapes)] *
-							(C_q[INDEX4(k, m, 0, q, p.numEqu, p.numComp, DIM)] *
-							 DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							 C_q[INDEX4(k, m, 1, q, p.numEqu, p.numComp, DIM)] *
-							 DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(s, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp0 * C_p[INDEX3(k, m, 0, p.numEqu, p.numComp)] +
-						    rtmp1 * C_p[INDEX3(k, m, 1, p.numEqu, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /*********************************************************************************** */
-			/* process D */
-		      /************************************************************************************/
-			D_p = getSampleDataRO(D, e);
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX4(0, 0, 0, 0, p.numEqu, p.numComp, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(s, q, p.numShapes)] *
-							D_q[INDEX3(k, m, q, p.numEqu, p.numComp)] *
-							S[INDEX2(r, q, p.numShapes)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp * D_p[INDEX2(k, m, p.numEqu)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process X: */
-		      /************************************************************************************/
-			X_p = getSampleDataRO(X, e);
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX4(0, 0, 0, 0, p.numEqu, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       X_q[INDEX3(k, 0, q, p.numEqu, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       X_q[INDEX3(k, 1, q, p.numEqu, DIM)]);
-					}
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp0 = 0;
-				    rtmp1 = 0;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					rtmp1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-				    }
-				    for (k = 0; k < p.numEqu; k++)
-					EM_F[INDEX2(k, s, p.numEqu)] +=
-					    rtmp0 * X_p[INDEX2(k, 0, p.numEqu)] + rtmp1 * X_p[INDEX2(k, 1, p.numEqu)];
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process Y: */
-		     /************************************************************************************/
-			Y_p = getSampleDataRO(Y, e);
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX3(0, 0, 0, p.numEqu, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[INDEX2(k, q, p.numEqu)];
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-				    for (k = 0; k < p.numEqu; k++)
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp * Y_p[k];
-				}
-			    }
-			}
-		       /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		       /*********************************************************************************************************************/
-			for (q = 0; q < p.numShapes; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_F, F_p, p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.numShapes, row_index, p.numEqu,
-							      p.numShapes, row_index, p.numComp, EM_S);
-
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;
-	    delete[] EM_F;
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
-
-/*
- * $Log$
- */
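
Every coefficient in these assemblers comes in two flavours: expanded data carries one value per
quadrature point, unexpanded data one value per element, which is why each "process A/B/C/D/X/Y"
block has a second branch that hoists the constant out of the quadrature sum. The sketch below
checks that the two branches of the D term agree when the expanded data happens to be constant; all
sizes and values are invented for illustration.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    const int numShapes = 3, numQuad = 3;
    const double vol = 0.5;

    double S[numShapes * numQuad];           // S[s + numShapes*q]
    for (int q = 0; q < numQuad; q++)
        for (int s = 0; s < numShapes; s++)
            S[s + numShapes * q] = 0.1 * (s + 1) * (q + 1);

    const double D_const = 2.0;
    const double D_q[numQuad] = {D_const, D_const, D_const};

    double maxdiff = 0.0;
    for (int s = 0; s < numShapes; s++) {
        for (int r = 0; r < numShapes; r++) {
            double expanded = 0.0, hoisted = 0.0;
            for (int q = 0; q < numQuad; q++) {
                // expanded branch: coefficient sampled at each quadrature point
                expanded += vol * S[s + numShapes * q] * D_q[q] * S[r + numShapes * q];
                // unexpanded branch: sum the shape products only ...
                hoisted += vol * S[s + numShapes * q] * S[r + numShapes * q];
            }
            // ... and multiply by the single element value afterwards
            maxdiff = std::max(maxdiff, std::fabs(expanded - hoisted * D_const));
        }
    }
    std::printf("max difference between the two branches: %g\n", maxdiff);
    return 0;
}
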
diff --git a/dudley/src/Assemble_PDE_System2_3D.cpp b/dudley/src/Assemble_PDE_System2_3D.cpp
deleted file mode 100644
index abde0db..0000000
--- a/dudley/src/Assemble_PDE_System2_3D.cpp
+++ /dev/null
@@ -1,504 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    assembles the system of numEq PDEs into the stiffness matrix S right hand side F  */
-/*    the shape functions for test and solution must be identical */
-
-/*      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m  and -(X_{k,i})_i + Y_k */
-
-/*    u has p.numComp components in a 3D domain. The shape functions for test and solution must be identical  */
-/*    and row_NS == row_NN                                                                                  */
-
-/*    Shape of the coefficients: */
-
-/*      A = p.numEqu x 3 x p.numComp x 3 */
-/*      B = 3 x p.numEqu x p.numComp  */
-/*      C = p.numEqu x 3 x p.numComp  */
-/*      D = p.numEqu x p.numComp  */
-/*      X = p.numEqu x 3  */
-/*      Y = p.numEqu   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/************************************************************************************/
-
-void Dudley_Assemble_PDE_System2_3D(Dudley_Assemble_Parameters p, Dudley_ElementFile * elements,
-				    paso::SystemMatrix_ptr Mat, escript::Data* F,
-				    const escript::Data* A, const escript::Data* B, const escript::Data* C, const escript::Data* D,
-				    const escript::Data* X, const escript::Data* Y)
-{
-
-#define DIM 3
-    index_t color;
-    dim_t e;
-    __const double *A_p, *B_p, *C_p, *D_p, *X_p, *Y_p, *A_q, *B_q, *C_q, *D_q, *X_q, *Y_q;
-    double *EM_S, *EM_F, *DSDX;
-    index_t *row_index;
-    register dim_t q, s, r, k, m;
-    register double rtmp, rtmp0, rtmp1, rtmp2, rtmp00, rtmp01, rtmp02, rtmp10, rtmp11, rtmp12, rtmp20, rtmp21, rtmp22;
-    bool add_EM_F, add_EM_S;
-
-    bool extendedA = isExpanded(A);
-    bool extendedB = isExpanded(B);
-    bool extendedC = isExpanded(C);
-    bool extendedD = isExpanded(D);
-    bool extendedX = isExpanded(X);
-    bool extendedY = isExpanded(Y);
-    double *F_p = (requireWrite(F), getSampleDataRW(F, 0));	/* use comma, to get around the mixed code and declarations thing */
-    const double *S = p.shapeFns;
-    dim_t len_EM_S = p.numShapes * p.numShapes * p.numEqu * p.numComp;
-    dim_t len_EM_F = p.numShapes * p.numEqu;
-
-#pragma omp parallel private(color,EM_S, EM_F, DSDX, A_p, B_p, C_p, D_p, X_p, Y_p, A_q, B_q, C_q, D_q, X_q, Y_q,row_index,q, s,r,k,m,rtmp, rtmp0, rtmp1, rtmp2, rtmp00, rtmp01, rtmp02, rtmp10, rtmp11, rtmp12, rtmp20, rtmp21, rtmp22,add_EM_F, add_EM_S)
-    {
-	EM_S = new  double[len_EM_S];
-	EM_F = new  double[len_EM_F];
-	row_index = new  index_t[p.numShapes];
-
-	if (!Dudley_checkPtr(EM_S) && !Dudley_checkPtr(EM_F) && !Dudley_checkPtr(row_index))
-	{
-
-	    for (color = elements->minColor; color <= elements->maxColor; color++)
-	    {
-		/*  open loop over all elements: */
-#pragma omp for private(e) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    if (elements->Color[e] == color)
-		    {
-			double vol = p.row_jac->absD[e] * p.row_jac->quadweight;
-
-			A_p = getSampleDataRO(A, e);
-			B_p = getSampleDataRO(B, e);
-			C_p = getSampleDataRO(C, e);
-			D_p = getSampleDataRO(D, e);
-			X_p = getSampleDataRO(X, e);
-			Y_p = getSampleDataRO(Y, e);
-
-
-			DSDX = &(p.row_jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)]);
-			for (q = 0; q < len_EM_S; ++q)
-			    EM_S[q] = 0;
-			for (q = 0; q < len_EM_F; ++q)
-			    EM_F[q] = 0;
-			add_EM_F = FALSE;
-			add_EM_S = FALSE;
-
-		      /************************************************************************************/
-			/*   process A: */
-		      /************************************************************************************/
-			if (NULL != A_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedA)
-			    {
-				A_q = &(A_p[INDEX6(0, 0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 0, m, 0, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 0, m, 1, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 0, m, 2, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 1, m, 0, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 1, m, 1, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 1, m, 2, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 2, m, 0, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 2, m, 1, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-							       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-							       A_q[INDEX5(k, 2, m, 2, q, p.numEqu, DIM, p.numComp, DIM)]
-							       * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
-
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp00 = 0;
-					rtmp01 = 0;
-					rtmp02 = 0;
-					rtmp10 = 0;
-					rtmp11 = 0;
-					rtmp12 = 0;
-					rtmp20 = 0;
-					rtmp21 = 0;
-					rtmp22 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp00 += rtmp0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp01 += rtmp0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp02 += rtmp0 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-
-					    rtmp1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp10 += rtmp1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp11 += rtmp1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp12 += rtmp1 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-
-					    rtmp2 = vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-					    rtmp20 += rtmp2 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp21 += rtmp2 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp22 += rtmp2 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp00 * A_p[INDEX4(k, 0, m, 0, p.numEqu, DIM, p.numComp)] +
-						    rtmp01 * A_p[INDEX4(k, 0, m, 1, p.numEqu, DIM, p.numComp)] +
-						    rtmp02 * A_p[INDEX4(k, 0, m, 2, p.numEqu, DIM, p.numComp)] +
-						    rtmp10 * A_p[INDEX4(k, 1, m, 0, p.numEqu, DIM, p.numComp)] +
-						    rtmp11 * A_p[INDEX4(k, 1, m, 1, p.numEqu, DIM, p.numComp)] +
-						    rtmp12 * A_p[INDEX4(k, 1, m, 2, p.numEqu, DIM, p.numComp)] +
-						    rtmp20 * A_p[INDEX4(k, 2, m, 0, p.numEqu, DIM, p.numComp)] +
-						    rtmp21 * A_p[INDEX4(k, 2, m, 1, p.numEqu, DIM, p.numComp)] +
-						    rtmp22 * A_p[INDEX4(k, 2, m, 2, p.numEqu, DIM, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process B: */
-		      /************************************************************************************/
-			if (NULL != B_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedB)
-			    {
-				B_q = &(B_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, DIM, p.numComp, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(r, q, p.numShapes)] *
-							(DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-							 B_q[INDEX4(k, 0, m, q, p.numEqu, DIM, p.numComp)] +
-							 DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-							 B_q[INDEX4(k, 1, m, q, p.numEqu, DIM, p.numComp)] +
-							 DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-							 B_q[INDEX4(k, 2, m, q, p.numEqu, DIM, p.numComp)]);
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					rtmp2 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(r, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					    rtmp2 += rtmp * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp0 * B_p[INDEX3(k, 0, m, p.numEqu, DIM)] +
-						    rtmp1 * B_p[INDEX3(k, 1, m, p.numEqu, DIM)] +
-						    rtmp2 * B_p[INDEX3(k, 2, m, p.numEqu, DIM)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process C: */
-		      /************************************************************************************/
-			if (NULL != C_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedC)
-			    {
-				C_q = &(C_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, p.numComp, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(s, q, p.numShapes)] *
-							(C_q[INDEX4(k, m, 0, q, p.numEqu, p.numComp, DIM)] *
-							 DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
-							 C_q[INDEX4(k, m, 1, q, p.numEqu, p.numComp, DIM)] *
-							 DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
-							 C_q[INDEX4(k, m, 2, q, p.numEqu, p.numComp, DIM)] *
-							 DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp0 = 0;
-					rtmp1 = 0;
-					rtmp2 = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp = vol * S[INDEX2(s, q, p.numShapes)];
-					    rtmp0 += rtmp * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
-					    rtmp1 += rtmp * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
-					    rtmp2 += rtmp * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
-					}
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp0 * C_p[INDEX3(k, m, 0, p.numEqu, p.numComp)] +
-						    rtmp1 * C_p[INDEX3(k, m, 1, p.numEqu, p.numComp)] +
-						    rtmp2 * C_p[INDEX3(k, m, 2, p.numEqu, p.numComp)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /*********************************************************************************** */
-			/* process D */
-		      /************************************************************************************/
-			if (NULL != D_p)
-			{
-			    add_EM_S = TRUE;
-			    if (extendedD)
-			    {
-				D_q = &(D_p[INDEX4(0, 0, 0, 0, p.numEqu, p.numComp, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						rtmp = 0;
-						for (q = 0; q < p.numQuad; q++)
-						{
-						    rtmp +=
-							vol * S[INDEX2(s, q, p.numShapes)] *
-							D_q[INDEX3(k, m, q, p.numEqu, p.numComp)] *
-							S[INDEX2(r, q, p.numShapes)];
-						}
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] += rtmp;
-					    }
-					}
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (r = 0; r < p.numShapes; r++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
-					for (k = 0; k < p.numEqu; k++)
-					{
-					    for (m = 0; m < p.numComp; m++)
-					    {
-						EM_S[INDEX4(k, m, s, r, p.numEqu, p.numComp, p.numShapes)] +=
-						    rtmp * D_p[INDEX2(k, m, p.numEqu)];
-					    }
-					}
-				    }
-				}
-			    }
-			}
-		      /************************************************************************************/
-			/*   process X: */
-		      /************************************************************************************/
-			if (NULL != X_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedX)
-			    {
-				X_q = &(X_p[INDEX4(0, 0, 0, 0, p.numEqu, DIM, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0;
-					for (q = 0; q < p.numQuad; q++)
-					{
-					    rtmp +=
-						vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
-						       X_q[INDEX3(k, 0, q, p.numEqu, DIM)] +
-						       DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
-						       X_q[INDEX3(k, 1, q, p.numEqu, DIM)] +
-						       DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
-						       X_q[INDEX3(k, 2, q, p.numEqu, DIM)]);
-					}
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp0 = 0;
-				    rtmp1 = 0;
-				    rtmp2 = 0;
-				    for (q = 0; q < p.numQuad; q++)
-				    {
-					rtmp0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
-					rtmp1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
-					rtmp2 += vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
-				    }
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp0 * X_p[INDEX2(k, 0, p.numEqu)]
-					    + rtmp1 * X_p[INDEX2(k, 1, p.numEqu)] + rtmp2 * X_p[INDEX2(k, 2, p.numEqu)];
-				    }
-				}
-			    }
-			}
-		     /************************************************************************************/
-			/*   process Y: */
-		     /************************************************************************************/
-			if (NULL != Y_p)
-			{
-			    add_EM_F = TRUE;
-			    if (extendedY)
-			    {
-				Y_q = &(Y_p[INDEX3(0, 0, 0, p.numEqu, p.numQuad)]);
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    for (k = 0; k < p.numEqu; k++)
-				    {
-					rtmp = 0.;
-					for (q = 0; q < p.numQuad; q++)
-					    rtmp += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[INDEX2(k, q, p.numEqu)];
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (s = 0; s < p.numShapes; s++)
-				{
-				    rtmp = 0;
-				    for (q = 0; q < p.numQuad; q++)
-					rtmp += vol * S[INDEX2(s, q, p.numShapes)];
-				    for (k = 0; k < p.numEqu; k++)
-					EM_F[INDEX2(k, s, p.numEqu)] += rtmp * Y_p[k];
-				}
-			    }
-			}
-
-		       /*********************************************************************************************************************/
-			/* add the element matrices onto the matrix and right hand side                                */
-		       /*********************************************************************************************************************/
-			for (q = 0; q < p.numShapes; q++)
-			    row_index[q] = p.row_DOF[elements->Nodes[INDEX2(q, e, p.NN)]];
-
-			if (add_EM_F)
-			    Dudley_Util_AddScatter(p.numShapes, row_index, p.numEqu, EM_F, F_p, p.row_DOF_UpperBound);
-			if (add_EM_S)
-			    Dudley_Assemble_addToSystemMatrix(Mat, p.numShapes, row_index, p.numEqu,
-							      p.numShapes, row_index, p.numComp, EM_S);
-		    }		/* end color check */
-		}		/* end element loop */
-	    }			/* end color loop */
-
-	    delete[] EM_S;
-	    delete[] EM_F;
-	    delete[] row_index;
-
-	}			/* end of pointer check */
-    }				/* end parallel region */
-}
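
For reference, the element stiffness contributions this removed routine accumulated (and which the templated replacements added below compute in the same way) can be read off the loops above. With S the shape functions, DSDX supplying the gradients, and implied summation over the spatial indices i, j, a sketch derived from the code is:

    \mathrm{EM\_S}[k,m,s,r] \;{+}{=}\; \sum_{q} \mathrm{vol}\,\Bigl(
        \partial_i S_s\, A_{k,i,m,j}\, \partial_j S_r
      + \partial_i S_s\, B_{k,i,m}\, S_r
      + S_s\, C_{k,m,j}\, \partial_j S_r
      + S_s\, D_{k,m}\, S_r \Bigr)

The right hand side receives the analogous terms \partial_i S_s\, X_{k,i} and S_s\, Y_k; a coefficient term is skipped entirely when the corresponding Data object is empty, and constant (non-expanded) coefficients are hoisted out of the quadrature sum exactly as in the non-expanded branches above.
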
diff --git a/dudley/src/Assemble_PDE_System_2D.cpp b/dudley/src/Assemble_PDE_System_2D.cpp
new file mode 100644
index 0000000..d4557b2
--- /dev/null
+++ b/dudley/src/Assemble_PDE_System_2D.cpp
@@ -0,0 +1,374 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+/****************************************************************************
+
+  Assembles the system of numEqu PDEs into the stiffness matrix S and right
+  hand side F
+
+      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m
+  and
+      -(X_{k,i})_i + Y_k
+
+  u has p.numEqu components in a 2D domain. The shape functions for test and
+  solution must be identical and row_NS == row_NN.
+
+  Shape of the coefficients:
+
+      A = p.numEqu x 2 x p.numEqu x 2
+      B = 2 x p.numEqu x p.numEqu
+      C = p.numEqu x 2 x p.numEqu
+      D = p.numEqu x p.numEqu
+      X = p.numEqu x 2
+      Y = p.numEqu
+
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+namespace dudley {
+
+template<typename Scalar>
+void Assemble_PDE_System_2D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y)
+{
+    const int DIM = 2;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
+    }
+    const double* S = p.shapeFns;
+    const size_t len_EM_S = p.numShapes * p.numShapes * p.numEqu * p.numEqu;
+    const size_t len_EM_F = p.numShapes * p.numEqu;
+
+#pragma omp parallel
+    {
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        std::vector<index_t> row_index(p.numShapes);
+
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
+#pragma omp for
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    const double vol = p.jac->absD[e] * p.jac->quadweight;
+                    const double* DSDX = &p.jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)];
+                    std::fill(EM_S.begin(), EM_S.end(), zero);
+                    std::fill(EM_F.begin(), EM_F.end(), zero);
+                    bool add_EM_F = false;
+                    bool add_EM_S = false;
+
+                    ///////////////
+                    // process A //
+                    ///////////////
+                    if (!A.isEmpty()) {
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedA) {
+                            const Scalar* A_q = &A_p[INDEX6(0, 0, 0, 0, 0, 0, p.numEqu, DIM, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 0, m, 0, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 0, m, 1, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 1, m, 0, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 1, m, 1, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f00 = zero;
+                                    Scalar f01 = zero;
+                                    Scalar f10 = zero;
+                                    Scalar f11 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        const Scalar f1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f00 += f0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f01 += f0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f10 += f1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f11 += f1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f00 * A_p[INDEX4(k, 0, m, 0, p.numEqu, DIM, p.numEqu)]
+                                                + f01 * A_p[INDEX4(k, 0, m, 1, p.numEqu, DIM, p.numEqu)]
+                                                + f10 * A_p[INDEX4(k, 1, m, 0, p.numEqu, DIM, p.numEqu)]
+                                                + f11 * A_p[INDEX4(k, 1, m, 1, p.numEqu, DIM, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process B //
+                    ///////////////
+                    if (!B.isEmpty()) {
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedB) {
+                            const Scalar* B_q = &B_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, DIM, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f += vol * S[INDEX2(r, q, p.numShapes)] *
+                                                    (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                     B_q[INDEX4(k, 0, m, q, p.numEqu, DIM, p.numEqu)] +
+                                                     DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                     B_q[INDEX4(k, 1, m, q, p.numEqu, DIM, p.numEqu)]);
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(r, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f0 * B_p[INDEX3(k, 0, m, p.numEqu, DIM)] +
+                                                f1 * B_p[INDEX3(k, 1, m, p.numEqu, DIM)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process C //
+                    ///////////////
+                    if (!C.isEmpty()) {
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedC) {
+                            const Scalar* C_q = &C_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f += vol * S[INDEX2(s, q, p.numShapes)] *
+                                                    (C_q[INDEX4(k, m, 0, q, p.numEqu, p.numEqu, DIM)] *
+                                                     DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                     C_q[INDEX4(k, m, 1, q, p.numEqu, p.numEqu, DIM)] *
+                                                     DSDX[INDEX3(r, 1, q, p.numShapes, DIM)]);
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(s, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f0 * C_p[INDEX3(k, m, 0, p.numEqu, p.numEqu)] +
+                                                f1 * C_p[INDEX3(k, m, 1, p.numEqu, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process D //
+                    ///////////////
+                    if (!D.isEmpty()) {
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedD) {
+                            const Scalar* D_q = &D_p[INDEX4(0, 0, 0, 0, p.numEqu, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * S[INDEX2(s, q, p.numShapes)] *
+                                                    D_q[INDEX3(k, m, q, p.numEqu, p.numEqu)] *
+                                                    S[INDEX2(r, q, p.numShapes)];
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f * D_p[INDEX2(k, m, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process X //
+                    ///////////////
+                    if (!X.isEmpty()) {
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedX) {
+                            const Scalar* X_q = &X_p[INDEX4(0, 0, 0, 0, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int k = 0; k < p.numEqu; k++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f +=
+                                            vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                   X_q[INDEX3(k, 0, q, p.numEqu, DIM)] +
+                                                   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                   X_q[INDEX3(k, 1, q, p.numEqu, DIM)]);
+                                    }
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f0 = zero;
+                                Scalar f1 = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                    f1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                }
+                                for (int k = 0; k < p.numEqu; k++)
+                                    EM_F[INDEX2(k, s, p.numEqu)] +=
+                                        f0 * X_p[INDEX2(k, 0, p.numEqu)] + f1 * X_p[INDEX2(k, 1, p.numEqu)];
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process Y //
+                    ///////////////
+                    if (!Y.isEmpty()) {
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedY) {
+                            const Scalar* Y_q = &Y_p[INDEX3(0, 0, 0, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int k = 0; k < p.numEqu; k++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[INDEX2(k, q, p.numEqu)];
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)];
+                                for (int k = 0; k < p.numEqu; k++)
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f * Y_p[k];
+                            }
+                        }
+                    }
+                    // add the element matrices onto the matrix and right
+                    // hand side
+                    for (int q = 0; q < p.numShapes; q++)
+                        row_index[q] = p.DOF[p.elements->Nodes[INDEX2(q, e, p.NN)]];
+
+                    if (add_EM_F)
+                        util::addScatter(p.numShapes, &row_index[0], p.numEqu,
+                                         &EM_F[0], F_p, p.DOF_UpperBound);
+                    if (add_EM_S)
+                        Assemble_addToSystemMatrix(p.S, row_index, p.numEqu,
+                                                   EM_S);
+
+                } // end color check
+            } // end element loop
+        } // end color loop
+    } // end parallel region
+}
+
+// instantiate our two supported versions
+template void Assemble_PDE_System_2D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_System_2D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+} // namespace dudley
+
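
The explicit instantiations above are the only scalar types the template is compiled for (real_t and cplx_t). Below is a minimal sketch of how a caller might pick between them; the wrapper name assemblePDESystem2D and the Data::isComplex() checks are illustrative assumptions, not part of this patch:

    #include "Assemble.h"      // assumed to declare Assemble_PDE_System_2D and AssembleParameters
    #include <escript/Data.h>

    namespace dudley {

    // Hypothetical dispatch helper: use the complex instantiation if any
    // coefficient carries complex data, otherwise the real one.
    void assemblePDESystem2D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y)
    {
        const bool cplx = A.isComplex() || B.isComplex() || C.isComplex()
                       || D.isComplex() || X.isComplex() || Y.isComplex();
        if (cplx)
            Assemble_PDE_System_2D<escript::DataTypes::cplx_t>(p, A, B, C, D, X, Y);
        else
            Assemble_PDE_System_2D<escript::DataTypes::real_t>(p, A, B, C, D, X, Y);
    }

    } // namespace dudley

Compared with the removed C-style versions, the per-thread element buffers are now std::vector<Scalar> created inside the parallel region, so the manual new[]/delete[] pairs and the Dudley_checkPtr guards disappear.
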
diff --git a/dudley/src/Assemble_PDE_System_3D.cpp b/dudley/src/Assemble_PDE_System_3D.cpp
new file mode 100644
index 0000000..275ab60
--- /dev/null
+++ b/dudley/src/Assemble_PDE_System_3D.cpp
@@ -0,0 +1,427 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+/****************************************************************************
+
+  Assembles the system of numEqu PDEs into the stiffness matrix S and right
+  hand side F
+
+      -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m
+  and
+      -(X_{k,i})_i + Y_k
+
+  u has p.numEqu components in a 3D domain. The shape functions for test and
+  solution must be identical and row_NS == row_NN
+
+  Shape of the coefficients:
+
+      A = p.numEqu x 3 x p.numEqu x 3
+      B = 3 x p.numEqu x p.numEqu
+      C = p.numEqu x 3 x p.numEqu
+      D = p.numEqu x p.numEqu
+      X = p.numEqu x 3
+      Y = p.numEqu
+
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+namespace dudley {
+
+template<typename Scalar>
+void Assemble_PDE_System_3D(const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y)
+{
+    const int DIM = 3;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
+    }
+    const double* S = p.shapeFns;
+    const size_t len_EM_S = p.numShapes * p.numShapes * p.numEqu * p.numEqu;
+    const size_t len_EM_F = p.numShapes * p.numEqu;
+
+#pragma omp parallel
+    {
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        std::vector<index_t> row_index(p.numShapes);
+
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
+#pragma omp for
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    const double vol = p.jac->absD[e] * p.jac->quadweight;
+                    const double* DSDX = &p.jac->DSDX[INDEX5(0, 0, 0, 0, e, p.numShapes, DIM, p.numQuad, 1)];
+                    std::fill(EM_S.begin(), EM_S.end(), zero);
+                    std::fill(EM_F.begin(), EM_F.end(), zero);
+                    bool add_EM_F = false;
+                    bool add_EM_S = false;
+
+                    ///////////////
+                    // process A //
+                    ///////////////
+                    if (!A.isEmpty()) {
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedA) {
+                            const Scalar* A_q = &A_p[INDEX6(0, 0, 0, 0, 0, 0, p.numEqu, DIM, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 0, m, 0, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 0, m, 1, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 0, m, 2, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 1, m, 0, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 1, m, 1, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 1, m, 2, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 2, m, 0, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 2, m, 1, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                           DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                           A_q[INDEX5(k, 2, m, 2, q, p.numEqu, DIM, p.numEqu, DIM)]
+                                                           * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
+
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f00 = zero;
+                                    Scalar f01 = zero;
+                                    Scalar f02 = zero;
+                                    Scalar f10 = zero;
+                                    Scalar f11 = zero;
+                                    Scalar f12 = zero;
+                                    Scalar f20 = zero;
+                                    Scalar f21 = zero;
+                                    Scalar f22 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f0 = vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f00 += f0 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f01 += f0 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f02 += f0 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+
+                                        const Scalar f1 = vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f10 += f1 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f11 += f1 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f12 += f1 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+
+                                        const Scalar f2 = vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                        f20 += f2 * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f21 += f2 * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f22 += f2 * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f00 * A_p[INDEX4(k, 0, m, 0, p.numEqu, DIM, p.numEqu)] +
+                                                f01 * A_p[INDEX4(k, 0, m, 1, p.numEqu, DIM, p.numEqu)] +
+                                                f02 * A_p[INDEX4(k, 0, m, 2, p.numEqu, DIM, p.numEqu)] +
+                                                f10 * A_p[INDEX4(k, 1, m, 0, p.numEqu, DIM, p.numEqu)] +
+                                                f11 * A_p[INDEX4(k, 1, m, 1, p.numEqu, DIM, p.numEqu)] +
+                                                f12 * A_p[INDEX4(k, 1, m, 2, p.numEqu, DIM, p.numEqu)] +
+                                                f20 * A_p[INDEX4(k, 2, m, 0, p.numEqu, DIM, p.numEqu)] +
+                                                f21 * A_p[INDEX4(k, 2, m, 1, p.numEqu, DIM, p.numEqu)] +
+                                                f22 * A_p[INDEX4(k, 2, m, 2, p.numEqu, DIM, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process B //
+                    ///////////////
+                    if (!B.isEmpty()) {
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedB) {
+                            const Scalar* B_q = &B_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, DIM, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * S[INDEX2(r, q, p.numShapes)] *
+                                                    (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                     B_q[INDEX4(k, 0, m, q, p.numEqu, DIM, p.numEqu)] +
+                                                     DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                     B_q[INDEX4(k, 1, m, q, p.numEqu, DIM, p.numEqu)] +
+                                                     DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                     B_q[INDEX4(k, 2, m, q, p.numEqu, DIM, p.numEqu)]);
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(r, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                        f2 += f * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f0 * B_p[INDEX3(k, 0, m, p.numEqu, DIM)] +
+                                                f1 * B_p[INDEX3(k, 1, m, p.numEqu, DIM)] +
+                                                f2 * B_p[INDEX3(k, 2, m, p.numEqu, DIM)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process C //
+                    ///////////////
+                    if (!C.isEmpty()) {
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedC) {
+                            const Scalar* C_q = &C_p[INDEX5(0, 0, 0, 0, 0, p.numEqu, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * S[INDEX2(s, q, p.numShapes)] *
+                                                    (C_q[INDEX4(k, m, 0, q, p.numEqu, p.numEqu, DIM)] *
+                                                     DSDX[INDEX3(r, 0, q, p.numShapes, DIM)] +
+                                                     C_q[INDEX4(k, m, 1, q, p.numEqu, p.numEqu, DIM)] *
+                                                     DSDX[INDEX3(r, 1, q, p.numShapes, DIM)] +
+                                                     C_q[INDEX4(k, m, 2, q, p.numEqu, p.numEqu, DIM)] *
+                                                     DSDX[INDEX3(r, 2, q, p.numShapes, DIM)]);
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        const Scalar f = vol * S[INDEX2(s, q, p.numShapes)];
+                                        f0 += f * DSDX[INDEX3(r, 0, q, p.numShapes, DIM)];
+                                        f1 += f * DSDX[INDEX3(r, 1, q, p.numShapes, DIM)];
+                                        f2 += f * DSDX[INDEX3(r, 2, q, p.numShapes, DIM)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f0 * C_p[INDEX3(k, m, 0, p.numEqu, p.numEqu)] +
+                                                f1 * C_p[INDEX3(k, m, 1, p.numEqu, p.numEqu)] +
+                                                f2 * C_p[INDEX3(k, m, 2, p.numEqu, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process D //
+                    ///////////////
+                    if (!D.isEmpty()) {
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
+                        add_EM_S = true;
+                        if (expandedD) {
+                            const Scalar* D_q = &D_p[INDEX4(0, 0, 0, 0, p.numEqu, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            Scalar f = zero;
+                                            for (int q = 0; q < p.numQuad; q++) {
+                                                f +=
+                                                    vol * S[INDEX2(s, q, p.numShapes)] *
+                                                    D_q[INDEX3(k, m, q, p.numEqu, p.numEqu)] *
+                                                    S[INDEX2(r, q, p.numShapes)];
+                                            }
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] += f;
+                                        }
+                                    }
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int r = 0; r < p.numShapes; r++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * S[INDEX2(r, q, p.numShapes)];
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        for (int m = 0; m < p.numEqu; m++) {
+                                            EM_S[INDEX4(k, m, s, r, p.numEqu, p.numEqu, p.numShapes)] +=
+                                                f * D_p[INDEX2(k, m, p.numEqu)];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process X //
+                    ///////////////
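+                    // X enters through the gradient of the test function:
+                    // EM_F(k,s) += sum_q vol * grad(S(s,q)) . X(k,:,q)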
+                    if (!X.isEmpty()) {
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedX) {
+                            const Scalar* X_q = &X_p[INDEX4(0, 0, 0, 0, p.numEqu, DIM, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int k = 0; k < p.numEqu; k++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++) {
+                                        f +=
+                                            vol * (DSDX[INDEX3(s, 0, q, p.numShapes, DIM)] *
+                                                   X_q[INDEX3(k, 0, q, p.numEqu, DIM)] +
+                                                   DSDX[INDEX3(s, 1, q, p.numShapes, DIM)] *
+                                                   X_q[INDEX3(k, 1, q, p.numEqu, DIM)] +
+                                                   DSDX[INDEX3(s, 2, q, p.numShapes, DIM)] *
+                                                   X_q[INDEX3(k, 2, q, p.numEqu, DIM)]);
+                                    }
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f0 = zero;
+                                Scalar f1 = zero;
+                                Scalar f2 = zero;
+                                for (int q = 0; q < p.numQuad; q++) {
+                                    f0 += vol * DSDX[INDEX3(s, 0, q, p.numShapes, DIM)];
+                                    f1 += vol * DSDX[INDEX3(s, 1, q, p.numShapes, DIM)];
+                                    f2 += vol * DSDX[INDEX3(s, 2, q, p.numShapes, DIM)];
+                                }
+                                for (int k = 0; k < p.numEqu; k++) {
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f0 * X_p[INDEX2(k, 0, p.numEqu)]
+                                        + f1 * X_p[INDEX2(k, 1, p.numEqu)] + f2 * X_p[INDEX2(k, 2, p.numEqu)];
+                                }
+                            }
+                        }
+                    }
+                    ///////////////
+                    // process Y //
+                    ///////////////
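+                    // Y is the plain source term:
+                    // EM_F(k,s) += sum_q vol * S(s,q) * Y(k[,q])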
+                    if (!Y.isEmpty()) {
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                        add_EM_F = true;
+                        if (expandedY) {
+                            const Scalar* Y_q = &Y_p[INDEX3(0, 0, 0, p.numEqu, p.numQuad)];
+                            for (int s = 0; s < p.numShapes; s++) {
+                                for (int k = 0; k < p.numEqu; k++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuad; q++)
+                                        f += vol * S[INDEX2(s, q, p.numShapes)] * Y_q[INDEX2(k, q, p.numEqu)];
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f;
+                                }
+                            }
+                        } else {
+                            for (int s = 0; s < p.numShapes; s++) {
+                                Scalar f = zero;
+                                for (int q = 0; q < p.numQuad; q++)
+                                    f += vol * S[INDEX2(s, q, p.numShapes)];
+                                for (int k = 0; k < p.numEqu; k++)
+                                    EM_F[INDEX2(k, s, p.numEqu)] += f * Y_p[k];
+                            }
+                        }
+                    }
+
+                    // add the element matrices onto the matrix and right
+                    // hand side
+                    for (int q = 0; q < p.numShapes; q++)
+                        row_index[q] = p.DOF[p.elements->Nodes[INDEX2(q, e, p.NN)]];
+
+                    if (add_EM_F)
+                        util::addScatter(p.numShapes, &row_index[0], p.numEqu,
+                                         &EM_F[0], F_p, p.DOF_UpperBound);
+                    if (add_EM_S)
+                        Assemble_addToSystemMatrix(p.S, row_index, p.numEqu,
+                                                   EM_S);
+                } // end color check
+            } // end element loop
+        } // end color loop
+    } // end parallel region
+}
+
+// instantiate our two supported versions
+template void Assemble_PDE_System_3D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_System_3D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
+} // namespace dudley
+
diff --git a/dudley/src/Assemble_addToSystemMatrix.cpp b/dudley/src/Assemble_addToSystemMatrix.cpp
index 51b4a8c..713727b 100644
--- a/dudley/src/Assemble_addToSystemMatrix.cpp
+++ b/dudley/src/Assemble_addToSystemMatrix.cpp
@@ -14,296 +14,287 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "Assemble.h"
 
-/* Dudley: SystemMatrix and SystemVector */
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
 
-/*  adds the matrix array[Equa,Sol,NN,NN] onto the matrix in. */
-/* the rows/columns are given by */
-/*  i_Equa+Equa*Nodes_Equa[Nodes[j_Equa]] (i_Equa=0:Equa; j_Equa=0:NN_Equa). */
-/*  the routine has to be called from a parallel region                        */
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/TrilinosMatrixAdapter.h>
 
-/*  This routine assumes that in->Equa=in->Sol=1, i.e. */
-/*  array is fully packed. */
-/* TODO: the case in->Equa!=1  */
+using esys_trilinos::TrilinosMatrixAdapter;
+#endif
 
-/************************************************************************************/
+namespace dudley {
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
 
-#include "Assemble.h"
+#ifdef ESYS_HAVE_PASO
+static void addToSystemMatrixPasoCSC(paso::SystemMatrix* S,
+                                     const std::vector<index_t>& Nodes,
+                                     int numEq,
+                                     const std::vector<double>& array);
 
-/************************************************************************************/
+static void addToSystemMatrixPasoCSR(paso::SystemMatrix* S,
+                                     const std::vector<index_t>& Nodes,
+                                     int numEq,
+                                     const std::vector<double>& array);
+#endif
 
-void Dudley_Assemble_addToSystemMatrix(paso::SystemMatrix_ptr in, const dim_t NN_Equa, const index_t * Nodes_Equa, const dim_t num_Equa,
-				       const dim_t NN_Sol, const index_t * Nodes_Sol, const dim_t num_Sol, const double *array)
+template<>
+void Assemble_addToSystemMatrix<real_t>(escript::AbstractSystemMatrix* S,
+                                        const std::vector<index_t>& Nodes,
+                                        int numEq,
+                                        const std::vector<real_t>& array)
 {
-    index_t index_offset = (in->type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
-    dim_t k_Equa, j_Equa, j_Sol, k_Sol, i_Equa, i_Sol, l_col, l_row, ic, ir, k, i_row, i_col;
-    index_t *mainBlock_ptr, *mainBlock_index, *col_coupleBlock_ptr, *col_coupleBlock_index, *row_coupleBlock_ptr,
-	*row_coupleBlock_index;
-    double *mainBlock_val, *row_coupleBlock_val, *col_coupleBlock_val;
-    dim_t row_block_size = in->row_block_size;
-    dim_t col_block_size = in->col_block_size;
-    dim_t block_size = in->block_size;
-    dim_t num_subblocks_Equa = num_Equa / row_block_size;
-    dim_t num_subblocks_Sol = num_Sol / col_block_size;
-    dim_t numMyCols = in->pattern->mainPattern->numInput;
-    dim_t numMyRows = in->pattern->mainPattern->numOutput;
-
-    if (in->type & MATRIX_FORMAT_CSC)
-    {
-	/* MATRIX_FORMAT_CSC does not support MPI !!!!! */
-	mainBlock_ptr = in->mainBlock->pattern->ptr;
-	mainBlock_index = in->mainBlock->pattern->index;
-	mainBlock_val = in->mainBlock->val;
-	col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
-	col_coupleBlock_index = in->col_coupleBlock->pattern->index;
-	col_coupleBlock_val = in->col_coupleBlock->val;
-	row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
-	row_coupleBlock_index = in->row_coupleBlock->pattern->index;
-	row_coupleBlock_val = in->row_coupleBlock->val;
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrix* pmat = dynamic_cast<paso::SystemMatrix*>(S);
+    if (pmat) {
+        // call the right function depending on storage type
+        if (pmat->type & MATRIX_FORMAT_CSC) {
+            addToSystemMatrixPasoCSC(pmat, Nodes, numEq, array);
+        } else { // type == CSR
+            addToSystemMatrixPasoCSR(pmat, Nodes, numEq, array);
+        }
+        return;
+    }
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tmat = dynamic_cast<TrilinosMatrixAdapter*>(S);
+    if (tmat) {
+        tmat->add(Nodes, array);
+        return;
+    }
+#endif
+    throw DudleyException("Assemble_addToSystemMatrix: unsupported system "
+                          "matrix type.");
+}
 
-	for (k_Sol = 0; k_Sol < NN_Sol; ++k_Sol)
-	{			/* Down columns of array */
-	    j_Sol = Nodes_Sol[k_Sol];
-	    for (l_col = 0; l_col < num_subblocks_Sol; ++l_col)
-	    {
-		i_col = j_Sol * num_subblocks_Sol + l_col;
-		if (i_col < numMyCols)
-		{
-		    for (k_Equa = 0; k_Equa < NN_Equa; ++k_Equa)
-		    {		/* Across cols of array */
-			j_Equa = Nodes_Equa[k_Equa];
-			for (l_row = 0; l_row < num_subblocks_Equa; ++l_row)
-			{
-			    i_row = j_Equa * num_subblocks_Equa + index_offset + l_row;
-			    if (i_row < numMyRows + index_offset)
-			    {
-				for (k = mainBlock_ptr[i_col] - index_offset;
-				     k < mainBlock_ptr[i_col + 1] - index_offset; ++k)
-				{
-				    if (mainBlock_index[k] == i_row)
-				    {
-					/* Entry array(k_Equa, j_Sol) is a block (col_block_size x col_block_size) */
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						mainBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (k = col_coupleBlock_ptr[i_col] - index_offset;
-				     k < col_coupleBlock_ptr[i_col + 1] - index_offset; ++k)
-				{
-				    if (row_coupleBlock_index[k] == i_row - numMyRows)
-				    {
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						row_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];;
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			}
-		    }
-		}
-		else
-		{
-		    for (k_Equa = 0; k_Equa < NN_Equa; ++k_Equa)
-		    {		/* Across rows of array */
-			j_Equa = Nodes_Equa[k_Equa];
-			for (l_row = 0; l_row < num_subblocks_Equa; ++l_row)
-			{
-			    i_row = j_Equa * num_subblocks_Equa + index_offset + l_row;
-			    if (i_row < numMyRows + index_offset)
-			    {
-				for (k = col_coupleBlock_ptr[i_col - numMyCols] - index_offset;
-				     k < col_coupleBlock_ptr[i_col - numMyCols + 1] - index_offset; ++k)
-				{
-				    if (col_coupleBlock_index[k] == i_row)
-				    {
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						col_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			}
-		    }
-		}
-	    }
-	}
+template<>
+void Assemble_addToSystemMatrix<cplx_t>(escript::AbstractSystemMatrix* S,
+                                        const std::vector<index_t>& Nodes,
+                                        int numEq,
+                                        const std::vector<cplx_t>& array)
+{
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tmat = dynamic_cast<TrilinosMatrixAdapter*>(S);
+    if (tmat) {
+        tmat->add(Nodes, array);
+        return;
     }
-    else if (in->type & MATRIX_FORMAT_TRILINOS_CRS)
-    {
-	/* this needs to be modified */
-#ifdef TRILINOS
-	for (k_Equa = 0; k_Equa < NN_Equa; ++k_Equa)
-	{			/* Down columns of array */
-	    j_Equa = Nodes_Equa[k_Equa];
-	    if (j_Equa < in->mainBlock->pattern->output_node_distribution->numLocal)
-	    {
-		for (k_Sol = 0; k_Sol < NN_Sol; ++k_Sol)
-		{		/* Across rows of array */
-		    j_Sol = Nodes_Sol[k_Sol];
-		    for (l_row = 0; l_row < num_subblocks_Equa; ++l_row)
-		    {
-			irow = j_Equa * row_block_size + l_row;
-			for (l_col = 0; l_col < col_block_size; ++l_col)
-			{
-			    icol = j_Sol * col_block_size + index_offset + l_col;
-			    /* irow is local and icol is global */
-			    Trilinos_SumIntoMyValues(in->trilinos_data, irow, icol,
-						     array[INDEX4
-							   (l_row, l_col, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)]);
-			}
-		    }
-		}
-	    }
-	}
 #endif
+    throw DudleyException("addToSystemMatrix: only Trilinos matrices support "
+                          "complex-valued assembly!");
+}
+
+#ifdef ESYS_HAVE_PASO
+void addToSystemMatrixPasoCSC(paso::SystemMatrix* in,
+                              const std::vector<index_t>& Nodes,
+                              int numEq, const std::vector<double>& array)
+{
+    const int index_offset = (in->type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
+    const int row_block_size = in->row_block_size;
+    const int col_block_size = in->col_block_size;
+    const int block_size = in->block_size;
+    const int num_subblocks_Eq = numEq / row_block_size;
+    const int num_subblocks_Sol = numEq / col_block_size;
+    const dim_t numMyCols = in->pattern->mainPattern->numInput;
+    const dim_t numMyRows = in->pattern->mainPattern->numOutput;
+    const int NN = Nodes.size();
+
+    const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
+    const index_t* mainBlock_index = in->mainBlock->pattern->index;
+    double* mainBlock_val = in->mainBlock->val;
+    const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
+    const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
+    double* col_coupleBlock_val = in->col_coupleBlock->val;
+    const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
+    double* row_coupleBlock_val = in->row_coupleBlock->val;
+
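+    // CSC storage: walk the element nodes as matrix columns and search each
+    // column's sparsity pattern for the matching row; entries whose row or
+    // column lies outside the local range go to the coupling blocks instead
+    // of the main block.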
+    for (int k_Sol = 0; k_Sol < NN; ++k_Sol) { // Down columns of array
+        const index_t j_Sol = Nodes[k_Sol];
+        for (int l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
+            const index_t i_col = j_Sol * num_subblocks_Sol + l_col;
+            if (i_col < numMyCols) {
+                for (int k_Eq = 0; k_Eq < NN; ++k_Eq) {
+                    // Across cols of array
+                    const index_t j_Eq = Nodes[k_Eq];
+                    for (int l_row = 0; l_row < num_subblocks_Eq; ++l_row) {
+                        const index_t i_row = j_Eq * num_subblocks_Eq + index_offset + l_row;
+                        if (i_row < numMyRows + index_offset) {
+                            for (index_t k = mainBlock_ptr[i_col]-index_offset;
+                                 k < mainBlock_ptr[i_col + 1]-index_offset; ++k) {
+                                if (mainBlock_index[k] == i_row) {
+                                    // Entry array(k_Eq, j_Sol) is a block
+                                    // (col_block_size x col_block_size)
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            mainBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        } else {
+                            for (index_t k = col_coupleBlock_ptr[i_col]-index_offset;
+                                 k < col_coupleBlock_ptr[i_col + 1]-index_offset; ++k) {
+                                if (row_coupleBlock_index[k] == i_row - numMyRows) {
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            row_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            } else { // i_col >= numMyCols
+                for (int k_Eq = 0; k_Eq < NN; ++k_Eq) {
+                    // Across rows of array
+                    const index_t j_Eq = Nodes[k_Eq];
+                    for (int l_row = 0; l_row < num_subblocks_Eq; ++l_row) {
+                        const index_t i_row = j_Eq * num_subblocks_Eq + index_offset + l_row;
+                        if (i_row < numMyRows + index_offset) {
+                            for (index_t k = col_coupleBlock_ptr[i_col-numMyCols]-index_offset;
+                                 k < col_coupleBlock_ptr[i_col - numMyCols + 1] - index_offset; ++k) {
+                                if (col_coupleBlock_index[k] == i_row) {
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            col_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
     }
-    else
-    {
-	mainBlock_ptr = in->mainBlock->pattern->ptr;
-	mainBlock_index = in->mainBlock->pattern->index;
-	mainBlock_val = in->mainBlock->val;
-	col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
-	col_coupleBlock_index = in->col_coupleBlock->pattern->index;
-	col_coupleBlock_val = in->col_coupleBlock->val;
-	row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
-	row_coupleBlock_index = in->row_coupleBlock->pattern->index;
-	row_coupleBlock_val = in->row_coupleBlock->val;
+}
 
-	for (k_Equa = 0; k_Equa < NN_Equa; ++k_Equa)
-	{			/* Down columns of array */
-	    j_Equa = Nodes_Equa[k_Equa];
-	    for (l_row = 0; l_row < num_subblocks_Equa; ++l_row)
-	    {
-		i_row = j_Equa * num_subblocks_Equa + l_row;
-		/* only look at the matrix rows stored on this processor */
-		if (i_row < numMyRows)
-		{
-		    for (k_Sol = 0; k_Sol < NN_Sol; ++k_Sol)
-		    {		/* Across rows of array */
-			j_Sol = Nodes_Sol[k_Sol];
-			for (l_col = 0; l_col < num_subblocks_Sol; ++l_col)
-			{
-			    /* only look at the matrix rows stored on this processor */
-			    i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
-			    if (i_col < numMyCols + index_offset)
-			    {
-				for (k = mainBlock_ptr[i_row] - index_offset;
-				     k < mainBlock_ptr[i_row + 1] - index_offset; ++k)
-				{
-				    if (mainBlock_index[k] == i_col)
-				    {
-					/* Entry array(k_Sol, j_Equa) is a block (row_block_size x col_block_size) */
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						mainBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			    else
-			    {
-				for (k = col_coupleBlock_ptr[i_row] - index_offset;
-				     k < col_coupleBlock_ptr[i_row + 1] - index_offset; ++k)
-				{
-				    if (col_coupleBlock_index[k] == i_col - numMyCols)
-				    {
-					/* Entry array(k_Sol, j_Equa) is a block (row_block_size x col_block_size) */
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						col_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			}
-		    }
-		}
-		else
-		{
-		    for (k_Sol = 0; k_Sol < NN_Sol; ++k_Sol)
-		    {		/* Across rows of array */
-			j_Sol = Nodes_Sol[k_Sol];
-			for (l_col = 0; l_col < num_subblocks_Sol; ++l_col)
-			{
-			    i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
-			    if (i_col < numMyCols + index_offset)
-			    {
-				for (k = row_coupleBlock_ptr[i_row - numMyRows] - index_offset;
-				     k < row_coupleBlock_ptr[i_row - numMyRows + 1] - index_offset; ++k)
-				{
-				    if (row_coupleBlock_index[k] == i_col)
-				    {
-					/* Entry array(k_Sol, j_Equa) is a block (row_block_size x col_block_size) */
-					for (ic = 0; ic < col_block_size; ++ic)
-					{
-					    i_Sol = ic + col_block_size * l_col;
-					    for (ir = 0; ir < row_block_size; ++ir)
-					    {
-						i_Equa = ir + row_block_size * l_row;
-						row_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
-						    array[INDEX4
-							  (i_Equa, i_Sol, k_Equa, k_Sol, num_Equa, num_Sol, NN_Equa)];
-					    }
-					}
-					break;
-				    }
-				}
-			    }
-			}
-		    }
-		}
-	    }
-	}
+void addToSystemMatrixPasoCSR(paso::SystemMatrix* in,
+                              const std::vector<index_t>& Nodes,
+                              int numEq, const std::vector<double>& array)
+{
+    const int index_offset = (in->type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
+    const int row_block_size = in->row_block_size;
+    const int col_block_size = in->col_block_size;
+    const int block_size = in->block_size;
+    const int num_subblocks_Eq = numEq / row_block_size;
+    const int num_subblocks_Sol = numEq / col_block_size;
+    const dim_t numMyCols = in->pattern->mainPattern->numInput;
+    const dim_t numMyRows = in->pattern->mainPattern->numOutput;
+    const int NN = Nodes.size();
+
+    const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
+    const index_t* mainBlock_index = in->mainBlock->pattern->index;
+    double* mainBlock_val = in->mainBlock->val;
+    const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
+    const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
+    double* col_coupleBlock_val = in->col_coupleBlock->val;
+    const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
+    const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
+    double* row_coupleBlock_val = in->row_coupleBlock->val;
+
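+    // CSR storage: walk the element nodes as matrix rows and search each
+    // row's sparsity pattern for the matching column; rows at or beyond
+    // numMyRows go to row_coupleBlock, locally owned rows with columns at or
+    // beyond numMyCols go to col_coupleBlock.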
+    for (int k_Eq = 0; k_Eq < NN; ++k_Eq) { // Down columns of array
+        const index_t j_Eq = Nodes[k_Eq];
+        for (int l_row = 0; l_row < num_subblocks_Eq; ++l_row) {
+            const index_t i_row = j_Eq * num_subblocks_Eq + l_row;
+            // only look at the matrix rows stored on this processor
+            if (i_row < numMyRows) {
+                for (int k_Sol = 0; k_Sol < NN; ++k_Sol) { // Across rows of array
+                    const index_t j_Sol = Nodes[k_Sol];
+                    for (int l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
+                        // only look at the matrix columns stored on this processor
+                        const index_t i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
+                        if (i_col < numMyCols + index_offset) {
+                            for (index_t k = mainBlock_ptr[i_row] - index_offset;
+                                 k < mainBlock_ptr[i_row + 1] - index_offset; ++k) {
+                                if (mainBlock_index[k] == i_col) {
+                                    // Entry array(k_Sol, j_Eq) is a block
+                                    // (row_block_size x col_block_size)
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            mainBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        } else {
+                            for (index_t k = col_coupleBlock_ptr[i_row] - index_offset;
+                                 k < col_coupleBlock_ptr[i_row + 1] - index_offset; ++k) {
+                                if (col_coupleBlock_index[k] == i_col - numMyCols) {
+                                    // Entry array(k_Sol, j_Eq) is a block
+                                    // (row_block_size x col_block_size)
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            col_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                for (int k_Sol = 0; k_Sol < NN; ++k_Sol) { // Across rows of array
+                    const index_t j_Sol = Nodes[k_Sol];
+                    for (int l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
+                        const index_t i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
+                        if (i_col < numMyCols + index_offset) {
+                            for (index_t k = row_coupleBlock_ptr[i_row - numMyRows] - index_offset;
+                                 k < row_coupleBlock_ptr[i_row - numMyRows + 1] - index_offset; ++k) {
+                                if (row_coupleBlock_index[k] == i_col) {
+                                    // Entry array(k_Sol, j_Eq) is a block
+                                    // (row_block_size x col_block_size)
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            row_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4
+                                                  (i_Eq, i_Sol, k_Eq, k_Sol, numEq, numEq, NN)];
+                                        }
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
     }
 }
+#endif // ESYS_HAVE_PASO
+
+} // namespace dudley
+
diff --git a/dudley/src/Assemble_getAssembleParameters.cpp b/dudley/src/Assemble_getAssembleParameters.cpp
index 1c8117e..f226a4c 100644
--- a/dudley/src/Assemble_getAssembleParameters.cpp
+++ b/dudley/src/Assemble_getAssembleParameters.cpp
@@ -14,177 +14,120 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/****************************************************************************
 
-/*    assemblage routines: prepares the assemble parameter set */
+  Assemblage routines: prepares the assemble parameter set
 
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+*****************************************************************************/
 
 #include "Assemble.h"
 #include "ShapeTable.h"
 
-/************************************************************************************/
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
+
+using escript::ValueError;
+
+namespace dudley {
 
-void Dudley_Assemble_getAssembleParameters(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, paso::SystemMatrix_ptr S,
-				    const escript::Data* F, bool reducedIntegrationOrder, Dudley_Assemble_Parameters * parm)
+AssembleParameters::AssembleParameters(const NodeFile* nodes,
+                                       const ElementFile* ef,
+                                       escript::ASM_ptr sm,
+                                       escript::Data& rhs,
+                                       bool reducedIntegrationOrder) :
+    elements(ef),
+    S(sm.get()),
+    F(rhs),
+    shapeFns(NULL)
 {
-    Dudley_resetError();
-    parm->shapeFns = NULL;
-    if (!isEmpty(F) && !isExpanded(F))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getAssembleParameters: Right hand side is not expanded.");
-	return;
+    if (!rhs.isEmpty() && !rhs.actsExpanded()) {
+        throw ValueError("AssembleParameters: Right hand side is not expanded.");
     }
 
-    if (!getQuadShape(elements->numDim, reducedIntegrationOrder, &(parm->shapeFns)))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getAssembleParameters: Can not locate shape functions.");
+    if (!getQuadShape(elements->numDim, reducedIntegrationOrder, &shapeFns)) {
+        throw DudleyException("AssembleParameters: Cannot locate shape functions.");
     }
-    /*  check the dimensions of S and F */
-    if (S != NULL && !isEmpty(F))
-    {
-	if (!numSamplesEqual
-	    (F, 1,
-	     (S->row_distribution->getMyNumComponents() * S->row_block_size) /
-	     S->logical_row_block_size))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_getAssembleParameters: number of rows of matrix and length of right hand side don't match.");
-	    return;
-	}
-    }
-    /* get the number of equations and components */
-    if (S == NULL)
-    {
-	if (isEmpty(F))
-	{
-	    parm->numEqu = 1;
-	    parm->numComp = 1;
-	}
-	else
-	{
-	    parm->numEqu = getDataPointSize(F);
-	    parm->numComp = parm->numEqu;
-	}
-    }
-    else
-    {
-	if (isEmpty(F))
-	{
-	    parm->numEqu = S->logical_row_block_size;
-	    parm->numComp = S->logical_col_block_size;
-	}
-	else
-	{
-	    if (getDataPointSize(F) != S->logical_row_block_size)
-	    {
-		Dudley_setError(TYPE_ERROR,
-				"Dudley_Assemble_getAssembleParameters: matrix row block size and number of components of right hand side don't match.");
-		return;
-	    }
-	    parm->numEqu = S->logical_row_block_size;
-	    parm->numComp = S->logical_col_block_size;
-	}
+
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrix* pasoMat = sm ?
+        dynamic_cast<paso::SystemMatrix*>(sm.get()) : NULL;
+
+    // check the dimensions of matrix and rhs
+    if (pasoMat != NULL && !rhs.isEmpty()) {
+        const dim_t numRows = pasoMat->row_distribution->getMyNumComponents()*pasoMat->row_block_size;
+        if (!rhs.numSamplesEqual(1, numRows/pasoMat->logical_row_block_size)) {
+            throw ValueError("AssembleParameters: number of rows of matrix "
+                             "and length of right hand side don't match.");
+        }
     }
-    parm->col_DOF = nodes->degreesOfFreedomMapping->target;
-    parm->row_DOF = nodes->degreesOfFreedomMapping->target;
-    /* get the information for the labeling of the degrees of freedom from matrix */
-    if (S != NULL)
-    {
-	/* Make sure # rows in matrix == num DOF for one of: full or reduced (use numLocalDOF for MPI) */
-	if (S->row_distribution->getMyNumComponents() * S->row_block_size ==
-	    parm->numEqu * nodes->degreesOfFreedomDistribution->getMyNumComponents())
-	{
-	    parm->row_DOF_UpperBound = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-	    parm->row_DOF = nodes->degreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else if (S->row_distribution->getMyNumComponents() * S->row_block_size ==
-		 parm->numEqu * nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents())
-	{
-	    parm->row_DOF_UpperBound = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-	    parm->row_DOF = nodes->reducedDegreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_getAssembleParameters: number of rows in matrix does not match the number of degrees of freedom in mesh");
-	}
-	/* Make sure # cols in matrix == num DOF for one of: full or reduced (use numLocalDOF for MPI) */
-	if (S->col_distribution->getMyNumComponents() * S->col_block_size ==
-	    parm->numComp * nodes->degreesOfFreedomDistribution->getMyNumComponents())
-	{
-	    parm->col_DOF_UpperBound = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-	    parm->col_DOF = nodes->degreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else if (S->col_distribution->getMyNumComponents() * S->col_block_size ==
-		 parm->numComp * nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents())
-	{
-	    parm->col_DOF_UpperBound = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-	    parm->col_DOF = nodes->reducedDegreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_getAssembleParameters: number of columns in matrix does not match the number of degrees of freedom in mesh");
-	}
+#endif
+
+    // get the number of equations and components
+    if (sm == NULL) {
+        if (rhs.isEmpty()) {
+            numEqu = 1;
+        } else {
+            numEqu = rhs.getDataPointSize();
+        }
+    } else {
+        if (!rhs.isEmpty() && rhs.getDataPointSize() != sm->getRowBlockSize()) {
+            throw ValueError("AssembleParameters: matrix row block size and "
+                      "number of components of right hand side don't match.");
+        }
+        if (sm->getRowBlockSize() != sm->getColumnBlockSize())
+            throw DudleyException("Dudley requires number of equations == number of solutions.");
+        numEqu = sm->getRowBlockSize();
     }
-    if (!Dudley_noError())
-	return;
-    /* get the information from right hand side */
-    if (!isEmpty(F))
-    {
-	if (numSamplesEqual(F, 1, nodes->degreesOfFreedomDistribution->getMyNumComponents()))
-	{
-	    parm->row_DOF_UpperBound = nodes->degreesOfFreedomDistribution->getMyNumComponents();
-	    parm->row_DOF = nodes->degreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else if (numSamplesEqual
-		 (F, 1, nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents()))
-	{
-	    parm->row_DOF_UpperBound = nodes->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-	    parm->row_DOF = nodes->reducedDegreesOfFreedomMapping->target;
-	    parm->row_jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-	}
-	else
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_getAssembleParameters: length of RHS vector does not match the number of degrees of freedom in mesh");
-	}
-	if (S == NULL)
-	{
-	    parm->col_DOF_UpperBound = parm->row_DOF_UpperBound;
-	    parm->col_DOF = parm->row_DOF;
-	    parm->row_jac = parm->row_jac;
-	}
+    DOF = nodes->borrowTargetDegreesOfFreedom();
+    DOF_UpperBound = nodes->getNumDegreesOfFreedom();
+
+#ifdef ESYS_HAVE_PASO
+    // get the information for the labeling of the degrees of freedom from
+    // the matrix
+    if (pasoMat) {
+        // Make sure # rows in matrix == num local DOF
+        const index_t numRows = pasoMat->row_distribution->getMyNumComponents()*pasoMat->row_block_size;
+        const index_t numCols = pasoMat->col_distribution->getMyNumComponents()*pasoMat->col_block_size;
+
+        if (numRows != numEqu * nodes->getNumDegreesOfFreedom()) {
+            throw DudleyException("AssembleParameters: number of rows in "
+                                  "matrix does not match the number of "
+                                  "degrees of freedom in mesh");
+        }
+        // Make sure # cols in matrix == num local DOF
+        if (numCols != numRows) {
+            throw DudleyException("AssembleParameters: number of columns in "
+                                  "matrix does not match the number of "
+                                  "degrees of freedom in mesh");
+        }
     }
+#endif
 
-    if (parm->row_jac->numDim != parm->row_jac->numDim)
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_getAssembleParameters: spacial dimension for row and column shape function must match.");
+    // get the information from right hand side
+    if (!rhs.isEmpty() &&
+            !rhs.numSamplesEqual(1, nodes->getNumDegreesOfFreedom())) {
+        throw DudleyException("AssembleParameters: length of RHS vector does "
+                              "not match the number of degrees of freedom "
+                              "in mesh");
     }
 
-    if (elements->numNodes < parm->row_jac->numShapes)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getAssembleParameters: too many nodes are expected by row.");
+    jac = elements->borrowJacobians(nodes, reducedIntegrationOrder);
+
+    if (elements->numNodes < jac->numShapes) {
+        throw DudleyException("AssembleParameters: too many nodes are "
+                              "expected by row.");
     }
-    if (parm->row_jac->numElements != elements->numElements)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getAssembleParameters: number of elements for row is wrong.");
+    if (jac->numElements != elements->numElements) {
+        throw DudleyException("AssembleParameters: number of elements for "
+                              "row is wrong.");
     }
 
-    parm->numQuad = parm->row_jac->numQuad;
-    parm->NN = elements->numNodes;
-    parm->numElements = elements->numElements;
-    parm->numDim = parm->row_jac->numDim;
-    parm->numShapes = parm->row_jac->numShapes;
-
+    NN = elements->numNodes;
+    numQuad = jac->numQuad;
+    numDim = jac->numDim;
+    numShapes = jac->numShapes;
 }
+
+} // namespace dudley
+
diff --git a/dudley/src/Assemble_getNormal.cpp b/dudley/src/Assemble_getNormal.cpp
new file mode 100644
index 0000000..140c86a
--- /dev/null
+++ b/dudley/src/Assemble_getNormal.cpp
@@ -0,0 +1,83 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "ShapeTable.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+namespace dudley {
+
+void Assemble_getNormal(const NodeFile* nodes, const ElementFile* elements,
+                        escript::Data& normal)
+{
+    if (!nodes || !elements)
+        return;
+
+    const int NN = elements->numNodes;
+    const int numDim = nodes->numDim;
+    const int numQuad = (hasReducedIntegrationOrder(normal) ? 1 : NN);
+    const int numDim_local = elements->numLocalDim;
+    const int NS = elements->numDim + 1;
+
+    const double *dSdv = NULL;
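+    // select the reference-element shape function derivatives for this
+    // spatial dimension; they are contracted with the node coordinates below
+    // to obtain dVdv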
+    switch (elements->numDim) {
+        case 2:
+            dSdv = &DTDV_2D[0][0];
+        break;
+        case 3:
+            dSdv = &DTDV_3D[0][0];
+        break;
+        default:
+            dSdv = &DTDV_1D[0][0];
+        break;
+    }
+
+    // check the dimensions of normal
+    if (!(numDim == numDim_local || numDim - 1 == numDim_local)) {
+        throw DudleyException("Assemble_setNormal: Cannot calculate normal vector");
+        throw DudleyException("Assemble_getNormal: Cannot calculate normal vector");
+    } else if (!normal.isDataPointShapeEqual(1, &numDim)) {
+        throw DudleyException("Assemble_getNormal: illegal point data shape of normal Data object");
+    } else if (!normal.numSamplesEqual(numQuad, elements->numElements)) {
+        throw DudleyException("Assemble_getNormal: illegal number of samples of normal Data object");
+    } else if (!normal.actsExpanded()) {
+        throw DudleyException("Assemble_getNormal: expanded Data object is expected for normal.");
+
+    normal.requireWrite();
+#pragma omp parallel
+    {
+        std::vector<double> local_X(NS * numDim);
+        std::vector<double> dVdv(numQuad * numDim * numDim_local);
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            // gather local coordinates of nodes into local_X
+            util::gather(NS, &elements->Nodes[INDEX2(0, e, NN)], numDim,
+                         nodes->Coordinates, &local_X[0]);
+
+            // calculate dVdv(i,j,q)=local_X(i,n)*DSDv(n,j,q)
+            util::smallMatMult(numDim, numDim_local * numQuad,
+                                     &dVdv[0], NS, &local_X[0], dSdv);
+            // get normalized vector
+            double* normal_array = normal.getSampleDataRW(e);
+            util::normalVector(numQuad, numDim, numDim_local, &dVdv[0], normal_array);
+        }
+    }
+}
+
+} // namespace dudley
+
diff --git a/dudley/src/Assemble_getSize.cpp b/dudley/src/Assemble_getSize.cpp
index 94eca49..8c3efe2 100644
--- a/dudley/src/Assemble_getSize.cpp
+++ b/dudley/src/Assemble_getSize.cpp
@@ -14,114 +14,69 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: */
-
-/*    calculates the minimum distance between two vertices of elements and assigns the value to each  */
-/*    quadrature point in element_size                                                                         */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-/************************************************************************************/
-void Dudley_Assemble_getSize(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, escript::Data* element_size)
-{
+#include <escript/index.h>
 
-    double *local_X = NULL, *element_size_array;
-    dim_t e, n0, n1, q, i, NVertices, NN, NS, numQuad, numDim;
-    double d, diff, max_diff;
-    Dudley_resetError();
+namespace dudley {
 
-    if (nodes == NULL || elements == NULL)
-    {
-	return;
-    }
-
-    numDim = nodes->numDim;
-
-    /* now we look up what type of elements we need based on the function space of element_size */
-    /* if it is DUDLEY_REDUCED_ELEMENTS or DUDLEY_REDUCED_FACE_ELEMENTS then we have single quad point */
-
-    if (Dudley_Assemble_reducedIntegrationOrder(element_size))
-    {
-	numQuad = 1;
-    }
-    else
-    {
-	numQuad = elements->numDim + 1;
+void Assemble_getSize(const NodeFile* nodes, const ElementFile* elements,
+                      escript::Data& out)
+{
+    if (!nodes || !elements)
+        return;
+
+    const int numDim = nodes->numDim;
+
+    // now we look up what type of elements we need based on the function space
+    // of out. If it is DUDLEY_REDUCED_ELEMENTS or
+    // DUDLEY_REDUCED_FACE_ELEMENTS then we have single quad point
+    int numQuad = (hasReducedIntegrationOrder(out) ? 1 : elements->numNodes);
+    const int NN = elements->numNodes;
+    const int NS = elements->numDim + 1;
+    const int NVertices = elements->numDim + 1;
+
+    // check the dimensions of out
+    if (!out.numSamplesEqual(numQuad, elements->numElements)) {
+        throw DudleyException("Assemble_getSize: illegal number of samples of element size Data object");
+    } else if (!out.isDataPointShapeEqual(0, &numDim)) {
+        throw DudleyException("Assemble_getSize: illegal data point shape of element size Data object");
+    } else if (!out.actsExpanded()) {
+        throw DudleyException("Assemble_getSize: expanded Data object is expected for element size.");
     }
 
-    NN = elements->numNodes;
-    NS = elements->numDim + 1;
-    NVertices = elements->numDim + 1;
-
-    /* check the dimensions of element_size */
-
-    if (!numSamplesEqual(element_size, numQuad, elements->numElements))
+    // now we can start
+    out.requireWrite();
+#pragma omp parallel
     {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getSize: illegal number of samples of element size Data object");
-    }
-    else if (!isDataPointShapeEqual(element_size, 0, &(numDim)))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getSize: illegal data point shape of element size Data object");
-    }
-    else if (!isExpanded(element_size))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_getSize: expanded Data object is expected for element size.");
-    }
-    /* now we can start: */
+        std::vector<double> local_X(NN * numDim);
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            // gather local coordinates of nodes into local_X(numDim,NN)
+            util::gather(NS, &elements->Nodes[INDEX2(0, e, NN)], numDim,
+                         nodes->Coordinates, &local_X[0]);
+            // calculate the maximum distance between any two vertices
+            double max_diff = 0;
+            for (int n0 = 0; n0 < NVertices; n0++) {
+                for (int n1 = n0 + 1; n1 < NVertices; n1++) {
+                    double diff = 0;
+                    for (int i = 0; i < numDim; i++) {
+                        const double d = local_X[INDEX2(i, n0, numDim)] - local_X[INDEX2(i, n1, numDim)];
+                        diff += d * d;
+                    }
+
+                    max_diff = std::max(max_diff, diff);
+                }
+            }
+            max_diff = sqrt(max_diff);
+            // set all values to max_diff
+            double* out_array = out.getSampleDataRW(e);
+            for (int q = 0; q < numQuad; q++)
+                out_array[q] = max_diff;
+        }
+    } // end of parallel region
+}
 
-    if (Dudley_noError())
-    {
-	requireWrite(element_size);
-#pragma omp parallel private(local_X)
-	{
-	    /* allocation of work arrays */
-	    local_X = new double[NN * numDim];
-	    if (!Dudley_checkPtr(local_X))
-	    {
-		/* open the element loop */
-#pragma omp for private(e,max_diff,diff,n0,n1,d,q,i,element_size_array) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    /* gather local coordinates of nodes into local_X(numDim,NN): */
-		    Dudley_Util_Gather_double(NS, &(elements->Nodes[INDEX2(0, e, NN)]), numDim, nodes->Coordinates,
-					      local_X);
-		    /* calculate minimal differences */
-		    max_diff = 0;
-		    for (n0 = 0; n0 < NVertices; n0++)
-		    {
-			for (n1 = n0 + 1; n1 < NVertices; n1++)
-			{
-			    diff = 0;
-			    for (i = 0; i < numDim; i++)
-			    {
-				d = local_X[INDEX2(i, n0, numDim)] - local_X[INDEX2(i, n1, numDim)];
-				diff += d * d;
-			    }
+} // namespace dudley
 
-    			    max_diff = MAX(max_diff, diff);
-			    
-			}
-		    }
-		    max_diff = sqrt(max_diff);
-		    /* set all values to max_diff */
-		    element_size_array = getSampleDataRW(element_size, e);
-		    for (q = 0; q < numQuad; q++)
-			element_size_array[q] = max_diff;
-		}
-	    }
-	    delete[] local_X;
-	}			/* end of parallel region */
-    }
-    return;
-}
diff --git a/dudley/src/Assemble_gradient.cpp b/dudley/src/Assemble_gradient.cpp
index aff3a52..b98cb8b 100644
--- a/dudley/src/Assemble_gradient.cpp
+++ b/dudley/src/Assemble_gradient.cpp
@@ -14,484 +14,202 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage of Jacobians: calculate the gradient of nodal data at quadrature points */
-
-/************************************************************************************/
-
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-/***************************************************************************************/
 
-/* Unless the loops in here get complicated again this file should be compiled for loop unrolling */
+#include <escript/index.h>
+
+// Unless the loops in here get complicated again this file should be compiled
+// with loop unrolling
+
+namespace dudley {
 
-void Dudley_Assemble_gradient(Dudley_NodeFile * nodes, Dudley_ElementFile * elements,
-			      escript::Data* grad_data, const escript::Data* data)
+void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
+                       escript::Data& grad_data, const escript::Data& data)
 {
-    size_t localGradSize = 0;
-    register dim_t e, q, l, s, n;
-    register __const double *data_array;
-    register double *grad_data_e;
-    dim_t numNodes = 0, numShapesTotal = 0, numComps, NN = 0, numDim = 0, numQuad = 0;
-    type_t data_type = getFunctionSpaceType(data);
-    bool reducedIntegrationOrder = FALSE;
-    Dudley_ElementFile_Jacobeans *jac = NULL;
+    if (!nodes || !elements)
+        return;
 
-    Dudley_resetError();
-    if (nodes == NULL || elements == NULL)
-	return;
-    numComps = getDataPointSize(data);
-    NN = elements->numNodes;
-    reducedIntegrationOrder = Dudley_Assemble_reducedIntegrationOrder(grad_data);
+    const int numComps = data.getDataPointSize();
+    const int NN = elements->numNodes;
+    const bool reducedIntegrationOrder = hasReducedIntegrationOrder(grad_data);
+    const int data_type = data.getFunctionSpace().getTypeCode();
 
-    if (data_type == DUDLEY_NODES)
-    {
-	numNodes = nodes->nodesMapping->numTargets;
-    }
-    else if (data_type == DUDLEY_REDUCED_NODES)
-    {
-	numNodes = nodes->reducedNodesMapping->numTargets;
-    }
-    else if (data_type == DUDLEY_DEGREES_OF_FREEDOM)
-    {
-	if (elements->MPIInfo->size > 1)
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_gradient: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
-	    return;
-	}
-	numNodes = nodes->degreesOfFreedomMapping->numTargets;
-    }
-    else if (data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-    {
-	if (elements->MPIInfo->size > 1)
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_gradient: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
-	    return;
-	}
-	numNodes = nodes->reducedDegreesOfFreedomMapping->numTargets;
-    }
-    else
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_gradient: Cannot calculate gradient of data because of unsuitable input data representation.");
+    dim_t numNodes = 0;
+    if (data_type == DUDLEY_NODES) {
+        numNodes = nodes->getNumNodes();
+    } else if (data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+        if (elements->MPIInfo->size > 1) {
+            throw DudleyException("Assemble_gradient: for more than one "
+                "processor DEGREES_OF_FREEDOM data are not accepted as input.");
+        }
+        numNodes = nodes->getNumDegreesOfFreedom();
+    } else {
+        throw DudleyException("Assemble_gradient: Cannot calculate gradient "
+               "of data because of unsuitable input data representation.");
     }
 
-    jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, reducedIntegrationOrder);
-    if (Dudley_noError())
-    {
-	numDim = jac->numDim;
-	numShapesTotal = jac->numShapes;
-	numQuad = jac->numQuad;
-	localGradSize = sizeof(double) * numDim * numQuad * numComps;
-	/* check the dimensions of data */
+    ElementFile_Jacobians* jac = elements->borrowJacobians(nodes,
+                                                     reducedIntegrationOrder);
+    const int numDim = jac->numDim;
+    const int numShapesTotal = jac->numShapes;
+    const int numQuad = jac->numQuad;
+    const size_t localGradSize = sizeof(double) * numDim * numQuad * numComps;
 
-	if (!numSamplesEqual(grad_data, numQuad, elements->numElements))
-	{
-	    Dudley_setError(TYPE_ERROR, "Dudley_Assemble_gradient: illegal number of samples in gradient Data object");
-	}
-	else if (!numSamplesEqual(data, 1, numNodes))
-	{
-	    Dudley_setError(TYPE_ERROR, "Dudley_Assemble_gradient: illegal number of samples of input Data object");
-	}
-	else if (numDim * numComps != getDataPointSize(grad_data))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_gradient: illegal number of components in gradient data object.");
-	}
-	else if (!isExpanded(grad_data))
-	{
-	    Dudley_setError(TYPE_ERROR, "Dudley_Assemble_gradient: expanded Data object is expected for output data.");
-	}
+    // check the dimensions of data
+    if (!grad_data.numSamplesEqual(numQuad, elements->numElements)) {
+        throw DudleyException("Assemble_gradient: illegal number of samples in gradient Data object");
+    } else if (!data.numSamplesEqual(1, numNodes)) {
+        throw DudleyException("Assemble_gradient: illegal number of samples of input Data object");
+    } else if (numDim * numComps != grad_data.getDataPointSize()) {
+        throw DudleyException("Assemble_gradient: illegal number of components in gradient data object.");
+    } else if (!grad_data.actsExpanded()) {
+        throw DudleyException("Assemble_gradient: expanded Data object is expected for output data.");
     }
-    /* now we can start */
 
-    if (Dudley_noError())
+    grad_data.requireWrite();
+#pragma omp parallel
     {
-	requireWrite(grad_data);
-#pragma omp parallel private(e,q,l,s,n,data_array,grad_data_e)
-	{
-	    if (data_type == DUDLEY_NODES)
-	    {
-		if (numDim == 1)
-		{
-		    const dim_t numShapes = 2;
-#define DIM 1
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, n);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 2)
-		{
-		    const dim_t numShapes = 3;
-#define DIM 2
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, n);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 3)
-		{
-		    const dim_t numShapes = 4;
-#define DIM 3
-#pragma omp for private(e,grad_data_e,s,n,data_array,q,l) schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, n);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 2, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-	    }
-	    else if (data_type == DUDLEY_REDUCED_NODES)
-	    {
-		if (numDim == 1)
-		{
-		    const dim_t numShapes = 2;
-#define DIM 1
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedNodesMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+        if (data_type == DUDLEY_NODES) {
+            if (numDim == 1) {
+                const int numShapes = 2;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(n);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 2)
-		{
-		    const dim_t numShapes = 3;
-#define DIM 2
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedNodesMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            } else if (numDim == 2) {
+                const int numShapes = 3;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(n);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 3)
-		{
-		    const dim_t numShapes = 4;
-#define DIM 3
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedNodesMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            } else if (numDim == 3) {
+                const int numShapes = 4;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(n);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 2, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-	    }
-	    else if (data_type == DUDLEY_DEGREES_OF_FREEDOM)
-	    {
-		if (numDim == 1)
-		{
-		    const dim_t numShapes = 2;
-#define DIM 1
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->degreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 2, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            }
+        } else if (data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+            const index_t* target = nodes->borrowTargetDegreesOfFreedom();
+            if (numDim == 1) {
+                const int numShapes = 2;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(target[n]);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 2)
-		{
-		    const dim_t numShapes = 3;
-#define DIM 2
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->degreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            } else if (numDim == 2) {
+                const int numShapes = 3;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(target[n]);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 3)
-		{
-		    const dim_t numShapes = 4;
-#define DIM 3
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->degreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            } else if (numDim == 3) {
+                const int numShapes = 4;
+#pragma omp for
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int s = 0; s < numShapes; s++) {
+                        const index_t n = elements->Nodes[INDEX2(s, e, NN)];
+                        const double* data_array = data.getSampleDataRO(target[n]);
+                        for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 2, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-	    }
-	    else if (data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-	    {
-		if (numDim == 1)
-		{
-		    const dim_t numShapes = 2;
-#define DIM 1
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedDegreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 2)
-		{
-		    const dim_t numShapes = 3;
-#define DIM 2
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedDegreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-		else if (numDim == 3)
-		{
-		    const dim_t numShapes = 4;
-#define DIM 3
-#pragma omp for schedule(static)
-		    for (e = 0; e < elements->numElements; e++)
-		    {
-			grad_data_e = getSampleDataRW(grad_data, e);
-			memset(grad_data_e, 0, localGradSize);
-			for (s = 0; s < numShapes; s++)
-			{
-			    n = elements->Nodes[INDEX2(s, e, NN)];
-			    data_array = getSampleDataRO(data, nodes->reducedDegreesOfFreedomMapping->target[n]);
-			    for (q = 0; q < numQuad; q++)
-			    {
-#pragma ivdep
-				for (l = 0; l < numComps; l++)
-				{
-				    grad_data_e[INDEX4(l, 0, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 1, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				    grad_data_e[INDEX4(l, 2, q, 0, numComps, DIM, numQuad)] +=
-					data_array[l] *
-					jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, DIM, numQuad, 1)];
-				}
-			    }
-			}
-		    }
-#undef DIM
-		}
-	    }
-	}			/* end parallel region */
-    }
+                            for (int l = 0; l < numComps; l++) {
+                                grad_data_e[INDEX4(l, 0, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 0, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 1, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 1, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                                grad_data_e[INDEX4(l, 2, q, 0, numComps, numDim, numQuad)] +=
+                                    data_array[l] *
+                                    jac->DSDX[INDEX5(s, 2, q, 0, e, numShapesTotal, numDim, numQuad, 1)];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    } // end parallel region
 }
+
+} // namespace dudley
+
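The rewritten gradient kernel above addresses grad_data_e and jac->DSDX through escript's INDEX2/INDEX4/INDEX5 macros (pulled in via escript/index.h), which linearise multi-dimensional accesses with the first index varying fastest. The following stand-alone sketch is an illustrative re-implementation of that addressing scheme, not the macros from escript/index.h, and the sizes are made up:

#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative index helpers: the first argument varies fastest.
inline std::size_t index2(std::size_t i, std::size_t j, std::size_t ni)
{
    return i + ni * j;
}

inline std::size_t index4(std::size_t i0, std::size_t i1, std::size_t i2,
                          std::size_t i3, std::size_t n0, std::size_t n1,
                          std::size_t n2)
{
    return i0 + n0 * (i1 + n1 * (i2 + n2 * i3));
}

int main()
{
    // A gradient sample is laid out as [numComps][numDim][numQuad], so
    // component l of derivative d at quadrature point q sits at
    // index4(l, d, q, 0, numComps, numDim, numQuad).
    const std::size_t numComps = 2, numDim = 3, numQuad = 4;
    std::vector<double> grad(numComps * numDim * numQuad, 0.0);
    grad[index4(1, 2, 3, 0, numComps, numDim, numQuad)] = 1.0;
    assert(grad[1 + numComps * (2 + numDim * 3)] == 1.0);
    return 0;
}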
diff --git a/dudley/src/Assemble_integrate.cpp b/dudley/src/Assemble_integrate.cpp
index e8a31dd..14c46e7 100644
--- a/dudley/src/Assemble_integrate.cpp
+++ b/dudley/src/Assemble_integrate.cpp
@@ -14,108 +14,70 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*    assemblage routines: integrates data on quadrature points   */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-/************************************************************************************/
+#include <escript/index.h>
+
+namespace dudley {
 
-void Dudley_Assemble_integrate(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, const escript::Data* data, double *out)
+void Assemble_integrate(const NodeFile* nodes, const ElementFile* elements,
+                        const escript::Data& data, std::vector<double>& out)
 {
-/*    type_t data_type=getFunctionSpaceType(data);*/
-    dim_t numQuadTotal;
-    dim_t numComps = getDataPointSize(data);
-    Dudley_ElementFile_Jacobeans *jac = NULL;
-    Esys_MPI_rank my_mpi_rank;
+    if (!nodes || !elements)
+        return;
 
-    Dudley_resetError();
-    if (nodes == NULL || elements == NULL)
-	return;
-    my_mpi_rank = nodes->MPIInfo->rank;
-    /* set some parameter */
-    jac = Dudley_ElementFile_borrowJacobeans(elements, nodes, Dudley_Assemble_reducedIntegrationOrder(data));
-    if (Dudley_noError())
-    {
-	numQuadTotal = jac->numQuad;
-	/* check the shape of the data  */
-	if (!numSamplesEqual(data, numQuadTotal, elements->numElements))
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_integrate: illegal number of samples of integrant kernel Data object");
-	}
-	/* now we can start */
+    const int my_mpi_rank = nodes->MPIInfo->rank;
+    const ElementFile_Jacobians* jac = elements->borrowJacobians(nodes,
+                                         hasReducedIntegrationOrder(data));
 
-	if (Dudley_noError())
-	{
-	    dim_t q, e, i;
-	    __const double *data_array = NULL;
-	    double *out_local = NULL, rtmp;
-	    for (q = 0; q < numComps; q++)
-		out[q] = 0;
-#pragma omp parallel private(q,i,rtmp,data_array,out_local)
-	    {
-		out_local = new double[numComps];
-		if (!Dudley_checkPtr(out_local))
-		{
-		    /* initialize local result */
+    const int numQuadTotal = jac->numQuad;
+    // check the shape of the data
+    if (!data.numSamplesEqual(numQuadTotal, elements->numElements)) {
+        throw DudleyException("Assemble_integrate: illegal number of samples of integrand kernel Data object");
+    }
+
+    const int numComps = data.getDataPointSize();
 
-		    for (i = 0; i < numComps; i++)
-			out_local[i] = 0;
+    for (int q = 0; q < numComps; q++)
+        out[q] = 0;
 
-		    /* open the element loop */
+#pragma omp parallel
+    {
+        std::vector<double> out_local(numComps);
 
-		    if (isExpanded(data))
-		    {
-#pragma omp for private(e) schedule(static)
-			for (e = 0; e < elements->numElements; e++)
-			{
-			    if (elements->Owner[e] == my_mpi_rank)
-			    {
-				double vol = jac->absD[e] * jac->quadweight;
-				data_array = getSampleDataRO(data, e);
-				for (q = 0; q < numQuadTotal; q++)
-				{
-				    for (i = 0; i < numComps; i++)
-					out_local[i] += data_array[INDEX2(i, q, numComps)] * vol;
-				}
-			    }
-			}
-		    }
-		    else
-		    {
-#pragma omp for private(e) schedule(static)
-			for (e = 0; e < elements->numElements; e++)
-			{
-			    if (elements->Owner[e] == my_mpi_rank)
-			    {
-				double vol = jac->absD[e] * jac->quadweight;
-				data_array = getSampleDataRO(data, e);
-				rtmp = 0.;
-				for (q = 0; q < numQuadTotal; q++)
-				    rtmp += vol;
-				for (i = 0; i < numComps; i++)
-				    out_local[i] += data_array[i] * rtmp;
-			    }
-			}
-		    }
-		    /* add local results to global result */
+        if (data.actsExpanded()) {
+#pragma omp for
+            for (index_t e = 0; e < elements->numElements; e++) {
+                if (elements->Owner[e] == my_mpi_rank) {
+                    const double vol = jac->absD[e] * jac->quadweight;
+                    const double* data_array = data.getSampleDataRO(e);
+                    for (int q = 0; q < numQuadTotal; q++) {
+                        for (int i = 0; i < numComps; i++)
+                            out_local[i] += data_array[INDEX2(i, q, numComps)] * vol;
+                    }
+                }
+            }
+        } else {
+#pragma omp for
+            for (index_t e = 0; e < elements->numElements; e++) {
+                if (elements->Owner[e] == my_mpi_rank) {
+                    const double vol = jac->absD[e] * jac->quadweight;
+                    const double* data_array = data.getSampleDataRO(e);
+                    double rtmp = 0.;
+                    for (int q = 0; q < numQuadTotal; q++)
+                        rtmp += vol;
+                    for (int i = 0; i < numComps; i++)
+                        out_local[i] += data_array[i] * rtmp;
+                }
+            }
+        }
+        // add local results to global result
 #pragma omp critical
-		    for (i = 0; i < numComps; i++)
-			out[i] += out_local[i];
-		}
-		delete[] out_local;
-	    }
-	}
+        for (int i = 0; i < numComps; i++)
+            out[i] += out_local[i];
     }
 }
+
+} // namespace dudley
+
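Assemble_integrate above replaces the hand-rolled allocation and error handling of the C version with a thread-private std::vector that is merged under an omp critical section. A minimal, self-contained sketch of that accumulate-then-merge pattern follows; all names and numbers are invented for illustration, and it should be compiled with OpenMP enabled to run in parallel:

#include <cstdio>
#include <vector>

int main()
{
    const int numComps = 3;
    const int numElements = 1000;
    std::vector<double> out(numComps, 0.0);

#pragma omp parallel
    {
        // each thread accumulates its share of elements privately
        std::vector<double> out_local(numComps, 0.0);
#pragma omp for
        for (int e = 0; e < numElements; e++) {
            const double vol = 0.5;            // stand-in for jac->absD[e] * quadweight
            for (int i = 0; i < numComps; i++)
                out_local[i] += (i + 1) * vol; // stand-in for the data contribution
        }
        // merge the per-thread partial sums into the shared result
#pragma omp critical
        for (int i = 0; i < numComps; i++)
            out[i] += out_local[i];
    }

    for (int i = 0; i < numComps; i++)
        std::printf("out[%d] = %g\n", i, out[i]);
    return 0;
}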
diff --git a/dudley/src/Assemble_interpolate.cpp b/dudley/src/Assemble_interpolate.cpp
index ddd4a8d..0e54f6e 100644
--- a/dudley/src/Assemble_interpolate.cpp
+++ b/dudley/src/Assemble_interpolate.cpp
@@ -14,140 +14,82 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*	  assemblage routines: interpolates nodal data in a data array onto elements (=integration points) */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
+#include "ShapeTable.h"
 #include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-#include "ShapeTable.h"
+#include <escript/index.h>
 
-/************************************************************************************/
+namespace dudley {
 
-void Dudley_Assemble_interpolate(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, const escript::Data* data,
-				 escript::Data* interpolated_data)
+void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
+                          const escript::Data& data,
+                          escript::Data& interpolated_data)
 {
-    __const double *data_array;
-    double *local_data = NULL;
-    bool reduced_integration = FALSE;
-    dim_t q, i, NS_DOF, NN, numNodes = 0, e, numQuad = 0;
-    dim_t numComps = getDataPointSize(data);
-    index_t *map = NULL;
-    const double *shapeFns = 0;
-    type_t data_type = getFunctionSpaceType(data);
-    size_t numComps_size;
-    Dudley_resetError();
-    if (nodes == NULL || elements == NULL)
-	return;
-    reduced_integration = Dudley_Assemble_reducedIntegrationOrder(interpolated_data);
-    NN = elements->numNodes;
-
-    /* set some parameter */
-
-    if (data_type == DUDLEY_NODES)
-    {
-	numNodes = Dudley_NodeFile_getNumNodes(nodes);
-	map = Dudley_NodeFile_borrowTargetNodes(nodes);
-    }
-    else if (data_type == DUDLEY_REDUCED_NODES)
-    {
-	numNodes = Dudley_NodeFile_getNumReducedNodes(nodes);
-	map = Dudley_NodeFile_borrowTargetReducedNodes(nodes);
-    }
-    else if (data_type == DUDLEY_DEGREES_OF_FREEDOM)
-    {
-	if (elements->MPIInfo->size > 1)
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_interpolate: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
-	    return;
-	}
-	numNodes = Dudley_NodeFile_getNumDegreesOfFreedom(nodes);
-	map = Dudley_NodeFile_borrowTargetDegreesOfFreedom(nodes);
-    }
-    else if (data_type == DUDLEY_REDUCED_DEGREES_OF_FREEDOM)
-    {
-	if (elements->MPIInfo->size > 1)
-	{
-	    Dudley_setError(TYPE_ERROR,
-			    "Dudley_Assemble_interpolate: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
-	    return;
-	}
-	numNodes = Dudley_NodeFile_getNumReducedDegreesOfFreedom(nodes);
-	map = Dudley_NodeFile_borrowTargetReducedDegreesOfFreedom(nodes);
+    if (!nodes || !elements)
+        return;
+
+    const int data_type = data.getFunctionSpace().getTypeCode();
+    const bool reduced_integration = hasReducedIntegrationOrder(interpolated_data);
+
+    dim_t numNodes = 0;
+    const index_t* map = NULL;
+
+    if (data_type == DUDLEY_NODES) {
+        numNodes = nodes->getNumNodes();
+        map = nodes->borrowTargetNodes();
+    } else if (data_type == DUDLEY_DEGREES_OF_FREEDOM) {
+        if (elements->MPIInfo->size > 1) {
+            throw DudleyException("Assemble_interpolate: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
+        }
+        numNodes = nodes->getNumDegreesOfFreedom();
+        map = nodes->borrowTargetDegreesOfFreedom();
+    } else {
+        throw DudleyException("Assemble_interpolate: Cannot interpolate data");
     }
-    else
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_interpolate: Cannot interpolate data");
-	return;
-    }
-
-    numQuad = reduced_integration ? 1 : (elements->numDim + 1);
-    NS_DOF = elements->numDim + 1;
 
-    /* check the dimensions of interpolated_data and data */
-
-    if (!numSamplesEqual(interpolated_data, numQuad, elements->numElements))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_interpolate: illegal number of samples of output Data object");
-    }
-    else if (!numSamplesEqual(data, 1, numNodes))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_interpolate: illegal number of samples of input Data object");
-    }
-    else if (numComps != getDataPointSize(interpolated_data))
-    {
-	Dudley_setError(TYPE_ERROR,
-			"Dudley_Assemble_interpolate: number of components of input and interpolated Data do not match.");
-    }
-    else if (!isExpanded(interpolated_data))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_interpolate: expanded Data object is expected for output data.");
+    const int numComps = data.getDataPointSize();
+    const int NN = elements->numNodes;
+    const int numQuad = reduced_integration ? 1 : elements->numNodes;
+    const int NS_DOF = elements->numDim + 1;
+    const double *shapeFns = NULL;
+
+    // check the dimensions of interpolated_data and data
+    if (!interpolated_data.numSamplesEqual(numQuad, elements->numElements)) {
+        throw DudleyException("Assemble_interpolate: illegal number of samples of output Data object");
+    } else if (!data.numSamplesEqual(1, numNodes)) {
+        throw DudleyException("Assemble_interpolate: illegal number of samples of input Data object");
+    } else if (numComps != interpolated_data.getDataPointSize()) {
+        throw DudleyException("Assemble_interpolate: number of components of input and interpolated Data do not match.");
+    } else if (!interpolated_data.actsExpanded()) {
+        throw DudleyException("Assemble_interpolate: expanded Data object is expected for output data.");
     }
 
-    if (Dudley_noError() && !getQuadShape(elements->numDim, reduced_integration, &shapeFns))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_interpolate: unable to locate shape function.");
+    if (!getQuadShape(elements->numDim, reduced_integration, &shapeFns)) {
+        throw DudleyException("Assemble_interpolate: unable to locate shape function.");
     }
 
-    /* now we can start */
-
-    if (Dudley_noError())
+    interpolated_data.requireWrite();
+#pragma omp parallel
     {
-	requireWrite(interpolated_data);
-#pragma omp parallel private(local_data, numComps_size)
-	{
-	    local_data = NULL;
-	    /* allocation of work arrays */
-	    local_data = new double[NS_DOF * numComps];
-	    if (!Dudley_checkPtr(local_data))
-	    {
-		numComps_size = (size_t) numComps *sizeof(double);
-		/* open the element loop */
-#pragma omp for private(e,q,i,data_array) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    for (q = 0; q < NS_DOF; q++)
-		    {
-			i = elements->Nodes[INDEX2(q, e, NN)];
-			data_array = getSampleDataRO(data, map[i]);
-			memcpy(&(local_data[INDEX3(0, q, 0, numComps, NS_DOF)]), data_array, numComps_size);
-		    }
-		    /*  calculate interpolated_data=local_data*S */
-		    Dudley_Util_SmallMatSetMult1(1, numComps, numQuad, getSampleDataRW(interpolated_data, e),
-						 NS_DOF, local_data, /*basis->S */ shapeFns);
-		}		/* end of element loop */
-	    }
-	    delete[] local_data;
-	}			/* end of parallel region */
-    }
+        std::vector<double> local_data(NS_DOF * numComps);
+        const size_t numComps_size = numComps * sizeof(double);
+        // open the element loop
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            for (int q = 0; q < NS_DOF; q++) {
+                const index_t i = elements->Nodes[INDEX2(q, e, NN)];
+                const double* data_array = data.getSampleDataRO(map[i]);
+                memcpy(&local_data[INDEX3(0, q, 0, numComps, NS_DOF)],
+                       data_array, numComps_size);
+            }
+            // calculate interpolated_data=local_data*S
+            util::smallMatSetMult1(1, numComps, numQuad,
+                            interpolated_data.getSampleDataRW(e), NS_DOF,
+                            &local_data[0], shapeFns);
+        } // end of element loop
+    } // end of parallel region
 }
+
+} // namespace dudley
+
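The interpolation loop gathers nodal values into local_data and then forms interpolated_data = local_data * S via util::smallMatSetMult1. As a rough stand-alone sketch of that small dense product, assuming a [numComps x NS_DOF] by [NS_DOF x numQuad] layout with the component index fastest (smallMatSetMult1 itself is not reproduced here, and the numbers below are made up):

#include <cstdio>

// Sketch: result(l, q) = sum over s of local(l, s) * S(s, q)
static void smallMatMult(int numComps, int numQuad, double* out,
                         int nsDOF, const double* local, const double* S)
{
    for (int q = 0; q < numQuad; q++)
        for (int l = 0; l < numComps; l++) {
            double sum = 0.0;
            for (int s = 0; s < nsDOF; s++)
                sum += local[l + numComps * s] * S[s + nsDOF * q];
            out[l + numComps * q] = sum;
        }
}

int main()
{
    const int numComps = 1, nsDOF = 3, numQuad = 1;
    // nodal values of a scalar over one triangle
    const double local[nsDOF] = { 1.0, 2.0, 3.0 };
    // linear shape functions evaluated at the centroid (reduced integration)
    const double S[nsDOF] = { 1.0 / 3, 1.0 / 3, 1.0 / 3 };
    double out[numComps * numQuad];
    smallMatMult(numComps, numQuad, out, nsDOF, local, S);
    std::printf("interpolated value = %g\n", out[0]); // prints 2
    return 0;
}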
diff --git a/dudley/src/Assemble_jacobeans.cpp b/dudley/src/Assemble_jacobeans.cpp
deleted file mode 100644
index df33872..0000000
--- a/dudley/src/Assemble_jacobeans.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-/* Unless the loops in here get complicated again, this file should be compiled with loop unrolling */
-
-/* input: 
-
-double* coordinates[DIM*(*)]
-dim_t numQuad
-double* QuadWeights[numQuad]
-dim_t numShape
-dim_t numElements
-dim_t numNodes
-index_t* nodes[numNodes*numElements]  where NUMSIDES*numShape<=numNodes
-double* DSDv[numShape*DIM*numQuad]
-dim_t numTest
-double* DTDv[LOCDIM*numTest*numQuad] 
-index_t* element_id[numElements]
-
-output:
-
-double* dTdX[DIM*numTest*NUMSIDES*numQuad*numElements]
-double* volume[numQuad*numElements]
-
-*/
-
-#include "ShapeTable.h"
-
-#define SCALING(_nsub_,_dim_) pow(1./(double)(_nsub_),1./(double)(_dim_))
-
-/************************************************************************************/
-/*                                                            */
-/*  Jacobean 2D with area element                             */
-/*                                                            */
-void Dudley_Assemble_jacobeans_2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
-			   double *dTdX, double *absD, double *quadweight, index_t * element_id)
-{
-#define DIM 2
-#define LOCDIM 2
-    register int e, q;
-    char error_msg[LenErrorMsg_MAX];
-    const dim_t numTest = 3;	/* hoping this is used in constant folding */
-    *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6;	/* numQuad is 1 or 3 */
-#pragma omp parallel
-    {
-	register double dXdv00, dXdv10, dXdv01, dXdv11, dvdX00, dvdX10, dvdX01, dvdX11, D, invD;
-#pragma omp for private(e,q, dXdv00,dXdv10,dXdv01,dXdv11,dvdX00,dvdX10,dvdX01,dvdX11, D,invD) schedule(static)
-	for (e = 0; e < numElements; e++)
-	{
-#define COMPDXDV0(P)  coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
-coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*1+\
-coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(0)
-
-#define COMPDXDV1(P)  coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
-coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*(0)+\
-coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(1)
-
-	    dXdv00 = 0;
-	    dXdv10 = 0;
-	    dXdv01 = 0;
-	    dXdv11 = 0;
-	    dXdv00 = COMPDXDV0(0);
-	    dXdv10 = COMPDXDV0(1);
-	    dXdv01 = COMPDXDV1(0);
-	    dXdv11 = COMPDXDV1(1);
-	    D = dXdv00 * dXdv11 - dXdv01 * dXdv10;
-	    absD[e] = ABS(D);
-	    if (D == 0.)
-	    {
-		sprintf(error_msg, "Dudley_Assemble_jacobeans_2D: element %d (id %d) has area zero.", e, element_id[e]);
-		Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
-	    }
-	    else
-	    {
-		invD = 1. / D;
-		dvdX00 = dXdv11 * invD;
-		dvdX10 = -dXdv10 * invD;
-		dvdX01 = -dXdv01 * invD;
-		dvdX11 = dXdv00 * invD;
-		if (numQuad == 1)
-		{
-		    dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
-		    dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
-		    dTdX[INDEX4(2, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
-
-		    dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
-		    dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
-		    dTdX[INDEX4(2, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
-
-		}
-		else		/* numQuad==3 */
-		{
-		    for (q = 0; q < numTest; ++q)	/* relying on unroll loops to optimise this */
-		    {
-			dTdX[INDEX4(0, 0, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
-			dTdX[INDEX4(1, 0, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
-			dTdX[INDEX4(2, 0, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
-
-			dTdX[INDEX4(0, 1, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
-			dTdX[INDEX4(1, 1, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
-			dTdX[INDEX4(2, 1, q, e, numTest, DIM, numQuad)] =
-			    DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
-
-		    }
-		}
-	    }
-	}
-    }				/* end parallel */
-#undef DIM
-#undef LOCDIM
-#undef DTDXSET
-#undef COMPDXDV0
-#undef COMPDXDV1
-}
-
-/************************************************************************************/
-/*                                                            */
-/*  Jacobean 1D manifold in 2D and 1D elements                */
-/*                                                            */
-void Dudley_Assemble_jacobeans_2D_M1D_E1D(double *coordinates, dim_t numQuad,
-				   dim_t numElements, dim_t numNodes, index_t * nodes,
-				   double *dTdX, double *absD, double *quadweight, index_t * element_id)
-{
-#define DIM 2
-#define LOCDIM 1
-    register int e;
-    char error_msg[LenErrorMsg_MAX];
-    const dim_t numTest = 2;
-    *quadweight = (numQuad == 1) ? 1.0 : 0.5;
-    /* numQuad is 1 or 2 */
-#pragma omp parallel
-    {
-	register double dXdv00, dXdv10, dvdX00, dvdX01, D, invD;
-#pragma omp for private(e,dXdv00,dXdv10,dvdX00,dvdX01,D,invD) schedule(static)
-	for (e = 0; e < numElements; e++)
-	{
-	    dXdv00 = 0;
-	    dXdv10 = 0;
-	    dXdv00 +=
-		coordinates[INDEX2(0, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
-		coordinates[INDEX2(0, nodes[INDEX2(1, e, numNodes)], DIM)];
-	    dXdv00 +=
-		coordinates[INDEX2(1, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
-		coordinates[INDEX2(1, nodes[INDEX2(1, e, numNodes)], DIM)];
-	    D = dXdv00 * dXdv00 + dXdv10 * dXdv10;
-	    if (D == 0.)
-	    {
-		sprintf(error_msg, "Dudley_Assemble_jacobeans_2D_M1D_E1D: element %d (id %d) has length zero.", e,
-			element_id[e]);
-		Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
-	    }
-	    else
-	    {
-		invD = 1. / D;
-		dvdX00 = dXdv00 * invD;
-		dvdX01 = dXdv10 * invD;
-		/* The number of quad points is 1 or 2 */
-		dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
-		dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
-		dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
-		dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
-		absD[e] = sqrt(D);
-		if (numQuad == 2)
-		{
-		    dTdX[INDEX4(0, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
-		    dTdX[INDEX4(0, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
-		    dTdX[INDEX4(1, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
-		    dTdX[INDEX4(1, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
-		}
-	    }
-	}
-    }				/* end parallel */
-#undef DIM
-#undef LOCDIM
-}
-
-/************************************************************************************/
-/*                                                            */
-/*  Jacobean 3D                                               */
-/*                                                            */
-void Dudley_Assemble_jacobeans_3D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
-			   double *dTdX, double *absD, double *quadweight, index_t * element_id)
-{
-#define DIM 3
-#define LOCDIM 3
-    int e, q, s;
-    char error_msg[LenErrorMsg_MAX];
-    /* numQuad is 1 or 4 */
-    const dim_t numShape = 4, numTest = 4;
-    *quadweight = (numQuad == 1) ? 1. / 6 : 1. / 24;
-
-#pragma omp parallel
-    {
-	register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, dXdv02, dXdv12, dXdv22,
-	    dvdX00, dvdX10, dvdX20, dvdX01, dvdX11, dvdX21, dvdX02, dvdX12, dvdX22, D, invD, X0_loc, X1_loc, X2_loc;
-#pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,dXdv02,dXdv12,dXdv22,dvdX00,dvdX10,dvdX20,dvdX01,dvdX11,dvdX21,dvdX02,dvdX12,dvdX22,D,invD,X0_loc,X1_loc,X2_loc) schedule(static)
-	for (e = 0; e < numElements; e++)
-	{
-	    dXdv00 = 0;
-	    dXdv10 = 0;
-	    dXdv20 = 0;
-	    dXdv01 = 0;
-	    dXdv11 = 0;
-	    dXdv21 = 0;
-	    dXdv02 = 0;
-	    dXdv12 = 0;
-	    dXdv22 = 0;
-	    for (s = 0; s < numShape; s++)
-	    {
-		X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
-		X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
-		X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
-		dXdv00 += X0_loc * DTDV_3D[s][0];
-		dXdv10 += X1_loc * DTDV_3D[s][0];
-		dXdv20 += X2_loc * DTDV_3D[s][0];
-		dXdv01 += X0_loc * DTDV_3D[s][1];
-		dXdv11 += X1_loc * DTDV_3D[s][1];
-		dXdv21 += X2_loc * DTDV_3D[s][1];
-		dXdv02 += X0_loc * DTDV_3D[s][2];
-		dXdv12 += X1_loc * DTDV_3D[s][2];
-		dXdv22 += X2_loc * DTDV_3D[s][2];
-	    }
-	    D = dXdv00 * (dXdv11 * dXdv22 - dXdv12 * dXdv21) + dXdv01 * (dXdv20 * dXdv12 - dXdv10 * dXdv22) +
-		dXdv02 * (dXdv10 * dXdv21 - dXdv20 * dXdv11);
-	    absD[e] = ABS(D);
-	    if (D == 0.)
-	    {
-		sprintf(error_msg, "Dudley_Assemble_jacobeans_3D: element %d (id %d) has volume zero.", e, element_id[e]);
-		Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
-	    }
-	    else
-	    {
-		invD = 1. / D;
-		dvdX00 = (dXdv11 * dXdv22 - dXdv12 * dXdv21) * invD;
-		dvdX10 = (dXdv20 * dXdv12 - dXdv10 * dXdv22) * invD;
-		dvdX20 = (dXdv10 * dXdv21 - dXdv20 * dXdv11) * invD;
-		dvdX01 = (dXdv02 * dXdv21 - dXdv01 * dXdv22) * invD;
-		dvdX11 = (dXdv00 * dXdv22 - dXdv20 * dXdv02) * invD;
-		dvdX21 = (dXdv01 * dXdv20 - dXdv00 * dXdv21) * invD;
-		dvdX02 = (dXdv01 * dXdv12 - dXdv02 * dXdv11) * invD;
-		dvdX12 = (dXdv02 * dXdv10 - dXdv00 * dXdv12) * invD;
-		dvdX22 = (dXdv00 * dXdv11 - dXdv01 * dXdv10) * invD;
-		for (q = 0; q < numQuad; q++)
-		{
-		    for (s = 0; s < numTest; s++)
-		    {
-			dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] =
-			    DTDV_3D[s][0] * dvdX00 + DTDV_3D[s][1] * dvdX10 + DTDV_3D[s][2] * dvdX20;
-			dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] =
-			    DTDV_3D[s][0] * dvdX01 + DTDV_3D[s][1] * dvdX11 + DTDV_3D[s][2] * dvdX21;
-			dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] =
-			    DTDV_3D[s][0] * dvdX02 + DTDV_3D[s][1] * dvdX12 + DTDV_3D[s][2] * dvdX22;
-		    }
-		}
-	    }
-	}
-    }				/* end parallel */
-#undef DIM
-#undef LOCDIM
-}
-
-/************************************************************************************/
-/*                                                            */
-/*  Jacobean 2D manifold in 3D with 2D elements               */
-/*                                                            */
-void Dudley_Assemble_jacobeans_3D_M2D_E2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes,
-				   index_t * nodes, double *dTdX, double *absD, double *quadweight,
-				   index_t * element_id)
-{
-#define DIM 3
-#define LOCDIM 2
-    register int e, q, s;
-    char error_msg[LenErrorMsg_MAX];
-    const double DTDV[3][2] = { {-1., -1.}, {1., 0.}, {0., 1.} };
-    const dim_t numShape = 3, numTest = 3;
-    /* numQuad is 1 or 3 */
-    *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6;
-#pragma omp parallel
-    {
-	register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, m00, m01, m11,
-	    dvdX00, dvdX01, dvdX02, dvdX10, dvdX11, dvdX12, D, invD, X0_loc, X1_loc, X2_loc;
-#pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,m00,m01,m11,dvdX00,dvdX01,dvdX02,dvdX10,dvdX11,dvdX12,D,invD, X0_loc, X1_loc, X2_loc) schedule(static)
-	for (e = 0; e < numElements; e++)
-	{
-	    dXdv00 = 0;
-	    dXdv10 = 0;
-	    dXdv20 = 0;
-	    dXdv01 = 0;
-	    dXdv11 = 0;
-	    dXdv21 = 0;
-	    for (s = 0; s < numShape; s++)
-	    {
-		X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
-		X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
-		X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
-		dXdv00 += X0_loc * DTDV[s][0];
-		dXdv10 += X1_loc * DTDV[s][0];
-		dXdv20 += X2_loc * DTDV[s][0];
-		dXdv01 += X0_loc * DTDV[s][1];
-		dXdv11 += X1_loc * DTDV[s][1];
-		dXdv21 += X2_loc * DTDV[s][1];
-	    }
-	    m00 = dXdv00 * dXdv00 + dXdv10 * dXdv10 + dXdv20 * dXdv20;
-	    m01 = dXdv00 * dXdv01 + dXdv10 * dXdv11 + dXdv20 * dXdv21;
-	    m11 = dXdv01 * dXdv01 + dXdv11 * dXdv11 + dXdv21 * dXdv21;
-	    D = m00 * m11 - m01 * m01;
-	    absD[e] = sqrt(D);
-	    if (D == 0.)
-	    {
-		sprintf(error_msg, "Dudley_Assemble_jacobeans_3D_M2D: element %d (id %d) has area zero.", e, element_id[e]);
-		Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
-	    }
-	    else
-	    {
-		invD = 1. / D;
-		dvdX00 = (m00 * dXdv00 - m01 * dXdv01) * invD;
-		dvdX01 = (m00 * dXdv10 - m01 * dXdv11) * invD;
-		dvdX02 = (m00 * dXdv20 - m01 * dXdv21) * invD;
-		dvdX10 = (-m01 * dXdv00 + m11 * dXdv01) * invD;
-		dvdX11 = (-m01 * dXdv10 + m11 * dXdv11) * invD;
-		dvdX12 = (-m01 * dXdv20 + m11 * dXdv21) * invD;
-		for (q = 0; q < numQuad; q++)
-		{
-		    for (s = 0; s < numTest; s++)
-		    {
-			dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX00 + DTDV[s][1] * dvdX10;
-			dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX01 + DTDV[s][1] * dvdX11;
-			dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX02 + DTDV[s][1] * dvdX12;
-		    }
-		}
-	    }
-	}
-    }				/* end parallel section */
-#undef DIM
-#undef LOCDIM
-}
diff --git a/dudley/src/Assemble_jacobians.cpp b/dudley/src/Assemble_jacobians.cpp
new file mode 100644
index 0000000..06a6c4d
--- /dev/null
+++ b/dudley/src/Assemble_jacobians.cpp
@@ -0,0 +1,317 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "Assemble.h"
+#include "ShapeTable.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+// Unless the loops in here get complicated again, this file should be
+// compiled with loop unrolling
+
+/* input: 
+
+double* coordinates[DIM*(*)]
+dim_t numQuad
+double* QuadWeights[numQuad]
+dim_t numShape
+dim_t numElements
+dim_t numNodes
+index_t* nodes[numNodes*numElements]  where NUMSIDES*numShape<=numNodes
+double* DSDv[numShape*DIM*numQuad]
+dim_t numTest
+double* DTDv[LOCDIM*numTest*numQuad] 
+index_t* elementId[numElements]
+
+output:
+
+double* dTdX[DIM*numTest*NUMSIDES*numQuad*numElements]
+double* volume[numQuad*numElements]
+
+*/
+
+#define SCALING(_nsub_,_dim_) pow(1./(double)(_nsub_),1./(double)(_dim_))
+
+namespace dudley {
+
+/****************************************************************************/
+//
+//  Jacobian 2D with area element
+//
+void Assemble_jacobians_2D(const double* coordinates, int numQuad,
+                       dim_t numElements, int numNodes, const index_t* nodes,
+                       double* dTdX, double* absD, double* quadweight,
+                       const index_t* elementId)
+{
+    const int DIM = 2;
+    const int numTest = 3; // hoping this is used in constant folding
+    *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6; // numQuad is 1 or 3
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+#define COMPDXDV0(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
+coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*1+\
+coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(0)
+
+#define COMPDXDV1(P)  coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
+coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*(0)+\
+coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(1)
+
+        double dXdv00 = COMPDXDV0(0);
+        double dXdv10 = COMPDXDV0(1);
+        double dXdv01 = COMPDXDV1(0);
+        double dXdv11 = COMPDXDV1(1);
+        const double D = dXdv00 * dXdv11 - dXdv01 * dXdv10;
+        absD[e] = std::abs(D);
+        if (D == 0.) {
+            std::stringstream ss;
+            ss << "Assemble_jacobians_2D: element " << e
+                << " (id " << elementId[e] << ") has area zero.";
+            throw DudleyException(ss.str());
+        } else {
+            const double invD = 1. / D;
+            const double dvdX00 = dXdv11 * invD;
+            const double dvdX10 = -dXdv10 * invD;
+            const double dvdX01 = -dXdv01 * invD;
+            const double dvdX11 = dXdv00 * invD;
+            if (numQuad == 1) {
+                dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
+                dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
+                dTdX[INDEX4(2, 0, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
+
+                dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
+                dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
+                dTdX[INDEX4(2, 1, 0, e, numTest, DIM, numQuad)] =
+                    DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
+
+            } else { // numQuad == 3
+                // relying on unroll loops to optimise this
+                for (int q = 0; q < numTest; ++q) {
+                    dTdX[INDEX4(0, 0, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
+                    dTdX[INDEX4(1, 0, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
+                    dTdX[INDEX4(2, 0, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
+
+                    dTdX[INDEX4(0, 1, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
+                    dTdX[INDEX4(1, 1, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
+                    dTdX[INDEX4(2, 1, q, e, numTest, DIM, numQuad)] =
+                        DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
+
+                }
+            }
+        }
+    } // end parallel for
+#undef COMPDXDV0
+#undef COMPDXDV1
+}
+
+//
+// Jacobian 1D manifold in 2D and 1D elements
+//
+void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
+                                   dim_t numElements, int numNodes,
+                                   const index_t* nodes, double* dTdX,
+                                   double* absD, double* quadweight,
+                                   const index_t* elementId)
+{
+    const int DIM = 2;
+    const int numTest = 2;
+    *quadweight = (numQuad == 1) ? 1.0 : 0.5; // numQuad is 1 or 2
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        double dXdv00 =
+            coordinates[INDEX2(0, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
+            coordinates[INDEX2(0, nodes[INDEX2(1, e, numNodes)], DIM)];
+        double dXdv10 =
+            coordinates[INDEX2(1, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
+            coordinates[INDEX2(1, nodes[INDEX2(1, e, numNodes)], DIM)];
+        const double D = dXdv00 * dXdv00 + dXdv10 * dXdv10;
+        if (D == 0.) {
+            std::stringstream ss;
+            ss << "Assemble_jacobians_2D_M1D_E1D: element " << e
+                << " (id " << elementId[e] << ") has length zero.";
+            throw DudleyException(ss.str());
+        } else {
+            const double invD = 1. / D;
+            const double dvdX00 = dXdv00 * invD;
+            const double dvdX01 = dXdv10 * invD;
+            // The number of quad points is 1 or 2
+            dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
+            dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
+            dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
+            dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
+            absD[e] = sqrt(D);
+            if (numQuad == 2) {
+                dTdX[INDEX4(0, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
+                dTdX[INDEX4(0, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
+                dTdX[INDEX4(1, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
+                dTdX[INDEX4(1, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
+            }
+        }
+    } // end parallel for
+}
+
+//
+// Jacobian 3D
+//
+void Assemble_jacobians_3D(const double* coordinates, int numQuad,
+                           dim_t numElements, int numNodes,
+                           const index_t* nodes, double* dTdX, double* absD,
+                           double* quadweight, const index_t* elementId)
+{
+    const int DIM = 3;
+    const int numShape = 4;
+    const int numTest = 4;
+    *quadweight = (numQuad == 1) ? 1. / 6 : 1. / 24; // numQuad is 1 or 4
+
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        double dXdv00 = 0;
+        double dXdv10 = 0;
+        double dXdv20 = 0;
+        double dXdv01 = 0;
+        double dXdv11 = 0;
+        double dXdv21 = 0;
+        double dXdv02 = 0;
+        double dXdv12 = 0;
+        double dXdv22 = 0;
+        for (int s = 0; s < numShape; s++) {
+            const double X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
+            const double X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
+            const double X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
+            dXdv00 += X0_loc * DTDV_3D[s][0];
+            dXdv10 += X1_loc * DTDV_3D[s][0];
+            dXdv20 += X2_loc * DTDV_3D[s][0];
+            dXdv01 += X0_loc * DTDV_3D[s][1];
+            dXdv11 += X1_loc * DTDV_3D[s][1];
+            dXdv21 += X2_loc * DTDV_3D[s][1];
+            dXdv02 += X0_loc * DTDV_3D[s][2];
+            dXdv12 += X1_loc * DTDV_3D[s][2];
+            dXdv22 += X2_loc * DTDV_3D[s][2];
+        }
+        const double D = dXdv00 * (dXdv11 * dXdv22 - dXdv12 * dXdv21)
+                       + dXdv01 * (dXdv20 * dXdv12 - dXdv10 * dXdv22)
+                       + dXdv02 * (dXdv10 * dXdv21 - dXdv20 * dXdv11);
+        absD[e] = std::abs(D);
+        if (D == 0.) {
+            std::stringstream ss;
+            ss << "Assemble_jacobians_3D: element " << e
+                << " (id " << elementId[e] << ") has volume zero.";
+            throw DudleyException(ss.str());
+        } else {
+            const double invD = 1. / D;
+            const double dvdX00 = (dXdv11 * dXdv22 - dXdv12 * dXdv21) * invD;
+            const double dvdX10 = (dXdv20 * dXdv12 - dXdv10 * dXdv22) * invD;
+            const double dvdX20 = (dXdv10 * dXdv21 - dXdv20 * dXdv11) * invD;
+            const double dvdX01 = (dXdv02 * dXdv21 - dXdv01 * dXdv22) * invD;
+            const double dvdX11 = (dXdv00 * dXdv22 - dXdv20 * dXdv02) * invD;
+            const double dvdX21 = (dXdv01 * dXdv20 - dXdv00 * dXdv21) * invD;
+            const double dvdX02 = (dXdv01 * dXdv12 - dXdv02 * dXdv11) * invD;
+            const double dvdX12 = (dXdv02 * dXdv10 - dXdv00 * dXdv12) * invD;
+            const double dvdX22 = (dXdv00 * dXdv11 - dXdv01 * dXdv10) * invD;
+            for (int q = 0; q < numQuad; q++) {
+                for (int s = 0; s < numTest; s++) {
+                    dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] =
+                        DTDV_3D[s][0] * dvdX00 + DTDV_3D[s][1] * dvdX10
+                        + DTDV_3D[s][2] * dvdX20;
+                    dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] =
+                        DTDV_3D[s][0] * dvdX01 + DTDV_3D[s][1] * dvdX11
+                        + DTDV_3D[s][2] * dvdX21;
+                    dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] =
+                        DTDV_3D[s][0] * dvdX02 + DTDV_3D[s][1] * dvdX12
+                        + DTDV_3D[s][2] * dvdX22;
+                }
+            }
+        }
+    } // end parallel for
+}
+
+//
+// Jacobian 2D manifold in 3D with 2D elements
+//
+void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
+                                   dim_t numElements, int numNodes,
+                                   const index_t* nodes, double* dTdX,
+                                   double* absD, double* quadweight,
+                                   const index_t* elementId)
+{
+    const int DIM = 3;
+    const double DTDV[3][2] = { {-1., -1.}, {1., 0.}, {0., 1.} };
+    const int numShape = 3;
+    const int numTest = 3;
+    *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6; // numQuad is 1 or 3
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        double dXdv00 = 0;
+        double dXdv10 = 0;
+        double dXdv20 = 0;
+        double dXdv01 = 0;
+        double dXdv11 = 0;
+        double dXdv21 = 0;
+        for (int s = 0; s < numShape; s++) {
+            const double X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
+            const double X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
+            const double X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
+            dXdv00 += X0_loc * DTDV[s][0];
+            dXdv10 += X1_loc * DTDV[s][0];
+            dXdv20 += X2_loc * DTDV[s][0];
+            dXdv01 += X0_loc * DTDV[s][1];
+            dXdv11 += X1_loc * DTDV[s][1];
+            dXdv21 += X2_loc * DTDV[s][1];
+        }
+        const double m00 = dXdv00 * dXdv00 + dXdv10 * dXdv10 + dXdv20 * dXdv20;
+        const double m01 = dXdv00 * dXdv01 + dXdv10 * dXdv11 + dXdv20 * dXdv21;
+        const double m11 = dXdv01 * dXdv01 + dXdv11 * dXdv11 + dXdv21 * dXdv21;
+        const double D = m00 * m11 - m01 * m01;
+        absD[e] = sqrt(D);
+        if (D == 0.) {
+            std::stringstream ss;
+            ss << "Assemble_jacobians_3D_M2D: element " << e
+                << " (id " << elementId[e] << ") has area zero.";
+            throw DudleyException(ss.str());
+        } else {
+            const double invD = 1. / D;
+            const double dvdX00 = (m00 * dXdv00 - m01 * dXdv01) * invD;
+            const double dvdX01 = (m00 * dXdv10 - m01 * dXdv11) * invD;
+            const double dvdX02 = (m00 * dXdv20 - m01 * dXdv21) * invD;
+            const double dvdX10 = (-m01 * dXdv00 + m11 * dXdv01) * invD;
+            const double dvdX11 = (-m01 * dXdv10 + m11 * dXdv11) * invD;
+            const double dvdX12 = (-m01 * dXdv20 + m11 * dXdv21) * invD;
+            for (int q = 0; q < numQuad; q++) {
+                for (int s = 0; s < numTest; s++) {
+                    dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] =
+                        DTDV[s][0] * dvdX00 + DTDV[s][1] * dvdX10;
+                    dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] =
+                        DTDV[s][0] * dvdX01 + DTDV[s][1] * dvdX11;
+                    dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] =
+                        DTDV[s][0] * dvdX02 + DTDV[s][1] * dvdX12;
+                }
+            }
+        }
+    } // end parallel for
+}
+
+} // namespace dudley
+
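The assembly routines added above address the flat coordinates and dTdX arrays through the INDEX2/INDEX4 macros, which are defined elsewhere in the escript sources. As a minimal sketch, assuming the usual escript convention that the first macro argument varies fastest, they expand roughly as follows (parameter names here are illustrative only):

    // Sketch only -- assumed expansions of the index macros used above,
    // following the convention that the first index runs fastest.
    #define INDEX2(i, j, n1)               ((i) + (n1) * (j))
    #define INDEX3(i, j, k, n1, n2)        ((i) + (n1) * ((j) + (n2) * (k)))
    #define INDEX4(i, j, k, l, n1, n2, n3) ((i) + (n1) * ((j) + (n2) * ((k) + (n3) * (l))))

    // Under that assumption, dTdX[INDEX4(s, d, q, e, numTest, DIM, numQuad)]
    // holds the derivative of test function s in direction d at quadrature
    // point q of element e, so the array has
    // numTest * DIM * numQuad * numElements entries with s running fastest.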
diff --git a/dudley/src/Assemble_setNormal.cpp b/dudley/src/Assemble_setNormal.cpp
deleted file mode 100644
index ea7fd54..0000000
--- a/dudley/src/Assemble_setNormal.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*	  assemblage routines: calculates the normal vector at quadrature points on face elements */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Assemble.h"
-#include "Util.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "ShapeTable.h"
-
-/************************************************************************************/
-
-void Dudley_Assemble_setNormal(Dudley_NodeFile * nodes, Dudley_ElementFile * elements, escript::Data* normal)
-{
-    double *local_X = NULL, *dVdv = NULL, *normal_array;
-    index_t sign;
-    dim_t e, q, NN, NS, numDim, numQuad, numDim_local;
-    bool reduced_integration;
-    const double *dSdv = 0;
-    if (nodes == NULL || elements == NULL)
-	return;
-
-    switch (elements->numDim)
-    {
-    case 2:
-	dSdv = &(DTDV_2D[0][0]);
-	break;
-    case 3:
-	dSdv = &(DTDV_3D[0][0]);
-	break;
-    default:
-	dSdv = &(DTDV_1D[0][0]);
-	break;
-    }
-    Dudley_resetError();
-    NN = elements->numNodes;
-    numDim = nodes->numDim;
-    reduced_integration = Dudley_Assemble_reducedIntegrationOrder(normal);
-    numQuad = (!reduced_integration) ? (elements->numDim + 1) : 1;
-    numDim_local = elements->numLocalDim;
-    NS = elements->numDim + 1;
-
-    /* set some parameters */
-
-    sign = 1;
-    /* check the dimensions of normal */
-    if (!(numDim == numDim_local || numDim - 1 == numDim_local))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_setNormal: Cannot calculate normal vector");
-    }
-    else if (!isDataPointShapeEqual(normal, 1, &(numDim)))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_setNormal: illegal number of samples of normal Data object");
-    }
-    else if (!numSamplesEqual(normal, numQuad, elements->numElements))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_setNormal: illegal number of samples of normal Data object");
-    }
-    else if (!isDataPointShapeEqual(normal, 1, &(numDim)))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_setNormal: illegal point data shape of normal Data object");
-    }
-    else if (!isExpanded(normal))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_Assemble_setNormal: expanded Data object is expected for normal.");
-    }
-
-    /* now we can start */
-    if (Dudley_noError())
-    {
-	requireWrite(normal);
-#pragma omp parallel private(local_X,dVdv)
-	{
-	    local_X = dVdv = NULL;
-	    /* allocation of work arrays */
-	    local_X = new  double[NS * numDim];
-	    dVdv = new  double[numQuad * numDim * numDim_local];
-	    if (!(Dudley_checkPtr(local_X) || Dudley_checkPtr(dVdv)))
-	    {
-		/* open the element loop */
-#pragma omp for private(e,q,normal_array) schedule(static)
-		for (e = 0; e < elements->numElements; e++)
-		{
-		    /* gather local coordinates of nodes into local_X: */
-		    Dudley_Util_Gather_double(NS, &(elements->Nodes[INDEX2(0, e, NN)]), numDim, nodes->Coordinates,
-					      local_X);
-		    /*  calculate dVdv(i,j,q)=local_X(i,n)*DSDv(n,j,q) */
-		    Dudley_Util_SmallMatMult(numDim, numDim_local * numQuad, dVdv, NS, local_X, dSdv);
-		    /* get normalized vector:      */
-		    normal_array = getSampleDataRW(normal, e);
-		    Dudley_NormalVector(numQuad, numDim, numDim_local, dVdv, normal_array);
-		    for (q = 0; q < numQuad * numDim; q++)
-			normal_array[q] *= sign;
-		}
-	    }
-	    delete[] local_X;
-	    delete[] dVdv;
-	}
-    }
-}
diff --git a/dudley/src/CPPAdapter/DudleyAdapterException.cpp b/dudley/src/CPPAdapter/DudleyAdapterException.cpp
deleted file mode 100644
index 03a42e0..0000000
--- a/dudley/src/CPPAdapter/DudleyAdapterException.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "DudleyAdapterException.h"
-
-
-using namespace dudley;
-
-
-const std::string 
-DudleyAdapterException::exceptionNameValue("DudleyAdapterException");
-
-
-const std::string &
-DudleyAdapterException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/dudley/src/CPPAdapter/DudleyAdapterException.h b/dudley/src/CPPAdapter/DudleyAdapterException.h
deleted file mode 100644
index 15ce534..0000000
--- a/dudley/src/CPPAdapter/DudleyAdapterException.h
+++ /dev/null
@@ -1,106 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  dudley_DudleyAdapterException_20040526_H
-#define dudley_DudleyAdapterException_20040526_H
-#include "system_dep.h"
-
-#include "esysUtils/EsysException.h"
-
-namespace dudley
-{
-
-  /**
-  \brief
-  DudleyAdapterException exception class.
-
-  Description:
-  DudleyAdapterException exception class.
-  The class provides a public function returning the exception name
-  */
-  class DudleyAdapterException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    DUDLEY_DLL_API
-    DudleyAdapterException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    DUDLEY_DLL_API
-    DudleyAdapterException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    DUDLEY_DLL_API
-    DudleyAdapterException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    DUDLEY_DLL_API
-    DudleyAdapterException(const DudleyAdapterException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    /// Destructor
-    DUDLEY_DLL_API
-    virtual ~DudleyAdapterException() THROW(NO_ARG) {}
-
-    /**
-    \brief
-    Assignment operator.
-    */
-    DUDLEY_DLL_API
-    inline DudleyAdapterException &
-    operator=(const DudleyAdapterException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    DUDLEY_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
-
-} // end of namespace
-#endif
diff --git a/dudley/src/CPPAdapter/DudleyError.cpp b/dudley/src/CPPAdapter/DudleyError.cpp
deleted file mode 100644
index de23e72..0000000
--- a/dudley/src/CPPAdapter/DudleyError.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "DudleyError.h"
-#include <iostream>
-
-namespace dudley {
-
-  void setDudleyError(Dudley_ErrorCodeType errorCode, 
-		      const std::string& errMess) 
-  {
-    Dudley_setError(errorCode,(__const char*)(errMess.c_str()));
-  }
-
-  void checkDudleyError() 
-  {
-    if (Dudley_noError()) {
-      return;
-    } else {
-      //
-      // reset the error code to no error otherwise the next call to
-      // this function may resurrect a previous error
-      Dudley_resetError();
-      throw DudleyAdapterException(Dudley_getErrorMessage());
-    }
-  }
-  void checkPasoError() 
-  {
-    if (Esys_noError()) {
-      return;
-    } else {
-      //
-      // reset the error code to no error otherwise the next call to
-      // this function may resurrect a previous error
-      Esys_resetError();
-      throw DudleyAdapterException(Esys_getErrorMessage());
-    }
-  }
-
-}  // end of namespace
diff --git a/dudley/src/CPPAdapter/DudleyError.h b/dudley/src/CPPAdapter/DudleyError.h
deleted file mode 100644
index 8b20f85..0000000
--- a/dudley/src/CPPAdapter/DudleyError.h
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  dudley_DudleyError_20040528_H
-#define dudley_DudleyError_20040528_H
-#include "system_dep.h"
-
-#include "dudley/Dudley.h"
-
-#include "DudleyAdapterException.h"
-
-#include <string>
-
-namespace dudley {
-  /**
-     \brief
-     Provide a C++ interface to the dudley C funcion of the same name.
-     Needed because of constness problems.
-  */
-  DUDLEY_DLL_API
-  void setDudleyError(Dudley_ErrorCodeType errorCode, 
-		      const std::string& errMess);
- 
-  /**
-     \brief
-     Convert a C dudley error into a C++ exception.
-  */
-  DUDLEY_DLL_API
-  void checkDudleyError();
-  /**
-     \brief
-     Convert a C paso  error into a C++ exception.
-  */
-  DUDLEY_DLL_API
-  void checkPasoError();
-} // end of namespace
-#endif
diff --git a/dudley/src/CPPAdapter/MeshAdapter.cpp b/dudley/src/CPPAdapter/MeshAdapter.cpp
deleted file mode 100644
index e952b7e..0000000
--- a/dudley/src/CPPAdapter/MeshAdapter.cpp
+++ /dev/null
@@ -1,2043 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "MeshAdapter.h"
-#include "escript/Data.h"
-#include "escript/DataFactory.h"
-#include "esysUtils/blocktimer.h"
-#include "esysUtils/EsysRandom.h"
-
-#ifdef USE_NETCDF
-#include <netcdfcpp.h>
-#endif
-#include <boost/python/tuple.hpp>
-
-using namespace std;
-using namespace escript;
-using namespace paso;
-namespace bp = boost::python;
-
-namespace dudley {
-
-//
-// define the static constants
-MeshAdapter::FunctionSpaceNamesMapType MeshAdapter::m_functionSpaceTypeNames;
-const int MeshAdapter::DegreesOfFreedom=DUDLEY_DEGREES_OF_FREEDOM;
-const int MeshAdapter::ReducedDegreesOfFreedom=DUDLEY_REDUCED_DEGREES_OF_FREEDOM;
-const int MeshAdapter::Nodes=DUDLEY_NODES;
-const int MeshAdapter::ReducedNodes=DUDLEY_REDUCED_NODES;
-const int MeshAdapter::Elements=DUDLEY_ELEMENTS;
-const int MeshAdapter::ReducedElements=DUDLEY_REDUCED_ELEMENTS;
-const int MeshAdapter::FaceElements=DUDLEY_FACE_ELEMENTS;
-const int MeshAdapter::ReducedFaceElements=DUDLEY_REDUCED_FACE_ELEMENTS;
-const int MeshAdapter::Points=DUDLEY_POINTS;
-
-MeshAdapter::MeshAdapter(Dudley_Mesh* dudleyMesh)
-{
-   setFunctionSpaceTypeNames();
-   //
-   // need to use a null_deleter as Dudley_Mesh_free deletes the pointer
-   // for us.
-   m_dudleyMesh.reset(dudleyMesh,null_deleter());
-}
-
-//
-// The copy constructor should just increment the use count
-MeshAdapter::MeshAdapter(const MeshAdapter& in):
-m_dudleyMesh(in.m_dudleyMesh)
-{
-   setFunctionSpaceTypeNames();
-}
-
-MeshAdapter::~MeshAdapter()
-{
-   //
-   // I hope the case for the pointer being zero has been taken care of.
-   //  cout << "In MeshAdapter destructor." << endl;
-   if (m_dudleyMesh.unique()) {
-      Dudley_Mesh_free(m_dudleyMesh.get());
-   }
-}
-
-int MeshAdapter::getMPISize() const
-{
-   return m_dudleyMesh.get()->MPIInfo->size;
-}
-int MeshAdapter::getMPIRank() const
-{
-   return m_dudleyMesh.get()->MPIInfo->rank;
-}
-void MeshAdapter::MPIBarrier() const
-{
-#ifdef ESYS_MPI
-   MPI_Barrier(m_dudleyMesh.get()->MPIInfo->comm);
-#endif
-   return;
-}
-bool MeshAdapter::onMasterProcessor() const
-{
-   return m_dudleyMesh.get()->MPIInfo->rank == 0;
-}
-
-MPI_Comm MeshAdapter::getMPIComm() const
-{
-    return m_dudleyMesh->MPIInfo->comm;
-}
-
-
-Dudley_Mesh* MeshAdapter::getDudley_Mesh() const
-{
-   return m_dudleyMesh.get();
-}
-
-void MeshAdapter::write(const string& fileName) const
-{
-   char *fName = (fileName.size()+1>0) ? new char[fileName.size()+1] : (char*)NULL;
-   strcpy(fName,fileName.c_str());
-   Dudley_Mesh_write(m_dudleyMesh.get(),fName);
-   checkDudleyError();
-   delete[] fName;
-}
-
-void MeshAdapter::Print_Mesh_Info(const bool full) const
-{
-   Dudley_PrintMesh_Info(m_dudleyMesh.get(), full);
-}
-
-void MeshAdapter::dump(const string& fileName) const
-{
-#ifdef USE_NETCDF
-   const NcDim* ncdims[12] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
-   NcVar *ids;
-   int *int_ptr;
-   Dudley_Mesh *mesh = m_dudleyMesh.get();
-   Dudley_TagMap* tag_map;
-   int num_Tags = 0;
-   int mpi_size				= mesh->MPIInfo->size;
-   int mpi_rank				= mesh->MPIInfo->rank;
-   int numDim				= mesh->Nodes->numDim;
-   int numNodes				= mesh->Nodes->numNodes;
-   int num_Elements			= mesh->Elements->numElements;
-   int num_FaceElements			= mesh->FaceElements->numElements;
-   int num_Points			= mesh->Points->numElements;
-   int num_Elements_numNodes		= mesh->Elements->numNodes;
-   int num_FaceElements_numNodes	= mesh->FaceElements->numNodes;
-#ifdef ESYS_MPI
-   MPI_Status status;
-#endif
-
-/* Incoming token indicates it's my turn to write */
-#ifdef ESYS_MPI
-   if (mpi_rank>0) MPI_Recv(&num_Tags, 0, MPI_INT, mpi_rank-1, 81800, mesh->MPIInfo->comm, &status);
-#endif
-
-   string newFileName(esysUtils::appendRankToFileName(
-                                            fileName, mpi_size, mpi_rank));
-
-   /* Figure out how much storage is required for tags */
-   tag_map = mesh->TagMap;
-   num_Tags = 0;
-   while (tag_map) {
-      num_Tags++;
-      tag_map=tag_map->next;
-   }
-
-   // NetCDF error handler
-   NcError err(NcError::verbose_nonfatal);
-   // Create the file.
-   NcFile dataFile(newFileName.c_str(), NcFile::Replace);
-   string msgPrefix("Error in MeshAdapter::dump: NetCDF operation failed - ");
-   // check if writing was successful
-   if (!dataFile.is_valid())
-      throw DataException(msgPrefix+"Open file for output");
-
-   // Define dimensions (num_Elements and dim_Elements are identical,
-   // dim_Elements only appears if > 0)
-   if (! (ncdims[0] = dataFile.add_dim("numNodes", numNodes)) )
-      throw DataException(msgPrefix+"add_dim(numNodes)");
-   if (! (ncdims[1] = dataFile.add_dim("numDim", numDim)) )
-      throw DataException(msgPrefix+"add_dim(numDim)");
-   if (! (ncdims[2] = dataFile.add_dim("mpi_size_plus_1", mpi_size+1)) )
-      throw DataException(msgPrefix+"add_dim(mpi_size)");
-   if (num_Elements>0)
-      if (! (ncdims[3] = dataFile.add_dim("dim_Elements", num_Elements)) )
-         throw DataException(msgPrefix+"add_dim(dim_Elements)");
-   if (num_FaceElements>0)
-      if (! (ncdims[4] = dataFile.add_dim("dim_FaceElements", num_FaceElements)) )
-         throw DataException(msgPrefix+"add_dim(dim_FaceElements)");
-   if (num_Points>0)
-      if (! (ncdims[6] = dataFile.add_dim("dim_Points", num_Points)) )
-         throw DataException(msgPrefix+"add_dim(dim_Points)");
-   if (num_Elements>0)
-      if (! (ncdims[7] = dataFile.add_dim("dim_Elements_Nodes", num_Elements_numNodes)) )
-         throw DataException(msgPrefix+"add_dim(dim_Elements_Nodes)");
-   if (num_FaceElements>0)
-      if (! (ncdims[8] = dataFile.add_dim("dim_FaceElements_numNodes", num_FaceElements_numNodes)) )
-         throw DataException(msgPrefix+"add_dim(dim_FaceElements_numNodes)");
-   if (num_Tags>0)
-      if (! (ncdims[10] = dataFile.add_dim("dim_Tags", num_Tags)) )
-         throw DataException(msgPrefix+"add_dim(dim_Tags)");
-
-   // Attributes: MPI size, MPI rank, Name, order, reduced_order
-   if (!dataFile.add_att("mpi_size", mpi_size) )
-      throw DataException(msgPrefix+"add_att(mpi_size)");
-   if (!dataFile.add_att("mpi_rank", mpi_rank) )
-      throw DataException(msgPrefix+"add_att(mpi_rank)");
-   if (!dataFile.add_att("Name",mesh->Name) )
-      throw DataException(msgPrefix+"add_att(Name)");
-   if (!dataFile.add_att("numDim",numDim) )
-      throw DataException(msgPrefix+"add_att(order)");
-   if (!dataFile.add_att("order",mesh->integrationOrder) )
-      throw DataException(msgPrefix+"add_att(order)");
-   if (!dataFile.add_att("reduced_order",mesh->reducedIntegrationOrder) )
-      throw DataException(msgPrefix+"add_att(reduced_order)");
-   if (!dataFile.add_att("numNodes",numNodes) )
-      throw DataException(msgPrefix+"add_att(numNodes)");
-   if (!dataFile.add_att("num_Elements",num_Elements) )
-      throw DataException(msgPrefix+"add_att(num_Elements)");
-   if (!dataFile.add_att("num_FaceElements",num_FaceElements) )
-      throw DataException(msgPrefix+"add_att(num_FaceElements)");
-   if (!dataFile.add_att("num_Points",num_Points) )
-      throw DataException(msgPrefix+"add_att(num_Points)");
-   if (!dataFile.add_att("num_Elements_numNodes",num_Elements_numNodes) )
-      throw DataException(msgPrefix+"add_att(num_Elements_numNodes)");
-   if (!dataFile.add_att("num_FaceElements_numNodes",num_FaceElements_numNodes) )
-      throw DataException(msgPrefix+"add_att(num_FaceElements_numNodes)");
-   if (!dataFile.add_att("Elements_TypeId", mesh->Elements->etype) )
-      throw DataException(msgPrefix+"add_att(Elements_TypeId)");
-   if (!dataFile.add_att("FaceElements_TypeId", mesh->FaceElements->etype) )
-      throw DataException(msgPrefix+"add_att(FaceElements_TypeId)");
-   if (!dataFile.add_att("Points_TypeId", mesh->Points->etype) )
-      throw DataException(msgPrefix+"add_att(Points_TypeId)");
-   if (!dataFile.add_att("num_Tags", num_Tags) )
-      throw DataException(msgPrefix+"add_att(num_Tags)");
-
-   // // // // // Nodes // // // // //
-
-   // Nodes nodeDistribution
-   if (! ( ids = dataFile.add_var("Nodes_NodeDistribution", ncInt, ncdims[2])) )
-      throw DataException(msgPrefix+"add_var(Nodes_NodeDistribution)");
-   int_ptr = &mesh->Nodes->nodesDistribution->first_component[0];
-   if (! (ids->put(int_ptr, mpi_size+1)) )
-      throw DataException(msgPrefix+"put(Nodes_NodeDistribution)");
-
-   // Nodes degreesOfFreedomDistribution
-   if (! ( ids = dataFile.add_var("Nodes_DofDistribution", ncInt, ncdims[2])) )
-      throw DataException(msgPrefix+"add_var(Nodes_DofDistribution)");
-   int_ptr = &mesh->Nodes->degreesOfFreedomDistribution->first_component[0];
-   if (! (ids->put(int_ptr, mpi_size+1)) )
-      throw DataException(msgPrefix+"put(Nodes_DofDistribution)");
-
-   // Only write nodes if non-empty because NetCDF doesn't like empty arrays
-   // (it treats them as NC_UNLIMITED)
-   if (numNodes>0) {
-
-      // Nodes Id
-      if (! ( ids = dataFile.add_var("Nodes_Id", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_Id)");
-      int_ptr = &mesh->Nodes->Id[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_Id)");
-
-      // Nodes Tag
-      if (! ( ids = dataFile.add_var("Nodes_Tag", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_Tag)");
-      int_ptr = &mesh->Nodes->Tag[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_Tag)");
-
-      // Nodes gDOF
-      if (! ( ids = dataFile.add_var("Nodes_gDOF", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_gDOF)");
-      int_ptr = &mesh->Nodes->globalDegreesOfFreedom[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_gDOF)");
-
-      // Nodes global node index
-      if (! ( ids = dataFile.add_var("Nodes_gNI", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_gNI)");
-      int_ptr = &mesh->Nodes->globalNodesIndex[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_gNI)");
-
-      // Nodes grDof
-      if (! ( ids = dataFile.add_var("Nodes_grDfI", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_grDfI)");
-      int_ptr = &mesh->Nodes->globalReducedDOFIndex[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_grDfI)");
-
-      // Nodes grNI
-      if (! ( ids = dataFile.add_var("Nodes_grNI", ncInt, ncdims[0])) )
-         throw DataException(msgPrefix+"add_var(Nodes_grNI)");
-      int_ptr = &mesh->Nodes->globalReducedNodesIndex[0];
-      if (! (ids->put(int_ptr, numNodes)) )
-         throw DataException(msgPrefix+"put(Nodes_grNI)");
-
-      // Nodes Coordinates
-      if (! ( ids = dataFile.add_var("Nodes_Coordinates", ncDouble, ncdims[0], ncdims[1]) ) )
-         throw DataException(msgPrefix+"add_var(Nodes_Coordinates)");
-      if (! (ids->put(&(mesh->Nodes->Coordinates[INDEX2(0,0,numDim)]), numNodes, numDim)) )
-         throw DataException(msgPrefix+"put(Nodes_Coordinates)");
-
-   }
-
-   // // // // // Elements // // // // //
-
-   if (num_Elements>0) {
-
-      // Elements_Id
-      if (! ( ids = dataFile.add_var("Elements_Id", ncInt, ncdims[3])) )
-         throw DataException(msgPrefix+"add_var(Elements_Id)");
-      int_ptr = &mesh->Elements->Id[0];
-      if (! (ids->put(int_ptr, num_Elements)) )
-         throw DataException(msgPrefix+"put(Elements_Id)");
-
-      // Elements_Tag
-      if (! ( ids = dataFile.add_var("Elements_Tag", ncInt, ncdims[3])) )
-         throw DataException(msgPrefix+"add_var(Elements_Tag)");
-      int_ptr = &mesh->Elements->Tag[0];
-      if (! (ids->put(int_ptr, num_Elements)) )
-         throw DataException(msgPrefix+"put(Elements_Tag)");
-
-      // Elements_Owner
-      if (! ( ids = dataFile.add_var("Elements_Owner", ncInt, ncdims[3])) )
-         throw DataException(msgPrefix+"add_var(Elements_Owner)");
-      int_ptr = &mesh->Elements->Owner[0];
-      if (! (ids->put(int_ptr, num_Elements)) )
-         throw DataException(msgPrefix+"put(Elements_Owner)");
-
-      // Elements_Color
-      if (! ( ids = dataFile.add_var("Elements_Color", ncInt, ncdims[3])) )
-         throw DataException(msgPrefix+"add_var(Elements_Color)");
-      int_ptr = &mesh->Elements->Color[0];
-      if (! (ids->put(int_ptr, num_Elements)) )
-         throw DataException(msgPrefix+"put(Elements_Color)");
-
-      // Elements_Nodes
-      if (! ( ids = dataFile.add_var("Elements_Nodes", ncInt, ncdims[3], ncdims[7]) ) )
-         throw DataException(msgPrefix+"add_var(Elements_Nodes)");
-      if (! (ids->put(&(mesh->Elements->Nodes[0]), num_Elements, num_Elements_numNodes)) )
-         throw DataException(msgPrefix+"put(Elements_Nodes)");
-
-   }
-
-   // // // // // Face_Elements // // // // //
-
-   if (num_FaceElements>0) {
-
-      // FaceElements_Id
-      if (! ( ids = dataFile.add_var("FaceElements_Id", ncInt, ncdims[4])) )
-         throw DataException(msgPrefix+"add_var(FaceElements_Id)");
-      int_ptr = &mesh->FaceElements->Id[0];
-      if (! (ids->put(int_ptr, num_FaceElements)) )
-         throw DataException(msgPrefix+"put(FaceElements_Id)");
-
-      // FaceElements_Tag
-      if (! ( ids = dataFile.add_var("FaceElements_Tag", ncInt, ncdims[4])) )
-         throw DataException(msgPrefix+"add_var(FaceElements_Tag)");
-      int_ptr = &mesh->FaceElements->Tag[0];
-      if (! (ids->put(int_ptr, num_FaceElements)) )
-         throw DataException(msgPrefix+"put(FaceElements_Tag)");
-
-      // FaceElements_Owner
-      if (! ( ids = dataFile.add_var("FaceElements_Owner", ncInt, ncdims[4])) )
-         throw DataException(msgPrefix+"add_var(FaceElements_Owner)");
-      int_ptr = &mesh->FaceElements->Owner[0];
-      if (! (ids->put(int_ptr, num_FaceElements)) )
-         throw DataException(msgPrefix+"put(FaceElements_Owner)");
-
-      // FaceElements_Color
-      if (! ( ids = dataFile.add_var("FaceElements_Color", ncInt, ncdims[4])) )
-         throw DataException(msgPrefix+"add_var(FaceElements_Color)");
-      int_ptr = &mesh->FaceElements->Color[0];
-      if (! (ids->put(int_ptr, num_FaceElements)) )
-         throw DataException(msgPrefix+"put(FaceElements_Color)");
-
-      // FaceElements_Nodes
-      if (! ( ids = dataFile.add_var("FaceElements_Nodes", ncInt, ncdims[4], ncdims[8]) ) )
-         throw DataException(msgPrefix+"add_var(FaceElements_Nodes)");
-      if (! (ids->put(&(mesh->FaceElements->Nodes[0]), num_FaceElements, num_FaceElements_numNodes)) )
-         throw DataException(msgPrefix+"put(FaceElements_Nodes)");
-
-   }
-
-   // // // // // Points // // // // //
-
-   if (num_Points>0) {
-
-      fprintf(stderr, "\n\n\nWARNING: MeshAdapter::dump has not been tested with Point elements\n\n\n");
-
-      // Points_Id
-      if (! ( ids = dataFile.add_var("Points_Id", ncInt, ncdims[6])) )
-         throw DataException(msgPrefix+"add_var(Points_Id)");
-      int_ptr = &mesh->Points->Id[0];
-      if (! (ids->put(int_ptr, num_Points)) )
-         throw DataException(msgPrefix+"put(Points_Id)");
-
-      // Points_Tag
-      if (! ( ids = dataFile.add_var("Points_Tag", ncInt, ncdims[6])) )
-         throw DataException(msgPrefix+"add_var(Points_Tag)");
-      int_ptr = &mesh->Points->Tag[0];
-      if (! (ids->put(int_ptr, num_Points)) )
-         throw DataException(msgPrefix+"put(Points_Tag)");
-
-      // Points_Owner
-      if (! ( ids = dataFile.add_var("Points_Owner", ncInt, ncdims[6])) )
-         throw DataException(msgPrefix+"add_var(Points_Owner)");
-      int_ptr = &mesh->Points->Owner[0];
-      if (! (ids->put(int_ptr, num_Points)) )
-         throw DataException(msgPrefix+"put(Points_Owner)");
-
-      // Points_Color
-      if (! ( ids = dataFile.add_var("Points_Color", ncInt, ncdims[6])) )
-         throw DataException(msgPrefix+"add_var(Points_Color)");
-      int_ptr = &mesh->Points->Color[0];
-      if (! (ids->put(int_ptr, num_Points)) )
-         throw DataException(msgPrefix+"put(Points_Color)");
-
-      // Points_Nodes
-      // mesh->Nodes->Id[mesh->Points->Nodes[INDEX2(0,i,1)]]
-      if (! ( ids = dataFile.add_var("Points_Nodes", ncInt, ncdims[6]) ) )
-         throw DataException(msgPrefix+"add_var(Points_Nodes)");
-      if (! (ids->put(&(mesh->Points->Nodes[0]), num_Points)) )
-         throw DataException(msgPrefix+"put(Points_Nodes)");
-
-   }
-
-   // // // // // TagMap // // // // //
-
-   if (num_Tags>0) {
-
-      // Temp storage to gather node IDs
-      int *Tags_keys = new int[num_Tags];
-      char name_temp[4096];
-
-      /* Copy tag data into temp arrays */
-      tag_map = mesh->TagMap;
-      if (tag_map) {
-         int i = 0;
-         while (tag_map) {
-            Tags_keys[i++] = tag_map->tag_key;
-            tag_map=tag_map->next;
-         }
-      }
-
-      // Tags_keys
-      if (! ( ids = dataFile.add_var("Tags_keys", ncInt, ncdims[10])) )
-         throw DataException(msgPrefix+"add_var(Tags_keys)");
-      int_ptr = &Tags_keys[0];
-      if (! (ids->put(int_ptr, num_Tags)) )
-         throw DataException(msgPrefix+"put(Tags_keys)");
-
-      // Tags_names_*
-      // This is an array of strings, it should be stored as an array but
-      // instead I have hacked in one attribute per string because the NetCDF
-      // manual doesn't tell how to do an array of strings
-      tag_map = mesh->TagMap;
-      if (tag_map) {
-         int i = 0;
-         while (tag_map) {
-            sprintf(name_temp, "Tags_name_%d", i);
-            if (!dataFile.add_att(name_temp, tag_map->name) )
-               throw DataException(msgPrefix+"add_att(Tags_names_XX)");
-            tag_map=tag_map->next;
-            i++;
-         }
-      }
-
-      delete[] Tags_keys;
-   }
-
-/* Send token to next MPI process so he can take his turn */
-#ifdef ESYS_MPI
-   if (mpi_rank<mpi_size-1) MPI_Send(&num_Tags, 0, MPI_INT, mpi_rank+1, 81800, mesh->MPIInfo->comm);
-#endif
-
-   // NetCDF file is closed by destructor of NcFile object
-
-#else
-   Dudley_setError(IO_ERROR, "MeshAdapter::dump: not configured with NetCDF. Please contact your installation manager.");
-#endif	/* USE_NETCDF */
-   checkDudleyError();
-}
-
-string MeshAdapter::getDescription() const
-{
-   return "DudleyMesh";
-}
-
-string MeshAdapter::functionSpaceTypeAsString(int functionSpaceType) const
-{
-   FunctionSpaceNamesMapType::iterator loc;
-   loc=m_functionSpaceTypeNames.find(functionSpaceType);
-   if (loc==m_functionSpaceTypeNames.end()) {
-      return "Invalid function space type code.";
-   } else {
-      return loc->second;
-   }
-}
-
-bool MeshAdapter::isValidFunctionSpaceType(int functionSpaceType) const
-{
-   FunctionSpaceNamesMapType::iterator loc;
-   loc=m_functionSpaceTypeNames.find(functionSpaceType);
-   return (loc!=m_functionSpaceTypeNames.end());
-}
-
-void MeshAdapter::setFunctionSpaceTypeNames()
-{
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(DegreesOfFreedom,"Dudley_DegreesOfFreedom [Solution(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(ReducedDegreesOfFreedom,"Dudley_ReducedDegreesOfFreedom [ReducedSolution(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(Nodes,"Dudley_Nodes [ContinuousFunction(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(ReducedNodes,"Dudley_Reduced_Nodes [ReducedContinuousFunction(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(Elements,"Dudley_Elements [Function(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(ReducedElements,"Dudley_Reduced_Elements [ReducedFunction(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(FaceElements,"Dudley_Face_Elements [FunctionOnBoundary(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(ReducedFaceElements,"Dudley_Reduced_Face_Elements [ReducedFunctionOnBoundary(domain)]"));
-   m_functionSpaceTypeNames.insert
-   (FunctionSpaceNamesMapType::value_type(Points,"Dudley_Points [DiracDeltaFunctions(domain)]"));
-}
-
-int MeshAdapter::getContinuousFunctionCode() const
-{
-   return Nodes;
-}
-int MeshAdapter::getReducedContinuousFunctionCode() const
-{
-   return ReducedNodes;
-}
-
-int MeshAdapter::getFunctionCode() const
-{
-   return Elements;
-}
-int MeshAdapter::getReducedFunctionCode() const
-{
-   return ReducedElements;
-}
-
-int MeshAdapter::getFunctionOnBoundaryCode() const
-{
-   return FaceElements;
-}
-int MeshAdapter::getReducedFunctionOnBoundaryCode() const
-{
-   return ReducedFaceElements;
-}
-
-int MeshAdapter::getFunctionOnContactZeroCode() const
-{
-   throw DudleyAdapterException("Dudley does not support contact elements.");
-}
-
-int MeshAdapter::getReducedFunctionOnContactZeroCode() const
-{
-   throw DudleyAdapterException("Dudley does not support contact elements.");
-}
-
-int MeshAdapter::getFunctionOnContactOneCode() const
-{
-   throw DudleyAdapterException("Dudley does not support contact elements.");
-}
-
-int MeshAdapter::getReducedFunctionOnContactOneCode() const
-{
-   throw DudleyAdapterException("Dudley does not support contact elements.");
-}
-
-int MeshAdapter::getSolutionCode() const
-{
-   return DegreesOfFreedom;
-}
-
-int MeshAdapter::getReducedSolutionCode() const
-{
-   return ReducedDegreesOfFreedom;
-}
-
-int MeshAdapter::getDiracDeltaFunctionsCode() const
-{
-   return Points;
-}
-
-//
-// return the spatial dimension of the Mesh:
-//
-int MeshAdapter::getDim() const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   int numDim=Dudley_Mesh_getDim(mesh);
-   checkDudleyError();
-   return numDim;
-}
-
-//
-// Return the number of data points summed across all MPI processes
-//
-int MeshAdapter::getNumDataPointsGlobal() const
-{
-   return Dudley_NodeFile_getGlobalNumNodes(m_dudleyMesh.get()->Nodes);
-}
-
-//
-// return the number of data points per sample and the number of samples
-// needed to represent data on a parts of the mesh.
-//
-pair<int,int> MeshAdapter::getDataShape(int functionSpaceCode) const
-{
-   int numDataPointsPerSample=0;
-   int numSamples=0;
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch (functionSpaceCode) {
-   case(Nodes):
-   numDataPointsPerSample=1;
-   numSamples=Dudley_NodeFile_getNumNodes(mesh->Nodes);
-   break;
-   case(ReducedNodes):
-   numDataPointsPerSample=1;
-   numSamples=Dudley_NodeFile_getNumReducedNodes(mesh->Nodes);
-   break;
-   case(Elements):
-   if (mesh->Elements!=NULL) {
-      numSamples=mesh->Elements->numElements;
-      numDataPointsPerSample=mesh->Elements->numLocalDim+1/*referenceElementSet->referenceElement->BasisFunctions->numQuadNodes*/;
-   }
-   break;
-   case(ReducedElements):
-   if (mesh->Elements!=NULL) {
-      numSamples=mesh->Elements->numElements;
-      numDataPointsPerSample=(mesh->Elements->numLocalDim==0)?0:1;
-   }
-   break;
-   case(FaceElements):
-   if (mesh->FaceElements!=NULL) {
-      numDataPointsPerSample=mesh->FaceElements->numLocalDim+1/*referenceElementSet->referenceElement->BasisFunctions->numQuadNodes*/;
-      numSamples=mesh->FaceElements->numElements;
-   }
-   break;
-   case(ReducedFaceElements):
-   if (mesh->FaceElements!=NULL) {
-      numDataPointsPerSample=(mesh->FaceElements->numLocalDim==0)?0:1/*referenceElementSet->referenceElementReducedQuadrature->BasisFunctions->numQuadNodes*/;
-      numSamples=mesh->FaceElements->numElements;
-   }
-   break;
-   case(Points):
-   if (mesh->Points!=NULL) {
-      numDataPointsPerSample=1;
-      numSamples=mesh->Points->numElements;
-   }
-   break;
-   case(DegreesOfFreedom):
-   if (mesh->Nodes!=NULL) {
-      numDataPointsPerSample=1;
-      numSamples=Dudley_NodeFile_getNumDegreesOfFreedom(mesh->Nodes);
-   }
-   break;
-   case(ReducedDegreesOfFreedom):
-   if (mesh->Nodes!=NULL) {
-      numDataPointsPerSample=1;
-      numSamples=Dudley_NodeFile_getNumReducedDegreesOfFreedom(mesh->Nodes);
-   }
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Invalid function space type: " << functionSpaceCode << " for domain: " << getDescription();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   return pair<int,int>(numDataPointsPerSample,numSamples);
-}
-
-//
-// adds linear PDE of second order into a given stiffness matrix and right hand side:
-//
-void MeshAdapter::addPDEToSystem(
-                                 AbstractSystemMatrix& mat, escript::Data& rhs,
-                                 const escript::Data& A, const escript::Data& B, const escript::Data& C,const  escript::Data& D,const  escript::Data& X,const  escript::Data& Y,
-                                 const escript::Data& d, const escript::Data& y,
-				 const escript::Data& d_contact, const escript::Data& y_contact,
-                                 const escript::Data& d_dirac,const escript::Data& y_dirac) const
-{
-    if (!d_contact.isEmpty())
-    {
-	throw DudleyAdapterException("Dudley does not support d_contact");
-    }
-    if (!y_contact.isEmpty())
-    {
-	throw DudleyAdapterException("Dudley does not support y_contact");
-    }
-   SystemMatrixAdapter* smat=dynamic_cast<SystemMatrixAdapter*>(&mat);
-   if (smat==0)
-   {
-	throw DudleyAdapterException("Dudley only accepts Paso system matrices");
-   }
-
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Elements,smat->getPaso_SystemMatrix(), &rhs, &A, &B, &C, &D, &X, &Y );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->FaceElements, smat->getPaso_SystemMatrix(), &rhs, 0, 0, 0, &d, 0, &y );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Points, smat->getPaso_SystemMatrix(), &rhs, 0, 0, 0, &d_dirac, 0, &y_dirac );
-   checkDudleyError();
-}
-
-void  MeshAdapter::addPDEToLumpedSystem(
-                                        escript::Data& mat,
-                                        const escript::Data& D,
-                                        const escript::Data& d,
-                                        const escript::Data& d_dirac,
-					const bool useHRZ) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-
-   Dudley_Assemble_LumpedSystem(mesh->Nodes,mesh->Elements,&mat, &D, useHRZ);
-   checkDudleyError();
-   
-   Dudley_Assemble_LumpedSystem(mesh->Nodes,mesh->FaceElements,&mat, &d, useHRZ);
-   checkDudleyError();
-
-   Dudley_Assemble_LumpedSystem(mesh->Nodes,mesh->FaceElements,&mat, &d_dirac, useHRZ);
-   checkDudleyError();
-
-}
-
-
-//
-// adds linear PDE of second order into the right hand side only
-//
-void MeshAdapter::addPDEToRHS( escript::Data& rhs, const  escript::Data& X,const  escript::Data& Y, const escript::Data& y, const escript::Data& y_contact, const escript::Data& y_dirac) const
-{
-   if (!y_contact.isEmpty())
-   {
-	throw DudleyAdapterException("Dudley does not support y_contact");
-   }
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Elements, paso::SystemMatrix_ptr(), &rhs, 0, 0, 0, 0, &X, &Y);
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->FaceElements, paso::SystemMatrix_ptr(), &rhs, 0, 0, 0, 0, 0, &y );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Points, paso::SystemMatrix_ptr(), &rhs, 0, 0, 0, 0, 0, &y_dirac );
-   checkDudleyError();
-}
-//
-// adds PDE of second order into a transport problem
-//
-void MeshAdapter::addPDEToTransportProblem(
-                                           AbstractTransportProblem& tp, escript::Data& source, const escript::Data& M,
-                                           const escript::Data& A, const escript::Data& B, const escript::Data& C,
-                                           const  escript::Data& D,const  escript::Data& X,const  escript::Data& Y,
-                                           const escript::Data& d, const escript::Data& y, 
-					   const escript::Data& d_contact,const escript::Data& y_contact,
-					   const escript::Data& d_dirac, const escript::Data& y_dirac) const
-{
-    if (!d_contact.isEmpty())
-    {
-	throw DudleyAdapterException("Dudley does not support d_contact");
-    }
-    if (!y_contact.isEmpty())
-    {
-	throw DudleyAdapterException("Dudley does not support y_contact");
-    }   
-   TransportProblemAdapter* tpa=dynamic_cast<TransportProblemAdapter*>(&tp);
-   if (tpa==0)
-   {
-	throw DudleyAdapterException("Dudley only accepts Paso transport problems");
-   }
-   DataTypes::ShapeType shape;
-   source.expand();
-
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   paso::TransportProblem_ptr _tp(tpa->getPaso_TransportProblem());
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Elements,_tp->mass_matrix, &source, 0, 0, 0, &M, 0, 0 );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Elements,_tp->transport_matrix, &source, &A, &B, &C, &D, &X, &Y );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->FaceElements, _tp->transport_matrix, &source, 0, 0, 0, &d, 0, &y );
-   checkDudleyError();
-
-   Dudley_Assemble_PDE(mesh->Nodes,mesh->Points, _tp->transport_matrix, &source, 0, 0, 0, &d_dirac, 0, &y_dirac );
-   checkDudleyError();
-}
-
-//
-// interpolates data between different function spaces:
-//
-void MeshAdapter::interpolateOnDomain(escript::Data& target,const escript::Data& in) const
-{
-   const MeshAdapter& inDomain=dynamic_cast<const MeshAdapter&>(*(in.getFunctionSpace().getDomain()));
-   const MeshAdapter& targetDomain=dynamic_cast<const MeshAdapter&>(*(target.getFunctionSpace().getDomain()));
-   if (inDomain!=*this)  
-      throw DudleyAdapterException("Error - Illegal domain of interpolant.");
-   if (targetDomain!=*this) 
-      throw DudleyAdapterException("Error - Illegal domain of interpolation target.");
-
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch(in.getFunctionSpace().getTypeCode()) {
-   case(Nodes):
-      switch(target.getFunctionSpace().getTypeCode()) {
-      case(Nodes):
-      case(ReducedNodes):
-      case(DegreesOfFreedom):
-      case(ReducedDegreesOfFreedom):
-      Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      break;
-      case(Elements):
-      case(ReducedElements):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&in,&target);
-      break;
-      case(FaceElements):
-      case(ReducedFaceElements):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&in,&target);
-      break;
-      case(Points):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->Points,&in,&target);
-      break;
-      default:
-         stringstream temp;
-         temp << "Error - Interpolation on Domain: Dudley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-         throw DudleyAdapterException(temp.str());
-         break;
-      }
-      break;
-   case(ReducedNodes):
-      switch(target.getFunctionSpace().getTypeCode()) {
-      case(Nodes):
-      case(ReducedNodes):
-      case(DegreesOfFreedom):
-      case(ReducedDegreesOfFreedom):
-      Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      break;
-      case(Elements):
-      case(ReducedElements):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&in,&target);
-      break;
-      case(FaceElements):
-      case(ReducedFaceElements):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&in,&target);
-      break;
-      case(Points):
-      Dudley_Assemble_interpolate(mesh->Nodes,mesh->Points,&in,&target);
-      break;
-      default:
-         stringstream temp;
-         temp << "Error - Interpolation on Domain: Dudley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-         throw DudleyAdapterException(temp.str());
-         break;
-      }
-      break;
-   case(Elements):
-      if (target.getFunctionSpace().getTypeCode()==Elements) {
-         Dudley_Assemble_CopyElementData(mesh->Elements,&target,&in);
-      } else if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
-         Dudley_Assemble_AverageElementData(mesh->Elements,&target,&in);
-      } else {
-         throw DudleyAdapterException("Error - No interpolation with data on elements possible.");
-      }
-      break;
-   case(ReducedElements):
-      if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
-         Dudley_Assemble_CopyElementData(mesh->Elements,&target,&in);
-      } else {
-         throw DudleyAdapterException("Error - No interpolation with data on elements with reduced integration order possible.");
-      }
-      break;
-   case(FaceElements):
-      if (target.getFunctionSpace().getTypeCode()==FaceElements) {
-         Dudley_Assemble_CopyElementData(mesh->FaceElements,&target,&in);
-      } else if (target.getFunctionSpace().getTypeCode()==ReducedFaceElements) {
-         Dudley_Assemble_AverageElementData(mesh->FaceElements,&target,&in);
-      } else {
-         throw DudleyAdapterException("Error - No interpolation with data on face elements possible.");
-      }
-      break;
-   case(ReducedFaceElements):
-      if (target.getFunctionSpace().getTypeCode()==ReducedFaceElements) {
-         Dudley_Assemble_CopyElementData(mesh->FaceElements,&target,&in);
-      } else {
-         throw DudleyAdapterException("Error - No interpolation with data on face elements with reduced integration order possible.");
-      }
-      break;
-   case(Points):
-      if (target.getFunctionSpace().getTypeCode()==Points) {
-         Dudley_Assemble_CopyElementData(mesh->Points,&target,&in);
-      } else {
-         throw DudleyAdapterException("Error - No interpolation with data on points possible.");
-      }
-      break;
-   case(DegreesOfFreedom):      
-      switch(target.getFunctionSpace().getTypeCode()) {
-      case(ReducedDegreesOfFreedom):
-      case(DegreesOfFreedom):
-      Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      break;
-   
-      case(Nodes):
-      case(ReducedNodes):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data(in);
-         temp.expand();
-         Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&temp);
-      } else {
-         Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      }
-      break;
-      case(Elements):
-      case(ReducedElements):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data( in,  continuousFunction(*this) );
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&temp,&target);
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&in,&target);
-      }
-      break;
-      case(FaceElements):
-      case(ReducedFaceElements):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data( in,  continuousFunction(*this) );
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&temp,&target);
-   
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&in,&target);
-      }
-      break;
-      case(Points):
-      if (getMPISize()>1) {
-         //escript::Data temp=escript::Data( in,  continuousFunction(*this) );
-         //escriptDataC _in2 = temp.getDataC();
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Points,&in,&target);
-      }
-      break;
-      default:
-         stringstream temp;
-         temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-         throw DudleyAdapterException(temp.str());
-         break;
-      }
-      break;
-   case(ReducedDegreesOfFreedom):
-      switch(target.getFunctionSpace().getTypeCode()) {
-      case(Nodes):
-      throw DudleyAdapterException("Error - Dudley does not support interpolation from reduced degrees of freedom to mesh nodes.");
-      break;
-      case(ReducedNodes):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data(in);
-         temp.expand();
-         Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&temp);
-      } else {
-         Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      }
-      break;
-      case(DegreesOfFreedom):
-      throw DudleyAdapterException("Error - Dudley does not support interpolation from reduced degrees of freedom to degrees of freedom");
-      break;
-      case(ReducedDegreesOfFreedom):
-      Dudley_Assemble_CopyNodalData(mesh->Nodes,&target,&in);
-      break;
-      case(Elements):
-      case(ReducedElements):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data( in,  reducedContinuousFunction(*this) );
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&temp,&target);
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Elements,&in,&target);
-      }
-      break;
-      case(FaceElements):
-      case(ReducedFaceElements):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data( in,  reducedContinuousFunction(*this) );
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&temp,&target);
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->FaceElements,&in,&target);
-      }
-      break;
-      case(Points):
-      if (getMPISize()>1) {
-         escript::Data temp=escript::Data( in,  reducedContinuousFunction(*this) );
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Points,&temp,&target);
-      } else {
-         Dudley_Assemble_interpolate(mesh->Nodes,mesh->Points,&in,&target);
-      }
-      break;
-      default:
-         stringstream temp;
-         temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-         throw DudleyAdapterException(temp.str());
-         break;
-      }
-      break;
-   default:
-      stringstream temp;
-      temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type %d" << in.getFunctionSpace().getTypeCode();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-}
-
-//
-// copies the locations of sample points into x:
-//
-void MeshAdapter::setToX(escript::Data& arg) const
-{
-   const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-   if (argDomain!=*this) 
-      throw DudleyAdapterException("Error - Illegal domain of data point locations");
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   // in case of values node coordinates we can do the job directly:
-   if (arg.getFunctionSpace().getTypeCode()==Nodes) {
-      Dudley_Assemble_NodeCoordinates(mesh->Nodes,&arg);
-   } else {
-      escript::Data tmp_data=Vector(0.0,continuousFunction(*this),true);
-      Dudley_Assemble_NodeCoordinates(mesh->Nodes,&tmp_data);
-      // this is then interpolated onto arg:
-      interpolateOnDomain(arg,tmp_data);
-   }
-   checkDudleyError();
-}
-
-//
-// copies the surface normals at the data points into normal:
-//
-void MeshAdapter::setToNormal(escript::Data& normal) const
-{
-/*   const MeshAdapter& normalDomain=dynamic_cast<const MeshAdapter&>(normal.getFunctionSpace().getDomain());*/
-   const MeshAdapter& normalDomain=dynamic_cast<const MeshAdapter&>(*(normal.getFunctionSpace().getDomain()));
-   if (normalDomain!=*this) 
-      throw DudleyAdapterException("Error - Illegal domain of normal locations");
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch(normal.getFunctionSpace().getTypeCode()) {
-   case(Nodes):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for nodes");
-   break;
-   case(ReducedNodes):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for reduced nodes");
-   break;
-   case(Elements):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for elements");
-   break;
-   case(ReducedElements):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for elements with reduced integration order");
-   break;
-   case (FaceElements):
-   Dudley_Assemble_setNormal(mesh->Nodes,mesh->FaceElements,&normal);
-   break;
-   case (ReducedFaceElements):
-   Dudley_Assemble_setNormal(mesh->Nodes,mesh->FaceElements,&normal);
-   break;
-   case(Points):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for point elements");
-   break;
-   case(DegreesOfFreedom):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for degrees of freedom.");
-   break;
-   case(ReducedDegreesOfFreedom):
-   throw DudleyAdapterException("Error - Dudley does not support surface normal vectors for reduced degrees of freedom.");
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Normal Vectors: Dudley does not know anything about function space type " << normal.getFunctionSpace().getTypeCode();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-}
-
-//
-// interpolates data to other domain:
-//
-void MeshAdapter::interpolateAcross(escript::Data& target,const escript::Data& source) const
-{
-   const_Domain_ptr targetDomain_p=target.getFunctionSpace().getDomain();
-   const MeshAdapter* targetDomain=dynamic_cast<const MeshAdapter*>(targetDomain_p.get());
-   if (targetDomain!=this) 
-      throw DudleyAdapterException("Error - Illegal domain of interpolation target");
-
-   throw DudleyAdapterException("Error - Dudley does not allow interpolation across domains yet.");
-}
-
-//
-// calculates the integral of a function defined by arg:
-//
-void MeshAdapter::setToIntegrals(vector<double>& integrals,const escript::Data& arg) const
-{
-   const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-   if (argDomain!=*this) 
-      throw DudleyAdapterException("Error - Illegal domain of integration kernel");
-
-   double blocktimer_start = blocktimer_time();
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   escript::Data temp;
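-   // Nodal and degree-of-freedom data cannot be integrated directly; it is first
-   // interpolated onto the element function space, then Dudley_Assemble_integrate
-   // sums it over the corresponding element file.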
-   switch(arg.getFunctionSpace().getTypeCode()) {
-   case(Nodes):
-   temp=escript::Data( arg, escript::function(*this) );
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&temp,&integrals[0]);
-   break;
-   case(ReducedNodes):
-   temp=escript::Data( arg, escript::function(*this) );
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&temp,&integrals[0]);
-   break;
-   case(Elements):
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&arg,&integrals[0]);
-   break;
-   case(ReducedElements):
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&arg,&integrals[0]);
-   break;
-   case(FaceElements):
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->FaceElements,&arg,&integrals[0]);
-   break;
-   case(ReducedFaceElements):
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->FaceElements,&arg,&integrals[0]);
-   break;
-   case(Points):
-   throw DudleyAdapterException("Error - Integral of data on points is not supported.");
-   break;
-   case(DegreesOfFreedom):
-   temp=escript::Data( arg, escript::function(*this) );
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&temp,&integrals[0]);
-   break;
-   case(ReducedDegreesOfFreedom):
-   temp=escript::Data( arg, escript::function(*this) );
-   Dudley_Assemble_integrate(mesh->Nodes,mesh->Elements,&temp,&integrals[0]);
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Integrals: Dudley does not know anything about function space type " << arg.getFunctionSpace().getTypeCode();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-   blocktimer_increment("integrate()", blocktimer_start);
-}
-
-//
-// calculates the gradient of arg:
-//
-void MeshAdapter::setToGradient(escript::Data& grad,const escript::Data& arg) const
-{
-   const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-   if (argDomain!=*this)
-      throw DudleyAdapterException("Error - Illegal domain of gradient argument");
-   const MeshAdapter& gradDomain=dynamic_cast<const MeshAdapter&>(*(grad.getFunctionSpace().getDomain()));
-   if (gradDomain!=*this)
-      throw DudleyAdapterException("Error - Illegal domain of gradient");
-
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   const escript::Data* nodeData=0;
-   escript::Data temp;
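-   // On more than one MPI rank, degree-of-freedom data is first interpolated onto
-   // the (reduced) continuous function space so that every rank holds the nodal
-   // values needed to assemble element gradients locally.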
-   if (getMPISize()>1) {
-      if( arg.getFunctionSpace().getTypeCode() == DegreesOfFreedom ) {
-         temp=escript::Data( arg,  continuousFunction(*this) );
-         nodeData = &temp;
-      } else if( arg.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom ) {
-         temp=escript::Data( arg,  reducedContinuousFunction(*this) );
-         nodeData = &temp;
-      } else {
-         nodeData = &arg;
-      }
-   } else {
-      nodeData = &arg;
-   }
-   switch(grad.getFunctionSpace().getTypeCode()) {
-   case(Nodes):
-   throw DudleyAdapterException("Error - Gradient at nodes is not supported.");
-   break;
-   case(ReducedNodes):
-   throw DudleyAdapterException("Error - Gradient at reduced nodes is not supported.");
-   break;
-   case(Elements):
-   Dudley_Assemble_gradient(mesh->Nodes,mesh->Elements,&grad, nodeData);
-   break;
-   case(ReducedElements):
-   Dudley_Assemble_gradient(mesh->Nodes,mesh->Elements,&grad, nodeData);
-   break;
-   case(FaceElements):
-   Dudley_Assemble_gradient(mesh->Nodes,mesh->FaceElements,&grad, nodeData);
-   break;
-   case(ReducedFaceElements):
-   Dudley_Assemble_gradient(mesh->Nodes,mesh->FaceElements,&grad, nodeData);
-   break;
-   case(Points):
-   throw DudleyAdapterException("Error - Gradient at points is not supported.");
-   break;
-   case(DegreesOfFreedom):
-   throw DudleyAdapterException("Error - Gradient at degrees of freedom is not supported.");
-   break;
-   case(ReducedDegreesOfFreedom):
-   throw DudleyAdapterException("Error - Gradient at reduced degrees of freedom is not supported.");
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Gradient: Dudley does not know anything about function space type " << arg.getFunctionSpace().getTypeCode();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-}
-
-//
-// returns the size of elements:
-//
-void MeshAdapter::setToSize(escript::Data& size) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch(size.getFunctionSpace().getTypeCode()) {
-   case(Nodes):
-   throw DudleyAdapterException("Error - Size of nodes is not supported.");
-   break;
-   case(ReducedNodes):
-   throw DudleyAdapterException("Error - Size of reduced nodes is not supported.");
-   break;
-   case(Elements):
-   Dudley_Assemble_getSize(mesh->Nodes,mesh->Elements,&size);
-   break;
-   case(ReducedElements):
-   Dudley_Assemble_getSize(mesh->Nodes,mesh->Elements,&size);
-   break;
-   case(FaceElements):
-   Dudley_Assemble_getSize(mesh->Nodes,mesh->FaceElements,&size);
-   break;
-   case(ReducedFaceElements):
-   Dudley_Assemble_getSize(mesh->Nodes,mesh->FaceElements,&size);
-   break;
-   case(Points):
-   throw DudleyAdapterException("Error - Size of point elements is not supported.");
-   break;
-   case(DegreesOfFreedom):
-   throw DudleyAdapterException("Error - Size of degrees of freedom is not supported.");
-   break;
-   case(ReducedDegreesOfFreedom):
-   throw DudleyAdapterException("Error - Size of reduced degrees of freedom is not supported.");
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Element size: Dudley does not know anything about function space type " << size.getFunctionSpace().getTypeCode();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-}
-
-//
-// sets the location of nodes
-//
-void MeshAdapter::setNewX(const escript::Data& new_x)
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   const MeshAdapter& newDomain=dynamic_cast<const MeshAdapter&>(*(new_x.getFunctionSpace().getDomain()));
-   if (newDomain!=*this) 
-      throw DudleyAdapterException("Error - Illegal domain of new point locations");
-   if ( new_x.getFunctionSpace() == continuousFunction(*this) ) {
-       Dudley_Mesh_setCoordinates(mesh,&new_x);
-   } else {
-       throw DudleyAdapterException("As of version escript3.3 - SetNewX only accepts ContinuousFunction arguments please interpolate.");      
-
-   }
-   checkDudleyError();
-}
-
-bool MeshAdapter::ownSample(int fs_code, index_t id) const
-{
-    if (getMPISize()>1) {
-#ifdef ESYS_MPI
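-        // A sample is owned by this rank iff its global (reduced) node index falls
-        // into the half-open range [myFirstNode, myLastNode) assigned to this rank.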
-        index_t myFirstNode=0, myLastNode=0, k=0;
-        index_t* globalNodeIndex=0;
-        Dudley_Mesh* mesh_p=m_dudleyMesh.get();
-        if (fs_code == DUDLEY_REDUCED_NODES) 
-        {
-            myFirstNode = Dudley_NodeFile_getFirstReducedNode(mesh_p->Nodes);
-            myLastNode = Dudley_NodeFile_getLastReducedNode(mesh_p->Nodes);
-            globalNodeIndex = Dudley_NodeFile_borrowGlobalReducedNodesIndex(mesh_p->Nodes);
-        }
-        else if (fs_code == DUDLEY_NODES)
-        {
-            myFirstNode = Dudley_NodeFile_getFirstNode(mesh_p->Nodes);
-            myLastNode = Dudley_NodeFile_getLastNode(mesh_p->Nodes);
-            globalNodeIndex = Dudley_NodeFile_borrowGlobalNodesIndex(mesh_p->Nodes);
-        }
-        else
-        {
-            throw DudleyAdapterException("unsupported function space type for ownSample()");
-        }
-        k=globalNodeIndex[id];
-        return static_cast<bool>( (myFirstNode <= k) && (k < myLastNode) );
-#endif
-    }
-    return true;
-}
-
-
-
-//
-// creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros
-//
-ASM_ptr MeshAdapter::newSystemMatrix(
-                                                 const int row_blocksize,
-                                                 const escript::FunctionSpace& row_functionspace,
-                                                 const int column_blocksize,
-                                                 const escript::FunctionSpace& column_functionspace,
-                                                 const int type) const
-{
-   int reduceRowOrder=0;
-   int reduceColOrder=0;
-   // is the domain right?
-   const MeshAdapter& row_domain=dynamic_cast<const MeshAdapter&>(*(row_functionspace.getDomain()));
-   if (row_domain!=*this) 
-      throw DudleyAdapterException("Error - domain of row function space does not match the domain of matrix generator.");
-   const MeshAdapter& col_domain=dynamic_cast<const MeshAdapter&>(*(column_functionspace.getDomain()));
-   if (col_domain!=*this) 
-      throw DudleyAdapterException("Error - domain of column function space does not match the domain of matrix generator.");
-   // is the function space type right?
-   if (row_functionspace.getTypeCode()==DegreesOfFreedom) {
-      reduceRowOrder=0;
-   } else if (row_functionspace.getTypeCode()==ReducedDegreesOfFreedom) {
-      reduceRowOrder=1;
-   } else {
-      throw DudleyAdapterException("Error - illegal function space type for system matrix rows.");
-   }
-   if (column_functionspace.getTypeCode()==DegreesOfFreedom) {
-      reduceColOrder=0;
-   } else if (column_functionspace.getTypeCode()==ReducedDegreesOfFreedom) {
-      reduceColOrder=1;
-   } else {
-      throw DudleyAdapterException("Error - illegal function space type for system matrix columns.");
-   }
-   // generate matrix:
- 
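-   // The Paso matrix pattern is built for the requested (possibly reduced) row and
-   // column orders; the zero-initialised matrix is then wrapped in a
-   // SystemMatrixAdapter for use from escript.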
-   paso::SystemMatrixPattern_ptr fsystemMatrixPattern(Dudley_getPattern(getDudley_Mesh(),reduceRowOrder,reduceColOrder));
-   checkDudleyError();
-   paso::SystemMatrix_ptr fsystemMatrix;
-   int trilinos = 0;
-   if (trilinos) {
-#ifdef TRILINOS
-      /* Allocate an Epetra_VbrMatrix here */
-#endif
-   }
-   else {
-      fsystemMatrix.reset(new paso::SystemMatrix(type, fsystemMatrixPattern, row_blocksize, column_blocksize,false));
-   }
-   checkPasoError();
-   SystemMatrixAdapter* sma=new SystemMatrixAdapter(fsystemMatrix,row_blocksize,row_functionspace, column_blocksize,column_functionspace);
-   return ASM_ptr(sma);
-}
-
-//
-// creates a TransportProblemAdapter
-//
-ATP_ptr MeshAdapter::newTransportProblem(const int blocksize,
-                                         const escript::FunctionSpace& fs,
-                                         const int type) const
-{
-   int reduceOrder=0;
-   // is the domain right?
-   const MeshAdapter& domain=dynamic_cast<const MeshAdapter&>(*(fs.getDomain()));
-   if (domain!=*this) 
-      throw DudleyAdapterException("Error - domain of function space does not match the domain of transport problem generator.");
-   // is the function space type right?
-   if (fs.getTypeCode()==DegreesOfFreedom) {
-      reduceOrder=0;
-   } else if (fs.getTypeCode()==ReducedDegreesOfFreedom) {
-      reduceOrder=1;
-   } else {
-      throw DudleyAdapterException("Error - illegal function space type for system matrix rows.");
-   }
-   // generate matrix:
- 
-   paso::SystemMatrixPattern_ptr fsystemMatrixPattern(Dudley_getPattern(getDudley_Mesh(),reduceOrder,reduceOrder));
-   checkDudleyError();
-   paso::TransportProblem_ptr transportProblem(new paso::TransportProblem(
-                                            fsystemMatrixPattern, blocksize));
-   checkPasoError();
-   AbstractTransportProblem* atp=new TransportProblemAdapter(transportProblem, blocksize, fs);
-   return ATP_ptr(atp);
-}
-
-//
-// vtkObject MeshAdapter::createVtkObject() const
-// TODO:
-//
-
-//
-// returns true if data for the given function space type is considered cell centered:
-bool MeshAdapter::isCellOriented(int functionSpaceCode) const
-{
-   switch(functionSpaceCode) {
-   case(Nodes):
-   case(DegreesOfFreedom):
-   case(ReducedDegreesOfFreedom):
-   return false;
-   break;
-   case(Elements):
-   case(FaceElements):
-   case(Points):
-   case(ReducedElements):
-   case(ReducedFaceElements):
-   return true;
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Cell: Dudley does not know anything about function space type " << functionSpaceCode;
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-   return false;
-}
-
-bool
-MeshAdapter::commonFunctionSpace(const vector<int>& fs, int& resultcode) const
-{
-   /* The idea is to use equivalence classes. [Types which can be interpolated back and forth]
-	class 1: DOF <-> Nodes
-	class 2: ReducedDOF <-> ReducedNodes
-	class 3: Points
-	class 4: Elements
-	class 5: ReducedElements
-	class 6: FaceElements
-	class 7: ReducedFaceElements
-	class 8: ContactElementZero <-> ContactElementOne
-	class 9: ReducedContactElementZero <-> ReducedContactElementOne
-
-   There is also a set of lines. Interpolation is possible down a line but not between lines.
-   class 1 and 2 belong to all lines so aren't considered.
-	line 0: class 3
-	line 1: class 4,5
-	line 2: class 6,7
-	line 3: class 8,9
-
-   For classes with multiple members (eg class 2) we have vars to record if there is at least one instance.
-   eg hasnodes is true if we have at least one instance of Nodes.
-   */
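-    /* Example: for fs = {Nodes, Elements, ReducedElements} only line 1 is used;
-       since class 5 (ReducedElements) is present, resultcode becomes
-       ReducedElements, which every member of fs can be interpolated to. */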
-    if (fs.empty())
-    {
-        return false;
-    }
-    vector<int> hasclass(10);
-    vector<int> hasline(4);	
-    bool hasnodes=false;
-    bool hasrednodes=false;
-    for (int i=0;i<fs.size();++i)
-    {
-	switch(fs[i])
-	{
-	case(Nodes):   hasnodes=true;	// no break is deliberate
-	case(DegreesOfFreedom):
-		hasclass[1]=1;
-		break;
-	case(ReducedNodes):    hasrednodes=true;	// no break is deliberate
-	case(ReducedDegreesOfFreedom):
-		hasclass[2]=1;
-		break;
-	case(Points):
-		hasline[0]=1;
-		hasclass[3]=1;
-		break;
-	case(Elements):
-		hasclass[4]=1;
-		hasline[1]=1;
-		break;
-	case(ReducedElements):
-		hasclass[5]=1;
-		hasline[1]=1;
-		break;
-	case(FaceElements):
-		hasclass[6]=1;
-		hasline[2]=1;
-		break;
-	case(ReducedFaceElements):
-		hasclass[7]=1;
-		hasline[2]=1;
-		break;
-	default:
-		return false;
-	}
-    }
-    int totlines=hasline[0]+hasline[1]+hasline[2]+hasline[3];
-    // fail if we have more than one leaf group
-
-    if (totlines>1)
-    {
-	return false;	// there are at least two branches we can't interpolate between
-    }
-    else if (totlines==1)
-    {
-	if (hasline[0]==1)		// we have points
-	{
-	    resultcode=Points;
-	}
-	else if (hasline[1]==1)
-	{
-	    if (hasclass[5]==1)
-	    {
-		resultcode=ReducedElements;
-	    }
-	    else
-	    {
-		resultcode=Elements;
-	    }
-	}
-	else if (hasline[2]==1)
-	{
-	    if (hasclass[7]==1)
-	    {
-		resultcode=ReducedFaceElements;
-	    }
-	    else
-	    {
-		resultcode=FaceElements;
-	    }
-	}
-	else	// so we must be in line3
-	{
-
-	    throw DudleyAdapterException("Programmer Error - choosing between contact elements - we should never get here.");
-
-	}
-    }
-    else	// totlines==0
-    {
-	if (hasclass[2]==1)
-	{
-		// something from class 2
-		resultcode=(hasrednodes?ReducedNodes:ReducedDegreesOfFreedom);
-	}
-	else
-	{	// something from class 1
-		resultcode=(hasnodes?Nodes:DegreesOfFreedom);
-	}
-    }
-    return true;
-}
-
-signed char MeshAdapter::preferredInterpolationOnDomain(int functionSpaceType_source,int functionSpaceType_target) const
-{
-    if (probeInterpolationOnDomain(functionSpaceType_source, functionSpaceType_target))
-    {  
-        return 1;
-    }
-    else if (probeInterpolationOnDomain(functionSpaceType_target, functionSpaceType_source))
-    {  
-        return -1;
-    }
-    return 0;
-}
-
-
-
-bool MeshAdapter::probeInterpolationOnDomain(int functionSpaceType_source,int functionSpaceType_target) const
-{
-   switch(functionSpaceType_source) {
-   case(Nodes):
-   	switch(functionSpaceType_target) {
-	case(Nodes):
-	case(ReducedNodes):
-	case(ReducedDegreesOfFreedom):
-	case(DegreesOfFreedom):
-	case(Elements):
-	case(ReducedElements):
-	case(FaceElements):
-	case(ReducedFaceElements):
-	case(Points):
-	return true;
-	default:
-	      stringstream temp;
-	      temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << functionSpaceType_target;
-	      throw DudleyAdapterException(temp.str());
-   }
-   break;
-   case(ReducedNodes):
-	switch(functionSpaceType_target) {
-	case(ReducedNodes):
-	case(ReducedDegreesOfFreedom):
-	case(Elements):
-	case(ReducedElements):
-	case(FaceElements):
-	case(ReducedFaceElements):
-	case(Points):
-	return true;
-	case(Nodes):
-	case(DegreesOfFreedom):
-	return false;
-	default:
-		stringstream temp;
-		temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << functionSpaceType_target;
-		throw DudleyAdapterException(temp.str());
-   }
-   break;
-   case(Elements):
-	if (functionSpaceType_target==Elements) {
-	  return true;
-	} else if (functionSpaceType_target==ReducedElements) {
-	  return true;
-        } else {
-          return false;
-        }
-   case(ReducedElements):
-	if (functionSpaceType_target==ReducedElements) {
-	  return true;
-	} else {
-          return false;
-	}
-   case(FaceElements):
-	if (functionSpaceType_target==FaceElements) {
-      		return true;
-	} else if (functionSpaceType_target==ReducedFaceElements) {
-      		return true;
-	} else {
-      		return false;
-	}
-   case(ReducedFaceElements):
-	if (functionSpaceType_target==ReducedFaceElements) {
-      		return true;
-	} else {
-		return false;
-	}
-   case(Points):
-	if (functionSpaceType_target==Points) {
-      		return true;
-	} else {
-      		return false;
-	}
-   case(DegreesOfFreedom):
-	switch(functionSpaceType_target) {
-	case(ReducedDegreesOfFreedom):
-	case(DegreesOfFreedom):
-	case(Nodes):
-	case(ReducedNodes):
-	case(Elements):
-	case(ReducedElements):
-	case(Points):
-	case(FaceElements):
-	case(ReducedFaceElements):
-	return true;
-	default:
-		stringstream temp;
-		temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << functionSpaceType_target;
-		throw DudleyAdapterException(temp.str());
-	}
-   	break;
-   case(ReducedDegreesOfFreedom):
-   switch(functionSpaceType_target) {
-	case(ReducedDegreesOfFreedom):
-	case(ReducedNodes):
-	case(Elements):
-	case(ReducedElements):
-	case(FaceElements):
-	case(ReducedFaceElements):
-	case(Points):
-	return true;
-	case(Nodes):
-	case(DegreesOfFreedom):
-	return false;
-	default:
-		stringstream temp;
-		temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << functionSpaceType_target;
-		throw DudleyAdapterException(temp.str());
-	}
-	break;
-   default:
-      stringstream temp;
-      temp << "Error - Interpolation On Domain: Dudley does not know anything about function space type " << functionSpaceType_source;
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   checkDudleyError();
-   return false;
-}
-
-bool MeshAdapter::probeInterpolationAcross(int functionSpaceType_source,const AbstractDomain& targetDomain, int functionSpaceType_target) const
-{
-   return false;
-}
-
-bool MeshAdapter::operator==(const AbstractDomain& other) const
-{
-   const MeshAdapter* temp=dynamic_cast<const MeshAdapter*>(&other);
-   if (temp!=0) {
-      return (m_dudleyMesh==temp->m_dudleyMesh);
-   } else {
-      return false;
-   }
-}
-
-bool MeshAdapter::operator!=(const AbstractDomain& other) const
-{
-   return !(operator==(other));
-}
-
-int MeshAdapter::getSystemMatrixTypeId(const boost::python::object& options) const
-{
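-    // The options argument wraps an escript SolverBuddy instance; extract it and map
-    // the chosen solver, preconditioner, package and symmetry flag to a matrix type id.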
-    const escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy>(options);
-
-    return SystemMatrixAdapter::getSystemMatrixTypeId(sb.getSolverMethod(),
-                sb.getPreconditioner(), sb.getPackage(), sb.isSymmetric(),
-                m_dudleyMesh->MPIInfo);
-}
-
-int MeshAdapter::getTransportTypeId(int solver, int preconditioner, int package, bool symmetry) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   return TransportProblemAdapter::getTransportTypeId(solver, preconditioner,
-           package, symmetry, mesh->MPIInfo);
-}
-
-escript::Data MeshAdapter::getX() const
-{
-   return continuousFunction(*this).getX();
-}
-
-escript::Data MeshAdapter::getNormal() const
-{
-   return functionOnBoundary(*this).getNormal();
-}
-
-escript::Data MeshAdapter::getSize() const
-{
-   return escript::function(*this).getSize();
-}
-
-const int* MeshAdapter::borrowSampleReferenceIDs(int functionSpaceType) const
-{
-   int *out = NULL;
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch (functionSpaceType) {
-   case(Nodes):
-   out=mesh->Nodes->Id;
-   break;
-   case(ReducedNodes):
-   out=mesh->Nodes->reducedNodesId;
-   break;
-   case(Elements):
-   out=mesh->Elements->Id;
-   break;
-   case(ReducedElements):
-   out=mesh->Elements->Id;
-   break;
-   case(FaceElements):
-   out=mesh->FaceElements->Id;
-   break;
-   case(ReducedFaceElements):
-   out=mesh->FaceElements->Id;
-   break;
-   case(Points):
-   out=mesh->Points->Id;
-   break;
-   case(DegreesOfFreedom):
-   out=mesh->Nodes->degreesOfFreedomId;
-   break;
-   case(ReducedDegreesOfFreedom):
-   out=mesh->Nodes->reducedDegreesOfFreedomId;
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Invalid function space type: " << functionSpaceType << " for domain: " << getDescription();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   return out;
-}
-int MeshAdapter::getTagFromSampleNo(int functionSpaceType, int sampleNo) const
-{
-   int out=0;
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch (functionSpaceType) {
-   case(Nodes):
-   out=mesh->Nodes->Tag[sampleNo];
-   break;
-   case(ReducedNodes):
-   throw DudleyAdapterException(" Error - ReducedNodes does not support tags.");
-   break;
-   case(Elements):
-   out=mesh->Elements->Tag[sampleNo];
-   break;
-   case(ReducedElements):
-   out=mesh->Elements->Tag[sampleNo];
-   break;
-   case(FaceElements):
-   out=mesh->FaceElements->Tag[sampleNo];
-   break;
-   case(ReducedFaceElements):
-   out=mesh->FaceElements->Tag[sampleNo];
-   break;
-   case(Points):
-   out=mesh->Points->Tag[sampleNo];
-   break;
-   case(DegreesOfFreedom):
-   throw DudleyAdapterException(" Error - DegreesOfFreedom does not support tags.");
-   break;
-   case(ReducedDegreesOfFreedom):
-   throw DudleyAdapterException(" Error - ReducedDegreesOfFreedom does not support tags.");
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Invalid function space type: " << functionSpaceType << " for domain: " << getDescription();
-      throw DudleyAdapterException(temp.str());
-      break;
-   }
-   return out;
-}
-
-
-void MeshAdapter::setTags(const int functionSpaceType, const int newTag, const escript::Data& mask) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   switch(functionSpaceType) {
-   case(Nodes):
-   Dudley_NodeFile_setTags(mesh->Nodes,newTag,&mask);
-   break;
-   case(ReducedNodes):
-   throw DudleyAdapterException("Error - ReducedNodes does not support tags");
-   break;
-   case(DegreesOfFreedom):
-   throw DudleyAdapterException("Error - DegreesOfFreedom does not support tags");
-   break;
-   case(ReducedDegreesOfFreedom):
-   throw DudleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-   break;
-   case(Elements):
-   Dudley_ElementFile_setTags(mesh->Elements,newTag,&mask);
-   break;
-   case(ReducedElements):
-   Dudley_ElementFile_setTags(mesh->Elements,newTag,&mask);
-   break;
-   case(FaceElements):
-   Dudley_ElementFile_setTags(mesh->FaceElements,newTag,&mask);
-   break;
-   case(ReducedFaceElements):
-   Dudley_ElementFile_setTags(mesh->FaceElements,newTag,&mask);
-   break;
-   case(Points):
-   Dudley_ElementFile_setTags(mesh->Points,newTag,&mask);
-   break;
-   default:
-      stringstream temp;
-      temp << "Error - Dudley does not know anything about function space type " << functionSpaceType;
-      throw DudleyAdapterException(temp.str());
-   }
-   checkDudleyError();
-   return;
-}
-
-void MeshAdapter::setTagMap(const string& name,  int tag)
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   Dudley_Mesh_addTagMap(mesh, name.c_str(),tag);
-   checkDudleyError();
-   // throwStandardException("MeshAdapter::set TagMap is not implemented.");
-}
-
-int MeshAdapter::getTag(const string& name) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   int tag=0;
-   tag=Dudley_Mesh_getTag(mesh, name.c_str());
-   checkDudleyError();
-   // throwStandardException("MeshAdapter::getTag is not implemented.");
-   return tag;
-}
-
-bool MeshAdapter::isValidTagName(const string& name) const
-{
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   return Dudley_Mesh_isValidTagName(mesh,name.c_str());
-}
-
-string MeshAdapter::showTagNames() const
-{
-   stringstream temp;
-   Dudley_Mesh* mesh=m_dudleyMesh.get();
-   Dudley_TagMap* tag_map=mesh->TagMap;
-   while (tag_map) {
-      temp << tag_map->name;
-      tag_map=tag_map->next;
-      if (tag_map) temp << ", ";
-   }
-   return temp.str();
-}
-
-int MeshAdapter::getNumberOfTagsInUse(int functionSpaceCode) const
-{
-  Dudley_Mesh* mesh=m_dudleyMesh.get();
-  dim_t numTags=0;
-  switch(functionSpaceCode) {
-   case(Nodes):
-          numTags=mesh->Nodes->numTagsInUse;
-          break;
-   case(ReducedNodes):
-          throw DudleyAdapterException("Error - ReducedNodes does not support tags");
-          break;
-   case(DegreesOfFreedom):
-          throw DudleyAdapterException("Error - DegreesOfFreedom does not support tags");
-          break;
-   case(ReducedDegreesOfFreedom):
-          throw DudleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-          break;
-   case(Elements):
-   case(ReducedElements):
-          numTags=mesh->Elements->numTagsInUse;
-          break;
-   case(FaceElements):
-   case(ReducedFaceElements):
-          numTags=mesh->FaceElements->numTagsInUse;
-          break;
-   case(Points):
-          numTags=mesh->Points->numTagsInUse;
-          break;
-   default:
-      stringstream temp;
-      temp << "Error - Dudley does not know anything about function space type " << functionSpaceCode;
-      throw DudleyAdapterException(temp.str());
-  }
-  return numTags;
-}
-
-const int* MeshAdapter::borrowListOfTagsInUse(int functionSpaceCode) const
-{
-  Dudley_Mesh* mesh=m_dudleyMesh.get();
-  index_t* tags=NULL;
-  switch(functionSpaceCode) {
-   case(Nodes):
-          tags=mesh->Nodes->tagsInUse;
-          break;
-   case(ReducedNodes):
-          throw DudleyAdapterException("Error - ReducedNodes does not support tags");
-          break;
-   case(DegreesOfFreedom):
-          throw DudleyAdapterException("Error - DegreesOfFreedom does not support tags");
-          break;
-   case(ReducedDegreesOfFreedom):
-          throw DudleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-          break;
-   case(Elements):
-   case(ReducedElements):
-          tags=mesh->Elements->tagsInUse;
-          break;
-   case(FaceElements):
-   case(ReducedFaceElements):
-          tags=mesh->FaceElements->tagsInUse;
-          break;
-   case(Points):
-          tags=mesh->Points->tagsInUse;
-          break;
-   default:
-      stringstream temp;
-      temp << "Error - Dudley does not know anything about function space type " << functionSpaceCode;
-      throw DudleyAdapterException(temp.str());
-  }
-  return tags;
-}
-
-
-bool MeshAdapter::canTag(int functionSpaceCode) const
-{
-  switch(functionSpaceCode) {
-   case(Nodes):
-   case(Elements):
-   case(ReducedElements):
-   case(FaceElements):
-   case(ReducedFaceElements):
-   case(Points):
-          return true;
-   case(ReducedNodes):
-   case(DegreesOfFreedom):
-   case(ReducedDegreesOfFreedom):
-	  return false;
-   default:
-	return false;
-  }
-}
-
-AbstractDomain::StatusType MeshAdapter::getStatus() const
-{
-  Dudley_Mesh* mesh=m_dudleyMesh.get();
-  return Dudley_Mesh_getStatus(mesh);
-}
-
-int MeshAdapter::getApproximationOrder(const int functionSpaceCode) const
-{
-   
-  Dudley_Mesh* mesh=m_dudleyMesh.get();
-  int order =-1;
-  switch(functionSpaceCode) {
-   case(Nodes):
-   case(DegreesOfFreedom):
-          order=mesh->approximationOrder;
-          break;
-   case(ReducedNodes):
-   case(ReducedDegreesOfFreedom):
-          order=mesh->reducedApproximationOrder;
-          break;
-   case(Elements):
-   case(FaceElements):
-   case(Points):
-          order=mesh->integrationOrder;
-          break;
-   case(ReducedElements):
-   case(ReducedFaceElements):
-          order=mesh->reducedIntegrationOrder;
-          break;
-   default:
-      stringstream temp;
-      temp << "Error - Dudley does not know anything about function space type " << functionSpaceCode;
-      throw DudleyAdapterException(temp.str());
-  }
-  return order;
-}
-
-
-bool MeshAdapter::supportsContactElements() const
-{
-    return false;
-}
-
-escript::Data MeshAdapter::randomFill(const escript::DataTypes::ShapeType& shape,
-       const escript::FunctionSpace& what, long seed,
-       const boost::python::tuple& filter) const
-{
-    Data towipe(0, shape, what, true);
-    // since we just made this object, no sharing is possible and we don't need to check for
-    // exclusive write
-    escript::DataTypes::ValueType& dv=towipe.getExpandedVectorReference();
-    const size_t dvsize=dv.size();
-    esysUtils::randomFillArray(seed, &(dv[0]), dvsize);
-    return towipe;  	 
-}
-
-
-}  // end of namespace
diff --git a/dudley/src/CPPAdapter/MeshAdapter.h b/dudley/src/CPPAdapter/MeshAdapter.h
deleted file mode 100644
index 81ab65d..0000000
--- a/dudley/src/CPPAdapter/MeshAdapter.h
+++ /dev/null
@@ -1,671 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined dudley_MeshAdapter_20040526_H
-#define dudley_MeshAdapter_20040526_H
-#include "system_dep.h"
-
-#include "dudley/Mesh.h"
-#include "dudley/Dudley.h"
-#include "dudley/Assemble.h"
-#include "esysUtils/Esys_MPI.h"
-
-#include "DudleyError.h"
-#include "DudleyAdapterException.h"
-
-#include <pasowrap/SystemMatrixAdapter.h>
-#include <pasowrap/TransportProblemAdapter.h>
-#include "escript/AbstractContinuousDomain.h"
-#include "escript/FunctionSpace.h"
-#include "escript/FunctionSpaceFactory.h"
-
-#include <boost/shared_ptr.hpp>
-#include <boost/python/dict.hpp>
-#include <boost/python/extract.hpp>
-
-#include <map>
-#include <vector>
-#include <string>
-#include <sstream>
-
-namespace dudley {
-
-struct null_deleter
-{
-  void operator()(void const *ptr) const
-  {
-  }
-};
-
-
-/**
-   \brief
-   MeshAdapter implements the AbstractContinuousDomain
-   interface for the Dudley library.
-
-   Description:
-   MeshAdapter implements the AbstractContinuousDomain
-   interface for the Dudley library.
-*/
-
-class MeshAdapter : public escript::AbstractContinuousDomain
-{
- public:
-
-  //
-  // Codes for function space types supported
-  static const int DegreesOfFreedom;
-  static const int ReducedDegreesOfFreedom;
-  static const int Nodes;
-  static const int ReducedNodes;
-  static const int Elements;
-  static const int ReducedElements;
-  static const int FaceElements;
-  static const int ReducedFaceElements;
-  static const int Points;
-  static const int ContactElementsZero;
-  static const int ReducedContactElementsZero;
-  static const int ContactElementsOne;
-  static const int ReducedContactElementsOne;
-
-  /**
-     \brief
-     Constructor for MeshAdapter
-
-     Description:
-     Constructor for MeshAdapter. The pointer passed to MeshAdapter
-     is deleted using a call to Dudley_Mesh_free in the
-     MeshAdapter destructor.
-
-     Throws:
-     May throw an exception derived from EsysException
-
-     \param dudleyMesh Input - A pointer to the externally constructed 
-                               dudley mesh. The pointer passed to MeshAdapter
-                               is deleted using a call to 
-                               Dudley_Mesh_free in the MeshAdapter 
-                               destructor.
-  */
-  DUDLEY_DLL_API
-  MeshAdapter(Dudley_Mesh* dudleyMesh=0);
-
-  /**
-     \brief
-     Copy constructor.
-  */
-  DUDLEY_DLL_API
-  MeshAdapter(const MeshAdapter& in);
-
-  /**
-     \brief
-     Destructor for MeshAdapter. As specified in the constructor
-     this calls Dudley_Mesh_free for the pointer given to the 
-     constructor.
-  */
-  DUDLEY_DLL_API
-  ~MeshAdapter();
-
-  /**
-     \brief
-     return the number of processors used for this domain
-  */
-  DUDLEY_DLL_API
-  virtual int getMPISize() const;
-  /**
-     \brief
-     return the MPI rank of this processor
-  */
-
-  DUDLEY_DLL_API
-  virtual int getMPIRank() const;
-
-  /**
-     \brief
-     If compiled for MPI then execute an MPI_Barrier, else do nothing
-  */
-
-  DUDLEY_DLL_API
-  virtual void MPIBarrier() const;
-
-  /**
-     \brief
-     Return true if on MPI processor 0, else false
-  */
-
-  DUDLEY_DLL_API
-  virtual bool onMasterProcessor() const;
-
-  DUDLEY_DLL_API
-  MPI_Comm getMPIComm() const;
-
-  /**
-     \brief
-     Write the current mesh to a file with the given name.
-     \param fileName Input - The name of the file to write to.
-  */
-  DUDLEY_DLL_API
-  void write(const std::string& fileName) const;
-
-  /**
-     \brief
-     \param full
-  */
-  DUDLEY_DLL_API
-  void Print_Mesh_Info(const bool full=false) const;
-
-  /**
-     \brief
-     dumps the mesh to a file with the given name.
-     \param fileName Input - The name of the file
-  */
-  DUDLEY_DLL_API
-  void dump(const std::string& fileName) const;
-
-  /**
-     \brief
-     return the pointer to the underlying dudley mesh structure
-  */
-  DUDLEY_DLL_API
-  Dudley_Mesh* getDudley_Mesh() const;
-
-   /**
-     \brief
-     Return the tag key for the given sample number.
-     \param functionSpaceType Input - The function space type.
-     \param sampleNo Input - The sample number.
-  */
-  DUDLEY_DLL_API
-  int getTagFromSampleNo(int functionSpaceType, int sampleNo) const;
-
-  /**
-     \brief
-     Return the reference numbers (IDs) of the samples of the given function space type.
-     \param functionSpaceType Input - The function space type.
-  */
-  DUDLEY_DLL_API
-  const int* borrowSampleReferenceIDs(int functionSpaceType) const;
-
-  /**
-     \brief
-     Returns true if the given integer is a valid function space type
-     for this domain.
-  */
-  DUDLEY_DLL_API
-  virtual bool isValidFunctionSpaceType(int functionSpaceType) const;
-
-  /**
-     \brief
-     Return a description for this domain
-  */
-  DUDLEY_DLL_API
-  virtual std::string getDescription() const;
-
-  /**
-     \brief
-     Return a description for the given function space type code
-  */
-  DUDLEY_DLL_API
-  virtual std::string functionSpaceTypeAsString(int functionSpaceType) const;
-
-  /**
-     \brief
-     Build the table of function space type names
-  */
-  DUDLEY_DLL_API
-  void setFunctionSpaceTypeNames();
-
-  /**
-     \brief
-     Return a continuous FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getContinuousFunctionCode() const;
-
-  /**
-     \brief
-     Return a continuous on reduced order nodes FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedContinuousFunctionCode() const;
-
-  /**
-     \brief
-     Return a function FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getFunctionCode() const;
-
-  /**
-     \brief
-     Return a function with reduced integration order FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedFunctionCode() const;
-
-  /**
-     \brief
-     Return a function on boundary FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getFunctionOnBoundaryCode() const;
-
-  /**
-     \brief
-     Return a function on boundary with reduced integration order FunctionSpace code
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedFunctionOnBoundaryCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactZero code
-  */
-  DUDLEY_DLL_API
-  virtual int getFunctionOnContactZeroCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactZero code  with reduced integration order
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedFunctionOnContactZeroCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactOne code
-  */
-  DUDLEY_DLL_API
-  virtual int getFunctionOnContactOneCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactOne code  with reduced integration order
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedFunctionOnContactOneCode() const;
-
-  /**
-     \brief
-     Return a Solution code
-  */
-  DUDLEY_DLL_API
-  virtual int getSolutionCode() const;
-
-  /**
-     \brief
-     Return a ReducedSolution code
-  */
-  DUDLEY_DLL_API
-  virtual int getReducedSolutionCode() const;
-
-  /**
-     \brief
-     Return a DiracDeltaFunctions code
-  */
-  DUDLEY_DLL_API
-  virtual int getDiracDeltaFunctionsCode() const;
-
-  /**
-     \brief
-     Map from function space type codes to their names.
-  */
-  typedef std::map<int, std::string> FunctionSpaceNamesMapType;
-
-  /**
-     \brief
-  */
-  DUDLEY_DLL_API
-  virtual int getDim() const;
-
-  /**
-     \brief
-      Returns a status indicator of the domain. The status identifier should be unique over
-      the lifetime of the object but may be updated if changes to the domain happen, e.g.
-      modifications to its geometry.
-
-     This has to be implemented by the actual Domain adapter.
-  */
-  DUDLEY_DLL_API
-  virtual StatusType getStatus() const;
-
-
-  /**
-     \brief
-     Return the number of data points summed across all MPI processes
-  */
-  DUDLEY_DLL_API
-  virtual int getNumDataPointsGlobal() const;
-
-  /**
-     \brief
-     Return the number of data points per sample, and the number of samples as a pair.
-     \param functionSpaceCode Input -
-  */
-  DUDLEY_DLL_API
-  virtual std::pair<int,int> getDataShape(int functionSpaceCode) const;
-
-  /**
-     \brief
-     copies the location of data points into arg. The domain of arg has to match this.
-     has to be implemented by the actual Domain adapter.
-  */
-  DUDLEY_DLL_API
-  virtual void setToX(escript::Data& arg) const;
-
-  /**
-     \brief
-     sets a map from a clear tag name to a tag key
-     \param name Input - tag name.
-     \param tag Input - tag key.
-  */
-  DUDLEY_DLL_API
-  virtual void setTagMap(const std::string& name,  int tag);
-
-  /**
-     \brief
-     Return the tag key for tag name.
-     \param name Input - tag name
-  */
-  DUDLEY_DLL_API
-  virtual int getTag(const std::string& name) const;
-
-  /**
-     \brief
-     Returns true if name is a defined tag name.
-     \param name Input - tag name to be checked.
-  */
-  DUDLEY_DLL_API
-  virtual bool isValidTagName(const std::string& name) const;
-
-  /**
-     \brief
-     Returns all tag names in a single string separated by commas
-  */
-  DUDLEY_DLL_API
-  virtual std::string showTagNames() const;
-
-  /**
-     \brief
-     assigns new location to the domain
-  */
-  DUDLEY_DLL_API
-  virtual void setNewX(const escript::Data& arg);
-
-  /**
-     \brief
-     interpolates data given on source onto target where source and target have to be given on the same domain.
-  */
-  DUDLEY_DLL_API
-  virtual void interpolateOnDomain(escript::Data& target, const escript::Data& source) const;
-
-
-  DUDLEY_DLL_API
-  virtual bool probeInterpolationOnDomain(int functionSpaceType_source,int functionSpaceType_target) const;
-
-  DUDLEY_DLL_API
-  virtual signed char preferredInterpolationOnDomain(int functionSpaceType_source,int functionSpaceType_target) const;
-
-
-  /**
-    \brief given a vector of FunctionSpace typecodes, pass back a code to which they can all be interpolated.
-    \return true if the result is valid, false if not
-  */
-  DUDLEY_DLL_API
-  bool
-  commonFunctionSpace(const std::vector<int>& fs, int& resultcode) const;
-
-  /**
-     \brief
-     interpolates data given on source onto target where source and target are given on different domains.
-     has to be implemented by the actual Domain adapter.
-  */
-  DUDLEY_DLL_API
-  virtual void interpolateAcross(escript::Data& target, const escript::Data& source) const;
-
-  /**
-  \brief determines whether interpolation from source to target is possible.
-  Must be implemented by the actual Domain adapter
-  */
-  DUDLEY_DLL_API
-  virtual bool probeInterpolationAcross(int functionSpaceType_source,const escript::AbstractDomain& targetDomain, int functionSpaceType_target) const;
-
-  /**
-     \brief
-     copies the surface normals at data points into out. The actual function space to be considered
-     is defined by out. out has to be defined on this.
-  */
-  DUDLEY_DLL_API
-  virtual void setToNormal(escript::Data& out) const;
-
-  /**
-     \brief
-     copies the size of samples into out. The actual function space to be considered
-     is defined by out. out has to be defined on this.
-  */
-  DUDLEY_DLL_API
-  virtual void setToSize(escript::Data& out) const;
-
-  /**
-     \brief
-     copies the gradient of arg into grad. The actual function space to be considered
-     for the gradient is defined by grad. arg and grad have to be defined on this.
-  */
-  DUDLEY_DLL_API
-  virtual void setToGradient(escript::Data& grad,const escript::Data& arg) const;
-
-  /**
-     \brief
-     copies the integrals of the function defined by arg into integrals.
-     arg has to be defined on this.
-  */
-  DUDLEY_DLL_API
-  virtual void setToIntegrals(std::vector<double>& integrals,const escript::Data& arg) const;
-
-  /**
-     \brief
-     return the identifier of the matrix type to be used for the global
-     stiffness matrix when a particular solver, package, preconditioner,
-     and symmetric matrix is used.
-     
-     \param options a SolverBuddy instance with the desired options set
-  */
-  DUDLEY_DLL_API
-  virtual int getSystemMatrixTypeId(const boost::python::object& options) const;
-
-  /**
-     \brief
-     return the identifier of the transport problem type to be used when a particular solver, preconditioner, package
-     and symmetric matrix is used.
-     \param solver 
-     \param preconditioner
-     \param package
-     \param symmetry 
-  */
-  DUDLEY_DLL_API
-  virtual int getTransportTypeId(const int solver, const int preconditioner, const int package, const bool symmetry) const;
-
-  /**
-     \brief
-     returns true if data on this domain and a function space of type functionSpaceCode has to
-     be considered as cell-centered data.
-  */
-  DUDLEY_DLL_API
-  virtual bool isCellOriented(int functionSpaceCode) const;
-
-  DUDLEY_DLL_API
-  virtual bool ownSample(int fs_code, index_t id) const;
-
-  /**
-     \brief
-     adds a PDE onto the stiffness matrix mat and a rhs 
-  */
-  DUDLEY_DLL_API
-  virtual void addPDEToSystem(
-                     escript::AbstractSystemMatrix& mat, escript::Data& rhs,
-                     const escript::Data& A, const escript::Data& B, const escript::Data& C, 
-                     const escript::Data& D, const escript::Data& X, const escript::Data& Y,
-                     const escript::Data& d, const escript::Data& y, 
-		     const escript::Data& d_contact, const escript::Data& y_contact,
-                     const escript::Data& d_dirac, const escript::Data& y_dirac) const;
-
-
-  /**
-     \brief
-     adds a PDE onto the lumped stiffness matrix matrix
-  */
-  DUDLEY_DLL_API
-  virtual void addPDEToLumpedSystem(
-                     escript::Data& mat,
-                     const escript::Data& D, 
-                     const escript::Data& d,
-                     const escript::Data& d_dirac,
-                     const bool useHRZ) const;
-
-  /**
-     \brief
-     adds a PDE onto the stiffness matrix mat and a rhs 
-  */
-  DUDLEY_DLL_API
-  virtual void addPDEToRHS(escript::Data& rhs,
-                     const escript::Data& X, const escript::Data& Y,
-                     const escript::Data& y, const escript::Data& y_contact, const escript::Data& y_dirac) const;
-  /**
-     \brief
-     adds a PDE onto a transport problem
-  */
-
-  DUDLEY_DLL_API
-  virtual void addPDEToTransportProblem(
-                     escript::AbstractTransportProblem& tp, escript::Data& source, 
-                     const escript::Data& M,
-                     const escript::Data& A, const escript::Data& B, const escript::Data& C,const  escript::Data& D,
-                     const  escript::Data& X,const  escript::Data& Y,
-                     const escript::Data& d, const escript::Data& y,
-                     const escript::Data& d_contact,const escript::Data& y_contact,
-                     const escript::Data& d_dirac,const escript::Data& y_dirac) const;
-
-
-  /**
-     \brief
-    creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros:
-  */
-  DUDLEY_DLL_API
-  escript::ASM_ptr newSystemMatrix(
-                      const int row_blocksize,
-                      const escript::FunctionSpace& row_functionspace,
-                      const int column_blocksize,
-                      const escript::FunctionSpace& column_functionspace,
-                      const int type) const;
-  /**
-   \brief 
-    creates a TransportProblemAdapter 
-
-  */
-
-  DUDLEY_DLL_API
-  escript::ATP_ptr newTransportProblem(
-                      const int blocksize,
-                      const escript::FunctionSpace& functionspace,
-                      const int type) const;
-
-  /**
-     \brief returns locations in the FEM nodes
-  */
-  DUDLEY_DLL_API
-  virtual escript::Data getX() const;
-
-  /**
-     \brief return boundary normals at the quadrature point on the face elements
-  */
-  DUDLEY_DLL_API
-  virtual escript::Data getNormal() const;
-
-  /**
-     \brief returns the element size
-  */
-  DUDLEY_DLL_API
-  virtual escript::Data getSize() const;
-
-  /**
-     \brief comparison operators
-  */
-  DUDLEY_DLL_API
-  virtual bool operator==(const escript::AbstractDomain& other) const;
-  DUDLEY_DLL_API
-  virtual bool operator!=(const escript::AbstractDomain& other) const;
-
-  /**
-     \brief assigns the new tag newTag to all samples of the function space with a positive
-     value of mask for any of its sample points.
-
-  */
-  DUDLEY_DLL_API
-  virtual void setTags(const int functionSpaceType, const int newTag, const escript::Data& mask) const;
-
-  /**
-      \brief
-          return the number of tags in use and a pointer to an array of the tags in use
-  */
-  DUDLEY_DLL_API
-  virtual int getNumberOfTagsInUse(int functionSpaceCode) const;
-
-  DUDLEY_DLL_API 
-  virtual const int* borrowListOfTagsInUse(int functionSpaceCode) const;
-
-
-  /**
-     \brief Checks if this domain allows tags for the specified functionSpaceCode.
-  */
-  DUDLEY_DLL_API
-  virtual
-  bool canTag(int functionSpaceCode) const;
-
-   /**
-   \brief returns the approximation order used for a function space functionSpaceCode
-   */
-
-  DUDLEY_DLL_API
-  virtual 
-  int getApproximationOrder(const int functionSpaceCode) const;
-
-
-  DUDLEY_DLL_API
-  bool supportsContactElements() const;
-  
- 	  
-  DUDLEY_DLL_API
-  virtual escript::Data randomFill(const escript::DataTypes::ShapeType& shape,
-       const escript::FunctionSpace& what, long seed, const boost::python::tuple& filter) const;         
-  
- protected:
-
- private:
-  void extractArgsFromDict(const boost::python::dict& arg, int& numData,
-                             char**& names, escript::Data*& data,
-                             escript::Data**& dataPtr) const;
-
-  //
-  // pointer to the externally created dudley mesh
-  boost::shared_ptr<Dudley_Mesh> m_dudleyMesh;
- 
-  static FunctionSpaceNamesMapType m_functionSpaceTypeNames;
-
-};
-
-} // end of namespace
-
-#endif
diff --git a/dudley/src/CPPAdapter/MeshAdapterFactory.cpp b/dudley/src/CPPAdapter/MeshAdapterFactory.cpp
deleted file mode 100644
index 3abe831..0000000
--- a/dudley/src/CPPAdapter/MeshAdapterFactory.cpp
+++ /dev/null
@@ -1,714 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "MeshAdapterFactory.h"
-#include "DudleyError.h"
-#include "esysUtils/blocktimer.h"
-#include "dudley/Dudley.h"
-#include "dudley/Mesh.h"
-#include "dudley/TriangularMesh.h"
-#ifdef ESYS_MPI
-#include "esysUtils/Esys_MPI.h"
-#endif
-
-#include "escript/SubWorld.h"
-
-#ifdef USE_NETCDF
-#include <netcdfcpp.h>
-#endif
-
-#include <boost/python/extract.hpp>
-#include <boost/scoped_array.hpp>
-
-#include <sstream>
-
-using namespace std;
-using namespace escript;
-
-namespace dudley {
-
-#ifdef USE_NETCDF
-  // A convenience method to retrieve an integer attribute from a NetCDF file
-  int NetCDF_Get_Int_Attribute(NcFile *dataFile, const std::string &fName, char *attr_name)
-  {
-    NcAtt *attr;
-    char error_msg[LenErrorMsg_MAX];
-    if (! (attr=dataFile->get_att(attr_name)) ) {
-      sprintf(error_msg,"loadMesh: Error retrieving integer attribute '%s' from NetCDF file '%s'", attr_name, fName.c_str());
-      throw DataException(error_msg);
-    }
-    int temp = attr->as_int(0);
-    delete attr;
-    return(temp);
-  }
-#endif
-
-  inline void cleanupAndThrow(Dudley_Mesh* mesh, string msg)
-  {
-      Dudley_Mesh_free(mesh);
-      string msgPrefix("loadMesh: NetCDF operation failed - ");
-      throw DataException(msgPrefix+msg);
-  }
-
-//   AbstractContinuousDomain* loadMesh(const std::string& fileName)
-  Domain_ptr loadMesh(const std::string& fileName)
-  {
-#ifdef USE_NETCDF
-    esysUtils::JMPI mpi_info = esysUtils::makeInfo( MPI_COMM_WORLD );
-    Dudley_Mesh *mesh_p=NULL;
-    char error_msg[LenErrorMsg_MAX];
-
-    std::string fName(esysUtils::appendRankToFileName(fileName, mpi_info->size,
-                                                      mpi_info->rank));
-
-    double blocktimer_start = blocktimer_time();
-    Dudley_resetError();
-    int *first_DofComponent, *first_NodeComponent;
-
-    // Open NetCDF file for reading
-    NcAtt *attr;
-    NcVar *nc_var_temp;
-    // netCDF error handler
-    NcError err(NcError::silent_nonfatal);
-    // Open the NetCDF file read-only.
-    NcFile dataFile(fName.c_str(), NcFile::ReadOnly);
-    if (!dataFile.is_valid()) {
-      sprintf(error_msg,"loadMesh: Opening NetCDF file '%s' for reading failed.", fName.c_str());
-      Dudley_setError(IO_ERROR,error_msg);
-      throw DataException(error_msg);
-    }
-
-    // Read NetCDF integer attributes
-    int mpi_size                        = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"mpi_size");
-    int mpi_rank                        = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"mpi_rank");
-    int numDim                          = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"numDim");
-    int numNodes                        = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"numNodes");
-    int num_Elements                    = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_Elements");
-    int num_FaceElements                = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_FaceElements");
-    int num_Points                      = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_Points");
-    int num_Elements_numNodes           = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_Elements_numNodes");
-    int Elements_TypeId                 = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"Elements_TypeId");
-    int num_FaceElements_numNodes       = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_FaceElements_numNodes");
-    int FaceElements_TypeId             = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"FaceElements_TypeId");
-    int Points_TypeId                   = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"Points_TypeId");
-    int num_Tags                        = NetCDF_Get_Int_Attribute(&dataFile, fName, (char *)"num_Tags");
-
-    // Verify size and rank
-    if (mpi_info->size != mpi_size) {
-      sprintf(error_msg, "loadMesh: The NetCDF file '%s' can only be read on %d CPUs instead of %d", fName.c_str(), mpi_size, mpi_info->size);
-      throw DataException(error_msg);
-    }
-    if (mpi_info->rank != mpi_rank) {
-      sprintf(error_msg, "loadMesh: The NetCDF file '%s' should be read on CPU #%d instead of %d", fName.c_str(), mpi_rank, mpi_info->rank);
-      throw DataException(error_msg);
-    }
-
-    // Read mesh name
-    if (! (attr=dataFile.get_att("Name")) ) {
-      sprintf(error_msg,"loadMesh: Error retrieving mesh name from NetCDF file '%s'", fName.c_str());
-      throw DataException(error_msg);
-    }
-    boost::scoped_array<char> name(attr->as_string(0));
-    delete attr;
-
-    /* allocate mesh */
-    mesh_p = Dudley_Mesh_alloc(name.get(), numDim, mpi_info);
-    if (Dudley_noError()) {
-
-        /* read nodes */
-        Dudley_NodeFile_allocTable(mesh_p->Nodes, numNodes);
-        // Nodes_Id
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_Id")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Id)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->Id[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Id)");
-        // Nodes_Tag
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_Tag")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Tag)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->Tag[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Tag)");
-        // Nodes_gDOF
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_gDOF")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_gDOF)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalDegreesOfFreedom[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_gDOF)");
-        // Nodes_gNI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_gNI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_gNI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalNodesIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_gNI)");
-        // Nodes_grDfI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_grDfI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_grDfI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalReducedDOFIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_grDfI)");
-        // Nodes_grNI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_grNI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_grNI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalReducedNodesIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_grNI)");
-        // Nodes_Coordinates
-        if (!(nc_var_temp = dataFile.get_var("Nodes_Coordinates")))
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Coordinates)");
-        if (! nc_var_temp->get(&(mesh_p->Nodes->Coordinates[0]), numNodes, numDim) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Coordinates)");
-
-        Dudley_NodeFile_setTagsInUse(mesh_p->Nodes);
-
-        /* read elements */
-        if (Dudley_noError()) {
-            mesh_p->Elements=Dudley_ElementFile_alloc((Dudley_ElementTypeId)Elements_TypeId, mpi_info);
-            if (Dudley_noError())
-                Dudley_ElementFile_allocTable(mesh_p->Elements, num_Elements);
-            if (Dudley_noError()) {
-                mesh_p->Elements->minColor=0;
-                mesh_p->Elements->maxColor=num_Elements-1;
-                if (num_Elements>0) {
-                   // Elements_Id
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Id")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Id)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Id[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Id)");
-                   // Elements_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Tag")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Tag[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Tag)");
-                   // Elements_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Owner")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Owner)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Owner[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Owner)");
-                   // Elements_Color
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Color")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Color)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Color[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Color)");
-                   // Elements_Nodes
-                   int *Elements_Nodes = new int[num_Elements*num_Elements_numNodes];
-                   if (!(nc_var_temp = dataFile.get_var("Elements_Nodes"))) {
-                       delete[] Elements_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(Elements_Nodes[0]), num_Elements, num_Elements_numNodes) ) {
-                       delete[] Elements_Nodes;
-                       cleanupAndThrow(mesh_p, "get(Elements_Nodes)");
-                   }
-
-                   // Copy temp array into mesh_p->Elements->Nodes
-                   for (int i=0; i<num_Elements; i++) {
-                       for (int j=0; j<num_Elements_numNodes; j++) {
-                           mesh_p->Elements->Nodes[INDEX2(j,i,num_Elements_numNodes)]
-                                = Elements_Nodes[INDEX2(j,i,num_Elements_numNodes)];
-                       }
-                   }
-                   delete[] Elements_Nodes;
-                   Dudley_ElementFile_setTagsInUse(mesh_p->Elements);
-                } /* num_Elements>0 */
-            }
-        }
-
-        /* get the face elements */
-        if (Dudley_noError()) {
-            mesh_p->FaceElements=Dudley_ElementFile_alloc((Dudley_ElementTypeId)FaceElements_TypeId, mpi_info);
-            if (Dudley_noError())
-                Dudley_ElementFile_allocTable(mesh_p->FaceElements, num_FaceElements);
-            if (Dudley_noError()) {
-                mesh_p->FaceElements->minColor=0;
-                mesh_p->FaceElements->maxColor=num_FaceElements-1;
-                if (num_FaceElements>0) {
-                   // FaceElements_Id
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Id")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Id)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Id[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Id)");
-                   // FaceElements_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Tag")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Tag[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Tag)");
-                   // FaceElements_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Owner")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Owner)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Owner[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Owner)");
-                   // FaceElements_Color
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Color")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Color)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Color[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Color)");
-                   // FaceElements_Nodes
-                   int *FaceElements_Nodes = new int[num_FaceElements*num_FaceElements_numNodes];
-                   if (!(nc_var_temp = dataFile.get_var("FaceElements_Nodes"))) {
-                       delete[] FaceElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(FaceElements_Nodes[0]), num_FaceElements, num_FaceElements_numNodes) ) {
-                       delete[] FaceElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Nodes)");
-                   }
-                   // Copy temp array into mesh_p->FaceElements->Nodes
-                   for (int i=0; i<num_FaceElements; i++) {
-                       for (int j=0; j<num_FaceElements_numNodes; j++) {
-                           mesh_p->FaceElements->Nodes[INDEX2(j,i,num_FaceElements_numNodes)] = FaceElements_Nodes[INDEX2(j,i,num_FaceElements_numNodes)];
-                       }
-                   }
-                   delete[] FaceElements_Nodes;
-                   Dudley_ElementFile_setTagsInUse(mesh_p->FaceElements);
-                } /* num_FaceElements>0 */
-            }
-        }
-
-        /* get the Points (nodal elements) */
-        if (Dudley_noError()) {
-            mesh_p->Points=Dudley_ElementFile_alloc((Dudley_ElementTypeId)Points_TypeId, mpi_info);
-            if (Dudley_noError())
-                Dudley_ElementFile_allocTable(mesh_p->Points, num_Points);
-            if (Dudley_noError()) {
-                mesh_p->Points->minColor=0;
-                mesh_p->Points->maxColor=num_Points-1;
-                if (num_Points>0) {
-                   // Points_Id
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Id")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Id)");
-                   if (! nc_var_temp->get(&mesh_p->Points->Id[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Id)");
-                   // Points_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Tag")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->Points->Tag[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Tag)");
-                   // Points_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Owner")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Owner)");
-                   if (!nc_var_temp->get(&mesh_p->Points->Owner[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Owner)");
-                   // Points_Color
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Color")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Color)");
-                   if (!nc_var_temp->get(&mesh_p->Points->Color[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Color)");
-                   // Points_Nodes
-                   int *Points_Nodes = new int[num_Points];
-                   if (!(nc_var_temp = dataFile.get_var("Points_Nodes"))) {
-                       delete[] Points_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(Points_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(Points_Nodes[0]), num_Points) ) {
-                       delete[] Points_Nodes;
-                       cleanupAndThrow(mesh_p, "get(Points_Nodes)");
-                   }
-                   // Copy temp array into mesh_p->Points->Nodes
-                   for (int i=0; i<num_Points; i++) {
-                       mesh_p->Points->Id[mesh_p->Points->Nodes[INDEX2(0,i,1)]] = Points_Nodes[i];
-                   }
-                   delete[] Points_Nodes;
-                   Dudley_ElementFile_setTagsInUse(mesh_p->Points);
-                } /* num_Points>0 */
-            }
-        }
-
-        /* get the tags */
-        if (Dudley_noError()) {
-          if (num_Tags>0) {
-            // Temp storage to gather node IDs
-            int *Tags_keys = new int[num_Tags];
-            char name_temp[4096];
-            int i;
-
-            // Tags_keys
-            if (! ( nc_var_temp = dataFile.get_var("Tags_keys")) ) {
-                delete[] Tags_keys;
-                cleanupAndThrow(mesh_p, "get_var(Tags_keys)");
-            }
-            if (! nc_var_temp->get(&Tags_keys[0], num_Tags) ) {
-                delete[] Tags_keys;
-                cleanupAndThrow(mesh_p, "get(Tags_keys)");
-            }
-            for (i=0; i<num_Tags; i++) {
-              // Retrieve tag name
-              sprintf(name_temp, "Tags_name_%d", i);
-              if (! (attr=dataFile.get_att(name_temp)) ) {
-                  delete[] Tags_keys;
-                  sprintf(error_msg,"get_att(%s)", name_temp);
-                  cleanupAndThrow(mesh_p, error_msg);
-              }
-              boost::scoped_array<char> name(attr->as_string(0));
-              delete attr;
-              Dudley_Mesh_addTagMap(mesh_p, name.get(), Tags_keys[i]);
-            }
-            delete[] Tags_keys;
-          }
-        }
-   
-        if (Dudley_noError()) {
-            // Nodes_DofDistribution
-            first_DofComponent = new index_t[mpi_size+1];
-            if (! ( nc_var_temp = dataFile.get_var("Nodes_DofDistribution")) ) {
-                delete[] first_DofComponent;
-                cleanupAndThrow(mesh_p, "get_var(Nodes_DofDistribution)");
-            }
-            if (! nc_var_temp->get(&first_DofComponent[0], mpi_size+1) ) {
-                delete[] first_DofComponent;
-                cleanupAndThrow(mesh_p, "get(Nodes_DofDistribution)");
-            }
-
-            // Nodes_NodeDistribution
-            first_NodeComponent = new index_t[mpi_size+1];
-            if (! ( nc_var_temp = dataFile.get_var("Nodes_NodeDistribution")) ) {
-                delete[] first_DofComponent;
-                delete[] first_NodeComponent;
-                cleanupAndThrow(mesh_p, "get_var(Nodes_NodeDistribution)");
-            }
-            if (! nc_var_temp->get(&first_NodeComponent[0], mpi_size+1) ) {
-                delete[] first_DofComponent;
-                delete[] first_NodeComponent;
-                cleanupAndThrow(mesh_p, "get(Nodes_NodeDistribution)");
-            }
-            Dudley_Mesh_createMappings(mesh_p, first_DofComponent, first_NodeComponent);
-            delete[] first_DofComponent;
-            delete[] first_NodeComponent;
-        }
-
-    } /* Dudley_noError() after Dudley_Mesh_alloc() */
-
-    checkDudleyError();
-    AbstractContinuousDomain* dom=new MeshAdapter(mesh_p);
-
-    if (! Dudley_noError()) {
-        Dudley_Mesh_free(mesh_p);
-    }
-
-    blocktimer_increment("LoadMesh()", blocktimer_start);
-    return dom->getPtr();
-#else
-    throw DataException("loadMesh: not compiled with NetCDF. Please contact your installation manager.");
-#endif /* USE_NETCDF */
-  }
-
-  Domain_ptr readMesh(const std::string& fileName,
-                      int integrationOrder,
-                      int reducedIntegrationOrder,
-                      int optimize)
-  {
-    //
-    // create a copy of the filename to overcome the non-constness of call
-    // to Dudley_Mesh_read
-    Dudley_Mesh* fMesh=0;
-    // Win32 refactor
-    if( fileName.size() == 0 )
-    {
-       throw DataException("Null file name!");
-    }
-
-    char *fName = new char[fileName.size()+1];
-        
-    strcpy(fName,fileName.c_str());
-    double blocktimer_start = blocktimer_time();
-
-    fMesh=Dudley_Mesh_read(fName,integrationOrder, reducedIntegrationOrder, (optimize ? TRUE : FALSE));
-    checkDudleyError();
-    AbstractContinuousDomain* temp=new MeshAdapter(fMesh);
-    
-    delete[] fName;
-    
-    blocktimer_increment("ReadMesh()", blocktimer_start);
-    return temp->getPtr();
-  }
-
-  Domain_ptr readGmsh(const std::string& fileName,
-                                     int numDim,
-                                     int integrationOrder,
-                                     int reducedIntegrationOrder,
-                                     int optimize,
-                                     int useMacroElements)
-  {
-    //
-    // create a copy of the filename to overcome the non-constness of call
-    // to Dudley_Mesh_read
-    Dudley_Mesh* fMesh=0;
-    // Win32 refactor
-    if( fileName.size() == 0 )
-    {
-       throw DataException("Null file name!");
-    }
-
-    char *fName = new char[fileName.size()+1];
-        
-    strcpy(fName,fileName.c_str());
-    double blocktimer_start = blocktimer_time();
-
-    fMesh=Dudley_Mesh_readGmsh(fName, numDim, integrationOrder, reducedIntegrationOrder, (optimize ? TRUE : FALSE), (useMacroElements ? TRUE : FALSE));
-    checkDudleyError();
-    AbstractContinuousDomain* temp=new MeshAdapter(fMesh);
-    
-    delete[] fName;
-    
-    blocktimer_increment("ReadGmsh()", blocktimer_start);
-    return temp->getPtr();
-  }
-
-  Domain_ptr brick(esysUtils::JMPI& mpi_info, double n0, double n1,double n2,int order,
-                   double l0,double l1,double l2,
-                   int periodic0,int periodic1,
-                   int periodic2,
-                   int integrationOrder,
-                   int reducedIntegrationOrder,
-                   int useElementsOnFace,
-                   int useFullElementOrder,
-                   int optimize)
-  {
-    int numElements[]={static_cast<int>(n0),static_cast<int>(n1),static_cast<int>(n2)};
-    double length[]={l0,l1,l2};
-
-    if (periodic0 || periodic1) // we don't support periodic boundary conditions
-    {
-        throw DudleyAdapterException("Dudley does not support periodic boundary conditions.");
-    }
-    else if (integrationOrder>3 || reducedIntegrationOrder>1)
-    {
-        throw DudleyAdapterException("Dudley does not support the requested integrationOrders.");
-    }
-    else if (useElementsOnFace || useFullElementOrder)
-    {
-        throw DudleyAdapterException("Dudley does not support useElementsOnFace or useFullElementOrder.");
-    }
-    if (order>1)
-    {
-        throw DudleyAdapterException("Dudley does not support element order greater than 1.");
-    }
-
-    //
-    // linearInterpolation
-    Dudley_Mesh* fMesh=NULL;
-
-    fMesh=Dudley_TriangularMesh_Tet4(numElements, length, integrationOrder,
-                        reducedIntegrationOrder, (optimize ? TRUE : FALSE),
-                        mpi_info);
-
-    //
-    // Convert any dudley errors into a C++ exception
-    checkDudleyError();
-    AbstractContinuousDomain* temp=new MeshAdapter(fMesh);
-    return temp->getPtr();
-  }
-
-  Domain_ptr brick_driver(const boost::python::list& args)
-  {
-      using boost::python::extract;
-
-//       // we need to convert lists to stl vectors
-//       boost::python::list pypoints=extract<boost::python::list>(args[15]);
-//       boost::python::list pytags=extract<boost::python::list>(args[16]);
-//       int numpts=extract<int>(pypoints.attr("__len__")());
-//       int numtags=extract<int>(pytags.attr("__len__")());
-//       vector<double> points;
-//       vector<int> tags;
-//       tags.resize(numtags, -1);
-//       for (int i=0;i<numpts;++i) {
-//           boost::python::object temp=pypoints[i];
-//           int l=extract<int>(temp.attr("__len__")());
-//           for (int k=0;k<l;++k) {
-//               points.push_back(extract<double>(temp[k]));           
-//           }
-//       }
-//       map<string, int> namestonums;
-//       int curmax=40; // bricks use up to 30
-//       for (int i=0;i<numtags;++i) {
-//           extract<int> ex_int(pytags[i]);
-//           extract<string> ex_str(pytags[i]);
-//           if (ex_int.check()) {
-//               tags[i]=ex_int();
-//               if (tags[i]>= curmax) {
-//                   curmax=tags[i]+1;
-//               }
-//           } else if (ex_str.check()) {
-//               string s=ex_str();
-//               map<string, int>::iterator it=namestonums.find(s);
-//               if (it!=namestonums.end()) {
-//                   // we have the tag already so look it up
-//                   tags[i]=it->second;
-//               } else {
-//                   namestonums[s]=curmax;
-//                   tags[i]=curmax;
-//                   curmax++;
-//               }
-//           } else {
-//               throw DudleyAdapterException("Error - Unable to extract tag value.");
-//           }
-//         
-//       }
-      boost::python::object pworld=args[15];
-      esysUtils::JMPI info;
-      if (!pworld.is_none())
-      {
-	  extract<SubWorld_ptr> ex(pworld);
-	  if (!ex.check())
-	  {	  
-	      throw DudleyAdapterException("Invalid escriptworld parameter.");
-	  }
-	  info=ex()->getMPI();
-      }
-      else
-      {
-	  info=esysUtils::makeInfo(MPI_COMM_WORLD);
-
-      }
-      return brick(info, static_cast<int>(extract<float>(args[0])),
-                   static_cast<int>(extract<float>(args[1])),
-                   static_cast<int>(extract<float>(args[2])),
-                   extract<int>(args[3]), extract<double>(args[4]),
-                   extract<double>(args[5]), extract<double>(args[6]),
-                   extract<int>(args[7]), extract<int>(args[8]),
-                   extract<int>(args[9]), extract<int>(args[10]),
-                   extract<int>(args[11]), extract<int>(args[12]),
-                   extract<int>(args[13]), extract<int>(args[14])
-                   );
-  }  
-  
-  
-  Domain_ptr rectangle_driver(const boost::python::list& args)
-  {
-      using boost::python::extract;
-/*
-      // we need to convert lists to stl vectors
-      boost::python::list pypoints=extract<boost::python::list>(args[12]);
-      boost::python::list pytags=extract<boost::python::list>(args[13]);
-      int numpts=extract<int>(pypoints.attr("__len__")());
-      int numtags=extract<int>(pytags.attr("__len__")());
-      vector<double> points;
-      vector<int> tags;
-      tags.resize(numtags, -1);
-      for (int i=0;i<numpts;++i)
-      {
-          boost::python::object temp=pypoints[i];
-          int l=extract<int>(temp.attr("__len__")());
-          for (int k=0;k<l;++k)
-          {
-              points.push_back(extract<double>(temp[k]));           
-          }
-      }
-      map<string, int> tagstonames;
-      int curmax=40;
-      // but which order to assign tags to names?????
-      for (int i=0;i<numtags;++i)
-      {
-          extract<int> ex_int(pytags[i]);
-          extract<string> ex_str(pytags[i]);
-          if (ex_int.check())
-          {
-              tags[i]=ex_int();
-              if (tags[i]>= curmax)
-              {
-                  curmax=tags[i]+1;
-              }
-          } 
-          else if (ex_str.check())
-          {
-              string s=ex_str();
-              map<string, int>::iterator it=tagstonames.find(s);
-              if (it!=tagstonames.end())
-              {
-                  // we have the tag already so look it up
-                  tags[i]=it->second;
-              }
-              else
-              {
-                  tagstonames[s]=curmax;
-                  tags[i]=curmax;
-                  curmax++;
-              }
-          }
-          else
-          {
-              throw DudleyAdapterException("Error - Unable to extract tag value.");
-          }
-      }*/
-      boost::python::object pworld=args[12];
-      esysUtils::JMPI info;
-      if (!pworld.is_none())
-      {
-          extract<SubWorld_ptr> ex(pworld);
-	  if (!ex.check())
-	  {
-	      throw DudleyAdapterException("Invalid escriptworld parameter.");
-          }
-          info=ex()->getMPI();
-      }
-      else
-      {
-          info=esysUtils::makeInfo(MPI_COMM_WORLD);
-      }
-
-      return rectangle(info, static_cast<int>(extract<float>(args[0])),
-                       static_cast<int>(extract<float>(args[1])),
-                       extract<int>(args[2]), extract<double>(args[3]),
-                       extract<double>(args[4]), extract<int>(args[5]),
-                       extract<int>(args[6]), extract<int>(args[7]),
-                       extract<int>(args[8]), extract<int>(args[9]),
-                       extract<int>(args[10]), extract<int>(args[11]) 
-		       );
-  }  
-  
-  
-  
-  Domain_ptr rectangle(esysUtils::JMPI& mpi_info, double n0, double n1, int order,
-                       double l0, double l1,
-                       int periodic0,int periodic1,
-                       int integrationOrder,
-                       int reducedIntegrationOrder,
-                       int useElementsOnFace,
-                       int useFullElementOrder,
-                       int optimize)
-  {
-    int numElements[]={static_cast<int>(n0), static_cast<int>(n1)};
-    double length[]={l0,l1};
-
-    if (periodic0 || periodic1) // we don't support periodic boundary conditions
-    {
-        throw DudleyAdapterException("Dudley does not support periodic boundary conditions.");
-    }
-    else if (integrationOrder>3 || reducedIntegrationOrder>1)
-    {
-        throw DudleyAdapterException("Dudley does not support the requested integrationOrders.");
-    }
-    else if (useElementsOnFace || useFullElementOrder)
-    {
-        throw DudleyAdapterException("Dudley does not support useElementsOnFace or useFullElementOrder.");
-    }
-
-    if (order>1)
-    {
-        throw DudleyAdapterException("Dudley does not support element order greater than 1.");
-    }
-    Dudley_Mesh* fMesh=Dudley_TriangularMesh_Tri3(numElements, length,
-          integrationOrder, reducedIntegrationOrder, (optimize ? TRUE : FALSE),
-          mpi_info);
-    //
-    // Convert any dudley errors into a C++ exception
-    checkDudleyError();
-    MeshAdapter* ma=new MeshAdapter(fMesh);
-    return Domain_ptr(ma);
-  }
-
-  // end of namespace
-
-}
-
diff --git a/dudley/src/CPPAdapter/MeshAdapterFactory.h b/dudley/src/CPPAdapter/MeshAdapterFactory.h
deleted file mode 100644
index eef4abe..0000000
--- a/dudley/src/CPPAdapter/MeshAdapterFactory.h
+++ /dev/null
@@ -1,168 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  dudley_MeshAdapterFactory_20040526_H
-#define dudley_MeshAdapterFactory_20040526_H
-#include "system_dep.h"
-
-#include "MeshAdapter.h"
-
-#include "escript/AbstractContinuousDomain.h"
-
-#include <boost/python/list.hpp>
-
-#include <sstream>
-
-namespace dudley {
-  /**
-     \brief
-     A suite of factory methods for creating various MeshAdapters.
-
-     Description:
-     A suite of factory methods for creating various MeshAdapters.
-  */
- 
-  /**
-     \brief
-     recovers mesh from a dump file
-     \param fileName Input -  The name of the file.
-  */
-  DUDLEY_DLL_API
-/*  escript::AbstractContinuousDomain* loadMesh(const std::string& fileName);*/
-  escript::Domain_ptr loadMesh(const std::string& fileName);
-  /**
-     \brief
-     Read a mesh from a file. For MPI parallel runs fan out the mesh to multiple processes.
-     \param fileName Input -  The name of the file.
-     \param integrationOrder Input - order of the quadrature scheme.  
-     If integrationOrder<0 the integration order is selected independently.
-     \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-     If reducedIntegrationOrder<0 the integration order is selected independently.
-     \param optimize Input - switches on the optimization of node labels 
-  */
-  DUDLEY_DLL_API
-//   escript::AbstractContinuousDomain* readMesh(const std::string& fileName,
-   escript::Domain_ptr readMesh(const std::string& fileName,
-				     int integrationOrder=-1,
-                                     int reducedIntegrationOrder=-1,
-                                     int optimize=0);
-  /**
-     \brief
-     Read a gmsh mesh file
-     \param fileName Input -  The name of the file.
-     \param numDim Input -  spatial dimension
-     \param integrationOrder Input - order of the quadrature scheme.  
-     If integrationOrder<0 the integration order is selected independently.
-     \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-     If reducedIntegrationOrder<0 the integration order is selected independently.
-     \param optimize Input - switches on the optimization of node labels 
-     \param useMacroElements
-  */
-  DUDLEY_DLL_API
-//   escript::AbstractContinuousDomain* readGmsh(const std::string& fileName,
-  escript::Domain_ptr readGmsh(const std::string& fileName,
-				     int numDim, 
-				     int integrationOrder=-1,
-				     int reducedIntegrationOrder=-1, 
-				     int optimize=0,
-				     int useMacroElements=0);
-				     
-				     
-   /**
-   \brief Python driver for brick()
-   \param args see brick() definition for order of params
-   */
-   DUDLEY_DLL_API
-   escript::Domain_ptr brick_driver(const boost::python::list& args);
-
-   /**
-   \brief Python driver for rectangle()
-   \param args see rectangle() definition for order of params
-   */
-   DUDLEY_DLL_API
-   escript::Domain_ptr rectangle_driver(const boost::python::list& args);   
-   
-  /**
-     \brief
-     Creates a rectangular mesh with n0 x n1 x n2 elements over the brick 
-     [0,l0] x [0,l1] x [0,l2].
-
-     \param n0,n1,n2 Input - number of elements in each dimension
-     \param order Input - =1, =-1 or =2 gives the order of shape function (-1= macro elements of order 1)
-     \param l0,l1,l2 Input - length of each side of brick
-     \param integrationOrder Input - order of the quadrature scheme.  
-     If integrationOrder<0 the integration order is selected independently.
-     \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-     If reducedIntegrationOrder<0 the integration order is selected independently.
-     \param useElementsOnFace Input - whether or not to use elements on face
-     \param periodic0, periodic1, periodic2 Input - whether or not boundary 
-     conditions of the dimension are periodic
-     \param useFullElementOrder
-     \param optimize
-  */
-  DUDLEY_DLL_API
-  escript::Domain_ptr brick(esysUtils::JMPI& mpi_info, double n0=1,double n1=1,double n2=1,int order=1,
-                    double l0=1.0,double l1=1.0,double l2=1.0,
-                    int periodic0=0,int periodic1=0,
-                    int periodic2=0,
-                    int integrationOrder=-1,
-                    int reducedIntegrationOrder=-1,
-                    int useElementsOnFace=0,
-                    int useFullElementOrder=0,
-                    int optimize=0);
-
-  /**
-     \brief
-     Creates a rectangular mesh with n0 x n1 elements over the rectangle 
-     [0,l0] x [0,l1].
-
-     \param n0,n1 Input - number of elements in each dimension [We only accept floats for py transition]
-     \param order Input - =1, =-1 or =2 gives the order of shape function (-1= macro elements of order 1)
-     \param l0,l1 Input - length of each side of brick
-     \param integrationOrder Input - order of the quadrature scheme. 
-     If integrationOrder<0 the integration order is selected 
-     independently.
-     \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-     If reducedIntegrationOrder<0 the integration order is selected independently.
-     \param periodic0, periodic1 Input - whether or not the boundary
-     conditions of the dimension are periodic
-     \param useElementsOnFace Input - whether or not to use elements on face
-     \param useFullElementOrder
-     \param optimize
-  */
-  DUDLEY_DLL_API
-  escript::Domain_ptr rectangle(esysUtils::JMPI& mpi_info, double n0=1,double n1=1,int order=1,
-                                      double l0=1.0, double l1=1.0,
-                                      int periodic0=false,int periodic1=false,
-                                      int integrationOrder=-1,
-                                      int reducedIntegrationOrder=-1,
-                                      int useElementsOnFace=0,
-                                      int useFullElementOrder=0,
-                                      int optimize=0);
-
-//  /**
-//     \brief
-//     Merges a list of meshes into one list.
-//     \param meshList Input - The list of meshes.
-//  */
-//  DUDLEY_DLL_API
-// //   escript::AbstractContinuousDomain* meshMerge(const boost::python::list& meshList);
-//  escript::Domain_ptr meshMerge(const boost::python::list& meshList);
-
- 
-} // end of namespace
-#endif
diff --git a/dudley/src/DomainFactory.cpp b/dudley/src/DomainFactory.cpp
new file mode 100644
index 0000000..8e48717
--- /dev/null
+++ b/dudley/src/DomainFactory.cpp
@@ -0,0 +1,473 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <dudley/DomainFactory.h>
+
+#include <escript/index.h>
+#include <escript/SubWorld.h>
+
+#ifdef ESYS_HAVE_NETCDF
+#include <netcdfcpp.h>
+#endif
+
+#include <boost/python/extract.hpp>
+#include <boost/scoped_array.hpp>
+
+#include <sstream>
+
+using namespace std;
+using namespace escript;
+namespace bp = boost::python;
+
+namespace dudley {
+
+#ifdef ESYS_HAVE_NETCDF
+// A convenience method to retrieve an integer attribute from a NetCDF file
+template<typename T>
+T ncReadAtt(NcFile* dataFile, const string& fName, const string& attrName)
+{
+    NcAtt* attr = dataFile->get_att(attrName.c_str());
+    if (!attr) {
+        stringstream msg;
+        msg << "loadMesh: Error retrieving integer attribute '" << attrName
+            << "' from NetCDF file '" << fName << "'";
+        throw IOError(msg.str());
+    }
+    T value = (sizeof(T) > 4 ? attr->as_long(0) : attr->as_int(0));
+    delete attr;
+    return value;
+}
+#endif
+
+inline void cleanupAndThrow(DudleyDomain* dom, string msg)
+{
+    delete dom;
+    string msgPrefix("loadMesh: NetCDF operation failed - ");
+    throw IOError(msgPrefix+msg);
+}
+
+Domain_ptr DudleyDomain::load(const string& fileName)
+{
+#ifdef ESYS_HAVE_NETCDF
+    JMPI mpiInfo = makeInfo(MPI_COMM_WORLD);
+    const string fName(mpiInfo->appendRankToFileName(fileName));
+
+    // Open NetCDF file for reading
+    NcAtt *attr;
+    NcVar *nc_var_temp;
+    // netCDF error handler
+    NcError err(NcError::silent_nonfatal);
+    // Create the NetCDF file.
+    NcFile dataFile(fName.c_str(), NcFile::ReadOnly);
+    if (!dataFile.is_valid()) {
+        stringstream msg;
+        msg << "loadMesh: Opening NetCDF file '" << fName << "' for reading failed.";
+        throw IOError(msg.str());
+    }
+
+    // Read NetCDF integer attributes
+
+    // index_size was only introduced with 64-bit index support so fall back
+    // to 32 bits if not found.
+    int index_size;
+    try {
+        index_size = ncReadAtt<int>(&dataFile, fName, "index_size");
+    } catch (IOError& e) {
+        index_size = 4;
+    }
+    // technically we could cast if reading 32-bit data on 64-bit escript
+    // but cost-benefit analysis clearly favours this implementation for now
+    if (sizeof(index_t) != index_size) {
+        throw IOError("loadMesh: size of index types at runtime differ from dump file");
+    }
+
+    int mpi_size = ncReadAtt<int>(&dataFile, fName, "mpi_size");
+    int mpi_rank = ncReadAtt<int>(&dataFile, fName, "mpi_rank");
+    int numDim = ncReadAtt<int>(&dataFile, fName, "numDim");
+    dim_t numNodes = ncReadAtt<dim_t>(&dataFile, fName, "numNodes");
+    dim_t num_Elements = ncReadAtt<dim_t>(&dataFile, fName, "num_Elements");
+    dim_t num_FaceElements = ncReadAtt<dim_t>(&dataFile, fName, "num_FaceElements");
+    dim_t num_Points = ncReadAtt<dim_t>(&dataFile, fName, "num_Points");
+    int num_Elements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_Elements_numNodes");
+    int Elements_TypeId = ncReadAtt<int>(&dataFile, fName, "Elements_TypeId");
+    int num_FaceElements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_FaceElements_numNodes");
+    int FaceElements_TypeId = ncReadAtt<int>(&dataFile, fName, "FaceElements_TypeId");
+    int Points_TypeId = ncReadAtt<int>(&dataFile, fName, "Points_TypeId");
+    int num_Tags = ncReadAtt<int>(&dataFile, fName, "num_Tags");
+
+    // Verify size and rank
+    if (mpiInfo->size != mpi_size) {
+        stringstream msg;
+        msg << "loadMesh: The NetCDF file '" << fName
+            << "' can only be read on " << mpi_size
+            << " CPUs. Currently running: " << mpiInfo->size;
+        throw DudleyException(msg.str());
+    }
+    if (mpiInfo->rank != mpi_rank) {
+        stringstream msg;
+        msg << "loadMesh: The NetCDF file '" << fName
+            << "' should be read on CPU #" << mpi_rank
+            << " and NOT on #" << mpiInfo->rank;
+        throw DudleyException(msg.str());
+    }
+
+    // Read mesh name
+    if (! (attr=dataFile.get_att("Name")) ) {
+        stringstream msg;
+        msg << "loadMesh: Error retrieving mesh name from NetCDF file '"
+            << fName << "'";
+        throw IOError(msg.str());
+    }
+    boost::scoped_array<char> name(attr->as_string(0));
+    delete attr;
+
+    // allocate mesh
+    DudleyDomain* dom = new DudleyDomain(name.get(), numDim, mpiInfo);
+
+    // read nodes
+    NodeFile* nodes = dom->getNodes();
+    nodes->allocTable(numNodes);
+    // Nodes_Id
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_Id")) )
+        cleanupAndThrow(dom, "get_var(Nodes_Id)");
+    if (! nc_var_temp->get(&nodes->Id[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_Id)");
+    // Nodes_Tag
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_Tag")) )
+        cleanupAndThrow(dom, "get_var(Nodes_Tag)");
+    if (! nc_var_temp->get(&nodes->Tag[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_Tag)");
+    // Nodes_gDOF
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_gDOF")) )
+        cleanupAndThrow(dom, "get_var(Nodes_gDOF)");
+    if (! nc_var_temp->get(&nodes->globalDegreesOfFreedom[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_gDOF)");
+    // Nodes_gNI
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_gNI")) )
+        cleanupAndThrow(dom, "get_var(Nodes_gNI)");
+    if (! nc_var_temp->get(&nodes->globalNodesIndex[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_gNI)");
+    // Nodes_Coordinates
+    if (!(nc_var_temp = dataFile.get_var("Nodes_Coordinates")))
+        cleanupAndThrow(dom, "get_var(Nodes_Coordinates)");
+    if (! nc_var_temp->get(&nodes->Coordinates[0], numNodes, numDim) )
+        cleanupAndThrow(dom, "get(Nodes_Coordinates)");
+
+    nodes->updateTagList();
+
+    // read elements
+    ElementFile* elements = new ElementFile((ElementTypeId)Elements_TypeId, mpiInfo);
+    dom->setElements(elements);
+    elements->allocTable(num_Elements);
+    elements->minColor = 0;
+    elements->maxColor = num_Elements-1;
+    if (num_Elements > 0) {
+       // Elements_Id
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Id")) )
+           cleanupAndThrow(dom, "get_var(Elements_Id)");
+       if (! nc_var_temp->get(&elements->Id[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Id)");
+       // Elements_Tag
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Tag")) )
+           cleanupAndThrow(dom, "get_var(Elements_Tag)");
+       if (! nc_var_temp->get(&elements->Tag[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Tag)");
+       // Elements_Owner
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Owner")) )
+           cleanupAndThrow(dom, "get_var(Elements_Owner)");
+       if (! nc_var_temp->get(&elements->Owner[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Owner)");
+       // Elements_Color
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Color")) )
+           cleanupAndThrow(dom, "get_var(Elements_Color)");
+       if (! nc_var_temp->get(&elements->Color[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Color)");
+       // Elements_Nodes
+       int* Elements_Nodes = new int[num_Elements*num_Elements_numNodes];
+       if (!(nc_var_temp = dataFile.get_var("Elements_Nodes"))) {
+           delete[] Elements_Nodes;
+           cleanupAndThrow(dom, "get_var(Elements_Nodes)");
+       }
+       if (! nc_var_temp->get(&Elements_Nodes[0], num_Elements, num_Elements_numNodes) ) {
+           delete[] Elements_Nodes;
+           cleanupAndThrow(dom, "get(Elements_Nodes)");
+       }
+
+       // Copy temp array into elements->Nodes
+       for (index_t i = 0; i < num_Elements; i++) {
+           for (int j = 0; j < num_Elements_numNodes; j++) {
+               elements->Nodes[INDEX2(j,i,num_Elements_numNodes)]
+                    = Elements_Nodes[INDEX2(j,i,num_Elements_numNodes)];
+           }
+       }
+       delete[] Elements_Nodes;
+    } // num_Elements > 0
+    elements->updateTagList();
+
+    // get the face elements
+    ElementFile* faces = new ElementFile((ElementTypeId)FaceElements_TypeId, mpiInfo);
+    dom->setFaceElements(faces);
+    faces->allocTable(num_FaceElements);
+    faces->minColor = 0;
+    faces->maxColor = num_FaceElements-1;
+    if (num_FaceElements > 0) {
+        // FaceElements_Id
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Id")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Id)");
+        if (! nc_var_temp->get(&faces->Id[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Id)");
+        // FaceElements_Tag
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Tag")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Tag)");
+        if (! nc_var_temp->get(&faces->Tag[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Tag)");
+        // FaceElements_Owner
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Owner")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Owner)");
+        if (! nc_var_temp->get(&faces->Owner[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Owner)");
+        // FaceElements_Color
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Color")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Color)");
+        if (! nc_var_temp->get(&faces->Color[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Color)");
+        // FaceElements_Nodes
+        int* FaceElements_Nodes = new int[num_FaceElements*num_FaceElements_numNodes];
+        if (!(nc_var_temp = dataFile.get_var("FaceElements_Nodes"))) {
+            delete[] FaceElements_Nodes;
+            cleanupAndThrow(dom, "get_var(FaceElements_Nodes)");
+        }
+        if (! nc_var_temp->get(&(FaceElements_Nodes[0]), num_FaceElements, num_FaceElements_numNodes) ) {
+            delete[] FaceElements_Nodes;
+            cleanupAndThrow(dom, "get(FaceElements_Nodes)");
+        }
+        // Copy temp array into faces->Nodes
+        for (index_t i = 0; i < num_FaceElements; i++) {
+            for (int j = 0; j < num_FaceElements_numNodes; j++) {
+                faces->Nodes[INDEX2(j,i,num_FaceElements_numNodes)] = FaceElements_Nodes[INDEX2(j,i,num_FaceElements_numNodes)];
+            }
+        }
+        delete[] FaceElements_Nodes;
+    } // num_FaceElements > 0
+    faces->updateTagList();
+
+    // get the Points (nodal elements)
+    ElementFile* points = new ElementFile((ElementTypeId)Points_TypeId, mpiInfo);
+    dom->setPoints(points);
+    points->allocTable(num_Points);
+    points->minColor = 0;
+    points->maxColor = num_Points-1;
+    if (num_Points > 0) {
+        // Points_Id
+        if (! ( nc_var_temp = dataFile.get_var("Points_Id")))
+            cleanupAndThrow(dom, "get_var(Points_Id)");
+        if (! nc_var_temp->get(&points->Id[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Id)");
+        // Points_Tag
+        if (! ( nc_var_temp = dataFile.get_var("Points_Tag")))
+            cleanupAndThrow(dom, "get_var(Points_Tag)");
+        if (! nc_var_temp->get(&points->Tag[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Tag)");
+        // Points_Owner
+        if (! ( nc_var_temp = dataFile.get_var("Points_Owner")))
+            cleanupAndThrow(dom, "get_var(Points_Owner)");
+        if (!nc_var_temp->get(&points->Owner[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Owner)");
+        // Points_Color
+        if (! ( nc_var_temp = dataFile.get_var("Points_Color")))
+            cleanupAndThrow(dom, "get_var(Points_Color)");
+        if (!nc_var_temp->get(&points->Color[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Color)");
+        // Points_Nodes
+        int* Points_Nodes = new int[num_Points];
+        if (!(nc_var_temp = dataFile.get_var("Points_Nodes"))) {
+            delete[] Points_Nodes;
+            cleanupAndThrow(dom, "get_var(Points_Nodes)");
+        }
+        if (! nc_var_temp->get(&Points_Nodes[0], num_Points) ) {
+            delete[] Points_Nodes;
+            cleanupAndThrow(dom, "get(Points_Nodes)");
+        }
+        // Copy temp array into points->Nodes
+        for (index_t i = 0; i < num_Points; i++) {
+            points->Id[points->Nodes[INDEX2(0,i,1)]] = Points_Nodes[i];
+        }
+        delete[] Points_Nodes;
+    } // num_Points > 0
+    points->updateTagList();
+
+    // get the tags
+    if (num_Tags > 0) {
+        // Temp storage to gather node IDs
+        int *Tags_keys = new int[num_Tags];
+        char name_temp[4096];
+        int i;
+
+        // Tags_keys
+        if (! ( nc_var_temp = dataFile.get_var("Tags_keys")) ) {
+            delete[] Tags_keys;
+            cleanupAndThrow(dom, "get_var(Tags_keys)");
+        }
+        if (! nc_var_temp->get(&Tags_keys[0], num_Tags) ) {
+            delete[] Tags_keys;
+            cleanupAndThrow(dom, "get(Tags_keys)");
+        }
+        for (i=0; i<num_Tags; i++) {
+          // Retrieve tag name
+          sprintf(name_temp, "Tags_name_%d", i);
+          if (! (attr=dataFile.get_att(name_temp)) ) {
+              delete[] Tags_keys;
+              stringstream msg;
+              msg << "get_att(" << name_temp << ")";
+              cleanupAndThrow(dom, msg.str());
+          }
+          boost::scoped_array<char> name(attr->as_string(0));
+          delete attr;
+          dom->setTagMap(name.get(), Tags_keys[i]);
+        }
+        delete[] Tags_keys;
+    }
+
+    // Nodes_DofDistribution
+    IndexVector first_DofComponent(mpi_size+1);
+    if (! (nc_var_temp = dataFile.get_var("Nodes_DofDistribution")) ) {
+        cleanupAndThrow(dom, "get_var(Nodes_DofDistribution)");
+    }
+    if (!nc_var_temp->get(&first_DofComponent[0], mpi_size+1)) {
+        cleanupAndThrow(dom, "get(Nodes_DofDistribution)");
+    }
+
+    // Nodes_NodeDistribution
+    IndexVector first_NodeComponent(mpi_size+1);
+    if (! (nc_var_temp = dataFile.get_var("Nodes_NodeDistribution")) ) {
+        cleanupAndThrow(dom, "get_var(Nodes_NodeDistribution)");
+    }
+    if (!nc_var_temp->get(&first_NodeComponent[0], mpi_size+1)) {
+        cleanupAndThrow(dom, "get(Nodes_NodeDistribution)");
+    }
+    dom->createMappings(first_DofComponent, first_NodeComponent);
+
+    return dom->getPtr();
+#else
+    throw DudleyException("loadMesh: not compiled with NetCDF. Please contact your installation manager.");
+#endif // ESYS_HAVE_NETCDF
+}
+
+Domain_ptr readMesh(const string& fileName, int /*integrationOrder*/,
+                    int /*reducedIntegrationOrder*/, bool optimize)
+{
+    JMPI mpiInfo = makeInfo(MPI_COMM_WORLD);
+    return DudleyDomain::read(mpiInfo, fileName, optimize);
+}
+
+Domain_ptr readGmsh(const string& fileName, int numDim,
+                    int /*integrationOrder*/, int /*reducedIntegrationOrder*/,
+                    bool optimize)
+{
+    JMPI mpiInfo = makeInfo(MPI_COMM_WORLD);
+    return DudleyDomain::readGmsh(mpiInfo, fileName, numDim, optimize);
+}
+
+Domain_ptr brick(JMPI info, dim_t n0, dim_t n1, dim_t n2, int order,
+                 double l0, double l1, double l2,
+                 bool periodic0, bool periodic1, bool periodic2,
+                 int integrationOrder, int reducedIntegrationOrder,
+                 bool useElementsOnFace, bool useFullElementOrder,
+                 bool optimize)
+{
+    // we don't support periodic boundary conditions
+    if (periodic0 || periodic1)
+        throw ValueError("Dudley does not support periodic boundary conditions.");
+
+    if (integrationOrder > 3 || reducedIntegrationOrder > 1)
+        throw ValueError("Dudley does not support the requested integration order.");
+
+    if (useElementsOnFace || useFullElementOrder)
+        throw ValueError("Dudley does not support useElementsOnFace or useFullElementOrder.");
+
+    if (order > 1)
+        throw ValueError("Dudley does not support element order greater than 1.");
+
+    return DudleyDomain::create3D(n0, n1, n2, l0, l1, l2, optimize, info);
+}
+
+Domain_ptr brick_driver(const bp::list& args)
+{
+    bp::object pworld = args[15];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+    return brick(info, static_cast<dim_t>(bp::extract<float>(args[0])),
+                 static_cast<dim_t>(bp::extract<float>(args[1])),
+                 static_cast<dim_t>(bp::extract<float>(args[2])),
+                 bp::extract<int>(args[3]), bp::extract<double>(args[4]),
+                 bp::extract<double>(args[5]), bp::extract<double>(args[6]),
+                 bp::extract<int>(args[7]), bp::extract<int>(args[8]),
+                 bp::extract<int>(args[9]), bp::extract<int>(args[10]),
+                 bp::extract<int>(args[11]), bp::extract<int>(args[12]),
+                 bp::extract<int>(args[13]), bp::extract<int>(args[14])
+                 );
+}
+
+Domain_ptr rectangle(JMPI info, dim_t n0, dim_t n1, int order,
+                     double l0, double l1, bool periodic0, bool periodic1,
+                     int integrationOrder, int reducedIntegrationOrder,
+                     bool useElementsOnFace, bool useFullElementOrder,
+                     bool optimize)
+{
+    if (periodic0 || periodic1) // we don't support periodic boundary conditions
+        throw ValueError("Dudley does not support periodic boundary conditions.");
+    if (integrationOrder > 3 || reducedIntegrationOrder > 1)
+        throw ValueError("Dudley does not support the requested integrationorders.");
+    if (useElementsOnFace || useFullElementOrder)
+        throw ValueError("Dudley does not support useElementsOnFace or useFullElementOrder.");
+    if (order > 1)
+        throw ValueError("Dudley only supports first-order elements.");
+    return DudleyDomain::create2D(n0, n1, l0, l1, optimize, info);
+}
+
+Domain_ptr rectangle_driver(const bp::list& args)
+{
+    bp::object pworld = args[12];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+
+    return rectangle(info, static_cast<dim_t>(bp::extract<float>(args[0])),
+                     static_cast<dim_t>(bp::extract<float>(args[1])),
+                     bp::extract<int>(args[2]), bp::extract<double>(args[3]),
+                     bp::extract<double>(args[4]), bp::extract<int>(args[5]),
+                     bp::extract<int>(args[6]), bp::extract<int>(args[7]),
+                     bp::extract<int>(args[8]), bp::extract<int>(args[9]),
+                     bp::extract<int>(args[10]), bp::extract<int>(args[11])
+                     );
+}
+
+} // namespace dudley
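For orientation, the positional layout that brick_driver() unpacks above (indices taken from its extract<> calls) can be sketched from the Python side. This is only an illustration: the module name dudleycpp and the sample values are assumptions, not part of the upstream sources.

    # Hypothetical sketch of the 16-entry args list consumed by brick_driver();
    # 'dudleycpp' stands in for whatever name the compiled module is imported under.
    args = [
        10.0, 10.0, 10.0,   # args[0..2]:   n0, n1, n2 (floats, cast to dim_t)
        1,                  # args[3]:      order (anything > 1 is rejected)
        1.0, 1.0, 1.0,      # args[4..6]:   l0, l1, l2
        0, 0, 0,            # args[7..9]:   periodic0..2 (must remain 0)
        -1, -1,             # args[10..11]: integrationOrder, reducedIntegrationOrder
        0, 0,               # args[12..13]: useElementsOnFace, useFullElementOrder
        0,                  # args[14]:     optimize
        None,               # args[15]:     escriptworld (a SubWorld, or None for MPI_COMM_WORLD)
    ]
    dom = dudleycpp.brick_driver(args)
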
diff --git a/dudley/src/DomainFactory.h b/dudley/src/DomainFactory.h
new file mode 100644
index 0000000..eccb5c0
--- /dev/null
+++ b/dudley/src/DomainFactory.h
@@ -0,0 +1,125 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __DUDLEY_DOMAINFACTORY_H__
+#define __DUDLEY_DOMAINFACTORY_H__
+
+#include <dudley/DudleyDomain.h>
+
+#include <boost/python/list.hpp>
+
+#include <sstream>
+
+/**
+    \brief
+    A suite of factory methods for creating 2D and 3D dudley domains.
+*/
+
+namespace dudley {
+
+/**
+    \brief
+    reads a mesh from a fly file. For MPI parallel runs fans out the mesh to
+    multiple processes.
+    \param fileName the name of the file
+    \param integrationOrder ignored
+    \param reducedIntegrationOrder ignored
+    \param optimize whether to optimize the node labels
+*/
+escript::Domain_ptr readMesh(const std::string& fileName,
+                             int integrationOrder = -1,
+                             int reducedIntegrationOrder = -1,
+                             bool optimize = false);
+
+/**
+    \brief
+    reads a gmsh mesh file
+    \param fileName the name of the file
+    \param numDim spatial dimensionality
+    \param integrationOrder ignored
+    \param reducedIntegrationOrder ignored
+    \param optimize whether to optimize the node labels 
+*/
+escript::Domain_ptr readGmsh(const std::string& fileName, int numDim,
+                             int integrationOrder = -1,
+                             int reducedIntegrationOrder = -1,
+                             bool optimize = false);
+
+/**
+    \brief
+    Creates a rectangular mesh with n0 x n1 x n2 elements over the brick 
+    [0,l0] x [0,l1] x [0,l2].
+
+    \param jmpi pointer to MPI world information structure
+    \param n0,n1,n2 number of elements in each dimension
+    \param order ignored
+    \param l0,l1,l2 length of each side of brick
+    \param integrationOrder ignored
+    \param reducedIntegrationOrder ignored
+    \param optimize
+*/
+escript::Domain_ptr brick(escript::JMPI jmpi,
+                    dim_t n0=1, dim_t n1=1, dim_t n2=1, int order=1,
+                    double l0=1.0, double l1=1.0, double l2=1.0,
+                    bool periodic0=false, bool periodic1=false, bool periodic2=false,
+                    int integrationOrder=-1, int reducedIntegrationOrder=-1,
+                    bool useElementsOnFace=false, bool useFullElementOrder=false,
+                    bool optimize=false);
+
+/**
+    \brief Python driver for brick()
+    \param args see brick() definition for order of params
+*/
+escript::Domain_ptr brick_driver(const boost::python::list& args);
+
+/**
+    \brief
+    Creates a 2-dimensional rectangular mesh with n0 x n1 x 2 Tri3 elements
+    over the rectangle [0,l0] x [0,l1]. The doubling of elements is due to
+    splitting of rectangular elements.
+
+    \param jmpi pointer to MPI world information structure
+    \param n0,n1 number of elements in each dimension
+    \param order ignored
+    \param l0,l1 length of each side of rectangle
+    \param periodic0,periodic1 ignored
+    \param integrationOrder ignored
+    \param reducedIntegrationOrder ignored
+    \param useElementsOnFace ignored
+    \param useFullElementOrder ignored
+    \param optimize whether to optimize labelling
+*/
+escript::Domain_ptr rectangle(escript::JMPI jmpi,
+                              dim_t n0 = 1, dim_t n1 = 1, int order = 1,
+                              double l0 = 1.0, double l1 = 1.0,
+                              bool periodic0 = false, bool periodic1 = false,
+                              int integrationOrder = -1,
+                              int reducedIntegrationOrder = -1,
+                              bool useElementsOnFace = false,
+                              bool useFullElementOrder = false,
+                              bool optimize = false);
+
+/**
+    \brief Python driver for rectangle()
+    \param args see rectangle() definition for order of params
+*/
+escript::Domain_ptr rectangle_driver(const boost::python::list& args);
+
+
+} // end of namespace
+
+#endif // __DUDLEY_DOMAINFACTORY_H__
+
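For orientation while reading the diff: the factory declarations above back the
constructors exported by the esys.dudley Python module. A minimal sketch of the
usual Python-side calls, assuming a standard escript install (keyword names
follow the declarations; "mesh.msh" is a placeholder path):

    from esys.dudley import Rectangle, Brick, ReadGmsh

    # 2D: n0 x n1 cells, each cell split into two Tri3 elements
    dom2d = Rectangle(n0=10, n1=10, l0=1.0, l1=1.0)

    # 3D domain over the brick [0,l0] x [0,l1] x [0,l2]
    dom3d = Brick(n0=10, n1=10, n2=10, l0=1.0, l1=1.0, l2=1.0)

    # read an existing gmsh mesh; the second argument is the spatial dimension
    # dom = ReadGmsh("mesh.msh", 2)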
diff --git a/dudley/src/Dudley.cpp b/dudley/src/Dudley.cpp
deleted file mode 100644
index 35b6314..0000000
--- a/dudley/src/Dudley.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*    Dudley finite element solver */
-
-/************************************************************************************/
-
-#include "Dudley.h"
-
-/* This function returns a time mark */
-double Dudley_timer(void)
-{
-    return Esys_timer();
-}
-
-/* This function checks if the pointer ptr has a target. If not an
-   error is raised and TRUE is returned. */
-bool Dudley_checkPtr(void *arg)
-{
-    return Esys_checkPtr(arg);
-}
-
-/* reset the error to NO_ERROR */
-void Dudley_resetError(void)
-{
-    Esys_resetError();
-}
-
-/* sets an error */
-void Dudley_setError(Dudley_ErrorCodeType err, __const char *msg)
-{
-    Esys_setError(err, msg);
-}
-
-/* checks if there is no error */
-bool Dudley_noError(void)
-{
-    return Esys_noError();
-}
-
-/* return the error code */
-Dudley_ErrorCodeType Dudley_getErrorType(void)
-{
-    return Esys_getErrorType();
-}
-
-/* return the error message */
-char *Dudley_getErrorMessage(void)
-{
-    return Esys_getErrorMessage();
-}
-
-/* return the error message */
-void Dudley_convertPasoError(void)
-{
-    /* nothing has to be done here */
-}
-
-/* checks that there is no error across all processes in a communicator */
-/* NOTE : does not make guarantee consistency of error string on each process */
-bool Dudley_MPI_noError(esysUtils::JMPI& mpi_info)
-{
-    return esysUtils::Esys_MPIInfo_noError(mpi_info);
-}
diff --git a/dudley/src/Dudley.h b/dudley/src/Dudley.h
index 1820b7d..b0185c8 100644
--- a/dudley/src/Dudley.h
+++ b/dudley/src/Dudley.h
@@ -14,54 +14,58 @@
 *
 *****************************************************************************/
 
-#ifndef INC_DUDLEY
-#define INC_DUDLEY
+#ifndef __DUDLEY_H__
+#define __DUDLEY_H__
 
-/************************************************************************************/
+/****************************************************************************/
 
 /*    Dudley finite element solver */
 
-/************************************************************************************/
+/****************************************************************************/
 
-#include "esysUtils/types.h"
-#include "esysUtils/Esys_MPI.h"
-#include "esysUtils/error.h"
-#include <cstring>
+#include <escript/DataTypes.h>
+
+#include <dudley/DudleyException.h>
+
+#include <escript/Data.h>
+#include <escript/EsysMPI.h>
+
+namespace dudley {
+
+using escript::DataTypes::index_t;
+using escript::DataTypes::dim_t;
+using escript::DataTypes::IndexVector;
 
-/************************************************************************************/
-/*#define Dudley_TRACE */
 #define DUDLEY_UNKNOWN -1
 #define DUDLEY_DEGREES_OF_FREEDOM 1
 #define DUDLEY_NODES 3
 #define DUDLEY_ELEMENTS 4
 #define DUDLEY_FACE_ELEMENTS 5
 #define DUDLEY_POINTS 6
-#define DUDLEY_REDUCED_DEGREES_OF_FREEDOM 2
-#define DUDLEY_REDUCED_NODES 14
 #define DUDLEY_REDUCED_ELEMENTS 10
 #define DUDLEY_REDUCED_FACE_ELEMENTS 11
 
-/* status stuff */
-typedef int Dudley_Status_t;
-#define Dudley_increaseStatus(self) ((self)->status)++
-#define DUDLEY_INITIAL_STATUS 0
+//
+// Codes for function space types supported
+enum {
+    DegreesOfFreedom = DUDLEY_DEGREES_OF_FREEDOM,
+    Nodes = DUDLEY_NODES,
+    Elements = DUDLEY_ELEMENTS,
+    ReducedElements = DUDLEY_REDUCED_ELEMENTS,
+    FaceElements = DUDLEY_FACE_ELEMENTS,
+    ReducedFaceElements = DUDLEY_REDUCED_FACE_ELEMENTS,
+    Points = DUDLEY_POINTS
+};
 
-/* error codes */
+inline bool hasReducedIntegrationOrder(const escript::Data& in)
+{
+    const int fs = in.getFunctionSpace().getTypeCode();
+    return (fs == ReducedElements || fs == ReducedFaceElements);
+}
 
-typedef Esys_ErrorCodeType Dudley_ErrorCodeType;
+#define DUDLEY_INITIAL_STATUS 0
 
-/* interfaces */
+} // namespace dudley
 
-double Dudley_timer(void);
-bool Dudley_checkPtr(void *);
-void Dudley_resetError(void);
-void Dudley_setError(Dudley_ErrorCodeType err, __const char *msg);
-bool Dudley_noError(void);
-Dudley_ErrorCodeType Dudley_getErrorType(void);
-char *Dudley_getErrorMessage(void);
-void Dudley_convertPasoError(void);
-bool Dudley_MPI_noError(esysUtils::JMPI& mpi_info);
-// void Dudley_setTagsInUse(const index_t Tag, const dim_t numTags, dim_t * numTagsInUse, index_t ** tagsInUse,
-// 			 esysUtils::JMPI& mpiinfo);
+#endif // __DUDLEY_H__
 
-#endif				/* #ifndef INC_DUDLEY */
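The function-space codes defined above are what the escript FunctionSpace
factories resolve to on a dudley domain. A hedged illustration from Python
(standard escript API; the printed names are those registered by
DudleyDomain::setFunctionSpaceTypeNames further down in this diff):

    from esys.dudley import Rectangle
    from esys.escript import ContinuousFunction, Function, ReducedFunction, FunctionOnBoundary

    dom = Rectangle(n0=4, n1=4)
    print(ContinuousFunction(dom))   # Nodes
    print(Function(dom))             # Elements
    print(ReducedFunction(dom))      # ReducedElements (reduced integration order)
    print(FunctionOnBoundary(dom))   # FaceElements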
diff --git a/dudley/src/DudleyDomain.cpp b/dudley/src/DudleyDomain.cpp
new file mode 100644
index 0000000..c010034
--- /dev/null
+++ b/dudley/src/DudleyDomain.cpp
@@ -0,0 +1,1733 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <dudley/DudleyDomain.h>
+#include <dudley/Assemble.h>
+#include <dudley/DudleyException.h>
+#include <dudley/IndexList.h>
+
+#include <escript/Data.h>
+#include <escript/DataFactory.h>
+#include <escript/Random.h>
+#include <escript/SolverOptions.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#include <paso/Transport.h>
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/TrilinosMatrixAdapter.h>
+
+using esys_trilinos::TrilinosMatrixAdapter;
+using esys_trilinos::const_TrilinosGraph_ptr;
+#endif
+
+#include <boost/scoped_array.hpp>
+
+#ifdef ESYS_HAVE_NETCDF
+#include <netcdfcpp.h>
+#endif
+
+using namespace std;
+namespace bp = boost::python;
+using escript::NotImplementedError;
+using escript::ValueError;
+
+namespace dudley {
+
+DudleyDomain::FunctionSpaceNamesMapType DudleyDomain::m_functionSpaceTypeNames;
+
+DudleyDomain::DudleyDomain(const string& name, int numDim, escript::JMPI jmpi) :
+    m_mpiInfo(jmpi),
+    m_name(name),
+    m_elements(NULL),
+    m_faceElements(NULL),
+    m_points(NULL)
+{
+    // allocate node table
+    m_nodes = new NodeFile(numDim, m_mpiInfo);
+    setFunctionSpaceTypeNames();
+}
+
+DudleyDomain::DudleyDomain(const DudleyDomain& in) :
+    m_mpiInfo(in.m_mpiInfo),
+    m_name(in.m_name),
+    m_nodes(in.m_nodes),
+    m_elements(in.m_elements),
+    m_faceElements(in.m_faceElements),
+    m_points(in.m_points)
+{
+    setFunctionSpaceTypeNames();
+}
+
+DudleyDomain::~DudleyDomain()
+{
+    delete m_nodes;
+    delete m_elements;
+    delete m_faceElements;
+    delete m_points;
+}
+
+void DudleyDomain::MPIBarrier() const
+{
+#ifdef ESYS_MPI
+    MPI_Barrier(getMPIComm());
+#endif
+}
+
+void DudleyDomain::setElements(ElementFile* elements)
+{
+    delete m_elements;
+    m_elements = elements;
+}
+
+void DudleyDomain::setFaceElements(ElementFile* elements)
+{
+    delete m_faceElements;
+    m_faceElements = elements;
+}
+
+void DudleyDomain::setPoints(ElementFile* elements)
+{
+    delete m_points;
+    m_points = elements;
+}
+
+void DudleyDomain::createMappings(const IndexVector& dofDist,
+                                  const IndexVector& nodeDist)
+{
+    m_nodes->createNodeMappings(dofDist, nodeDist);
+
+#ifdef ESYS_HAVE_TRILINOS
+    // TODO?: the following block should probably go into prepare() but
+    // Domain::load() only calls createMappings which is why it's here...
+    // make sure trilinos distribution graph is available for matrix building
+    // and interpolation
+    const index_t numTargets = m_nodes->getNumDegreesOfFreedomTargets();
+    const index_t* target = m_nodes->borrowTargetDegreesOfFreedom();
+    boost::scoped_array<IndexList> indexList(new IndexList[numTargets]);
+
+#pragma omp parallel
+    {
+        // insert contributions from element matrices into columns in
+        // index list
+        IndexList_insertElements(indexList.get(), m_elements, target);
+        IndexList_insertElements(indexList.get(), m_faceElements, target);
+        IndexList_insertElements(indexList.get(), m_points, target);
+    }
+    m_nodes->createTrilinosGraph(indexList.get());
+#endif
+}
+
+void DudleyDomain::markNodes(vector<short>& mask, index_t offset) const
+{
+    m_elements->markNodes(mask, offset);
+    m_faceElements->markNodes(mask, offset);
+    m_points->markNodes(mask, offset);
+}
+
+void DudleyDomain::relabelElementNodes(const index_t* newNode, index_t offset)
+{
+    m_elements->relabelNodes(newNode, offset);
+    m_faceElements->relabelNodes(newNode, offset);
+    m_points->relabelNodes(newNode, offset);
+}
+
+void DudleyDomain::dump(const string& fileName) const
+{
+#ifdef ESYS_HAVE_NETCDF
+    const NcDim* ncdims[12] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
+    NcVar* ids;
+    index_t* index_ptr;
+#ifdef ESYS_INDEXTYPE_LONG
+    NcType ncIdxType = ncLong;
+#else
+    NcType ncIdxType = ncInt;
+#endif
+    int num_Tags = 0;
+    int mpi_size                  = getMPISize();
+    int mpi_rank                  = getMPIRank();
+    int numDim                    = m_nodes->numDim;
+    dim_t numNodes                = m_nodes->getNumNodes();
+    dim_t num_Elements            = m_elements->numElements;
+    dim_t num_FaceElements        = m_faceElements->numElements;
+    dim_t num_Points              = m_points->numElements;
+    int num_Elements_numNodes     = m_elements->numNodes;
+    int num_FaceElements_numNodes = m_faceElements->numNodes;
+#ifdef ESYS_MPI
+    MPI_Status status;
+#endif
+
+    // Incoming token indicates it's my turn to write
+#ifdef ESYS_MPI
+    if (mpi_rank > 0)
+        MPI_Recv(&num_Tags, 0, MPI_INT, mpi_rank-1, 81800, getMPIComm(), &status);
+#endif
+
+    const string newFileName(m_mpiInfo->appendRankToFileName(fileName));
+
+    // Figure out how much storage is required for tags
+    num_Tags = m_tagMap.size();
+
+    // NetCDF error handler
+    NcError err(NcError::verbose_nonfatal);
+    // Create the file
+    NcFile dataFile(newFileName.c_str(), NcFile::Replace);
+    string msgPrefix("Error in DudleyDomain::dump: NetCDF operation failed - ");
+    // check if writing was successful
+    if (!dataFile.is_valid())
+        throw DudleyException(msgPrefix + "Open file for output");
+
+    // Define dimensions (num_Elements and dim_Elements are identical,
+    // dim_Elements only appears if > 0)
+    if (! (ncdims[0] = dataFile.add_dim("numNodes", numNodes)) )
+        throw DudleyException(msgPrefix+"add_dim(numNodes)");
+    if (! (ncdims[1] = dataFile.add_dim("numDim", numDim)) )
+        throw DudleyException(msgPrefix+"add_dim(numDim)");
+    if (! (ncdims[2] = dataFile.add_dim("mpi_size_plus_1", mpi_size+1)) )
+        throw DudleyException(msgPrefix+"add_dim(mpi_size)");
+    if (num_Elements > 0)
+        if (! (ncdims[3] = dataFile.add_dim("dim_Elements", num_Elements)) )
+            throw DudleyException(msgPrefix+"add_dim(dim_Elements)");
+    if (num_FaceElements > 0)
+        if (! (ncdims[4] = dataFile.add_dim("dim_FaceElements", num_FaceElements)) )
+         throw DudleyException(msgPrefix+"add_dim(dim_FaceElements)");
+    if (num_Points > 0)
+        if (! (ncdims[6] = dataFile.add_dim("dim_Points", num_Points)) )
+            throw DudleyException(msgPrefix+"add_dim(dim_Points)");
+    if (num_Elements > 0)
+        if (! (ncdims[7] = dataFile.add_dim("dim_Elements_Nodes", num_Elements_numNodes)) )
+            throw DudleyException(msgPrefix+"add_dim(dim_Elements_Nodes)");
+    if (num_FaceElements > 0)
+        if (! (ncdims[8] = dataFile.add_dim("dim_FaceElements_numNodes", num_FaceElements_numNodes)) )
+            throw DudleyException(msgPrefix+"add_dim(dim_FaceElements_numNodes)");
+    if (num_Tags > 0)
+        if (! (ncdims[10] = dataFile.add_dim("dim_Tags", num_Tags)) )
+            throw DudleyException(msgPrefix+"add_dim(dim_Tags)");
+
+    // Attributes: MPI size, MPI rank, Name, order, reduced_order
+    if (!dataFile.add_att("index_size", (int)sizeof(index_t)))
+        throw DudleyException(msgPrefix+"add_att(index_size)");
+    if (!dataFile.add_att("mpi_size", mpi_size))
+        throw DudleyException(msgPrefix+"add_att(mpi_size)");
+    if (!dataFile.add_att("mpi_rank", mpi_rank))
+        throw DudleyException(msgPrefix+"add_att(mpi_rank)");
+    if (!dataFile.add_att("Name", m_name.c_str()))
+        throw DudleyException(msgPrefix+"add_att(Name)");
+    if (!dataFile.add_att("numDim", numDim))
+        throw DudleyException(msgPrefix+"add_att(numDim)");
+    if (!dataFile.add_att("order", 2))
+        throw DudleyException(msgPrefix+"add_att(order)");
+    if (!dataFile.add_att("reduced_order", 0))
+        throw DudleyException(msgPrefix+"add_att(reduced_order)");
+    if (!dataFile.add_att("numNodes", numNodes))
+        throw DudleyException(msgPrefix+"add_att(numNodes)");
+    if (!dataFile.add_att("num_Elements", num_Elements))
+        throw DudleyException(msgPrefix+"add_att(num_Elements)");
+    if (!dataFile.add_att("num_FaceElements", num_FaceElements))
+        throw DudleyException(msgPrefix+"add_att(num_FaceElements)");
+    if (!dataFile.add_att("num_Points", num_Points))
+        throw DudleyException(msgPrefix+"add_att(num_Points)");
+    if (!dataFile.add_att("num_Elements_numNodes", num_Elements_numNodes))
+        throw DudleyException(msgPrefix+"add_att(num_Elements_numNodes)");
+    if (!dataFile.add_att("num_FaceElements_numNodes", num_FaceElements_numNodes))
+        throw DudleyException(msgPrefix+"add_att(num_FaceElements_numNodes)");
+    if (!dataFile.add_att("Elements_TypeId", m_elements->etype))
+        throw DudleyException(msgPrefix+"add_att(Elements_TypeId)");
+    if (!dataFile.add_att("FaceElements_TypeId", m_faceElements->etype))
+        throw DudleyException(msgPrefix+"add_att(FaceElements_TypeId)");
+    if (!dataFile.add_att("Points_TypeId", m_points->etype))
+        throw DudleyException(msgPrefix+"add_att(Points_TypeId)");
+    if (!dataFile.add_att("num_Tags", num_Tags))
+        throw DudleyException(msgPrefix+"add_att(num_Tags)");
+
+    // // // // // Nodes // // // // //
+
+    // Nodes nodeDistribution
+    if (! (ids = dataFile.add_var("Nodes_NodeDistribution", ncIdxType, ncdims[2])) )
+        throw DudleyException(msgPrefix+"add_var(Nodes_NodeDistribution)");
+    index_ptr = &m_nodes->nodesDistribution->first_component[0];
+    if (! (ids->put(index_ptr, mpi_size+1)) )
+        throw DudleyException(msgPrefix+"put(Nodes_NodeDistribution)");
+
+    // Nodes degreesOfFreedomDistribution
+    if (! ( ids = dataFile.add_var("Nodes_DofDistribution", ncIdxType, ncdims[2])) )
+        throw DudleyException(msgPrefix+"add_var(Nodes_DofDistribution)");
+    index_ptr = &m_nodes->dofDistribution->first_component[0];
+    if (! (ids->put(index_ptr, mpi_size+1)) )
+        throw DudleyException(msgPrefix+"put(Nodes_DofDistribution)");
+
+    // Only write nodes if non-empty because NetCDF doesn't like empty arrays
+    // (it treats them as NC_UNLIMITED)
+    if (numNodes > 0) {
+        // Nodes Id
+        if (! ( ids = dataFile.add_var("Nodes_Id", ncIdxType, ncdims[0])) )
+            throw DudleyException(msgPrefix+"add_var(Nodes_Id)");
+        if (! (ids->put(m_nodes->Id, numNodes)) )
+            throw DudleyException(msgPrefix+"put(Nodes_Id)");
+
+        // Nodes Tag
+        if (! ( ids = dataFile.add_var("Nodes_Tag", ncInt, ncdims[0])) )
+            throw DudleyException(msgPrefix+"add_var(Nodes_Tag)");
+        if (! (ids->put(m_nodes->Tag, numNodes)) )
+            throw DudleyException(msgPrefix+"put(Nodes_Tag)");
+
+        // Nodes gDOF
+        if (! ( ids = dataFile.add_var("Nodes_gDOF", ncIdxType, ncdims[0])) )
+            throw DudleyException(msgPrefix+"add_var(Nodes_gDOF)");
+        if (! (ids->put(m_nodes->globalDegreesOfFreedom, numNodes)) )
+            throw DudleyException(msgPrefix+"put(Nodes_gDOF)");
+
+        // Nodes global node index
+        if (! ( ids = dataFile.add_var("Nodes_gNI", ncIdxType, ncdims[0])) )
+            throw DudleyException(msgPrefix+"add_var(Nodes_gNI)");
+        if (! (ids->put(m_nodes->globalNodesIndex, numNodes)) )
+            throw DudleyException(msgPrefix+"put(Nodes_gNI)");
+
+        // Nodes Coordinates
+        if (! ( ids = dataFile.add_var("Nodes_Coordinates", ncDouble, ncdims[0], ncdims[1]) ) )
+            throw DudleyException(msgPrefix+"add_var(Nodes_Coordinates)");
+        if (! (ids->put(m_nodes->Coordinates, numNodes, numDim)) )
+            throw DudleyException(msgPrefix+"put(Nodes_Coordinates)");
+    }
+
+    // // // // // Elements // // // // //
+    if (num_Elements > 0) {
+        // Elements_Id
+        if (! ( ids = dataFile.add_var("Elements_Id", ncIdxType, ncdims[3])) )
+            throw DudleyException(msgPrefix+"add_var(Elements_Id)");
+        if (! (ids->put(m_elements->Id, num_Elements)) )
+            throw DudleyException(msgPrefix+"put(Elements_Id)");
+
+        // Elements_Tag
+        if (! ( ids = dataFile.add_var("Elements_Tag", ncInt, ncdims[3])) )
+            throw DudleyException(msgPrefix+"add_var(Elements_Tag)");
+        if (! (ids->put(m_elements->Tag, num_Elements)) )
+            throw DudleyException(msgPrefix+"put(Elements_Tag)");
+
+        // Elements_Owner
+        if (! ( ids = dataFile.add_var("Elements_Owner", ncInt, ncdims[3])) )
+            throw DudleyException(msgPrefix+"add_var(Elements_Owner)");
+        if (! (ids->put(m_elements->Owner, num_Elements)) )
+            throw DudleyException(msgPrefix+"put(Elements_Owner)");
+
+        // Elements_Color
+        if (! ( ids = dataFile.add_var("Elements_Color", ncIdxType, ncdims[3])) )
+            throw DudleyException(msgPrefix+"add_var(Elements_Color)");
+        if (! (ids->put(m_elements->Color, num_Elements)) )
+            throw DudleyException(msgPrefix+"put(Elements_Color)");
+
+        // Elements_Nodes
+        if (! ( ids = dataFile.add_var("Elements_Nodes", ncIdxType, ncdims[3], ncdims[7]) ) )
+            throw DudleyException(msgPrefix+"add_var(Elements_Nodes)");
+        if (! (ids->put(m_elements->Nodes, num_Elements, num_Elements_numNodes)) )
+            throw DudleyException(msgPrefix+"put(Elements_Nodes)");
+    }
+
+    // // // // // Face_Elements // // // // //
+    if (num_FaceElements > 0) {
+        // FaceElements_Id
+        if (!(ids = dataFile.add_var("FaceElements_Id", ncIdxType, ncdims[4])))
+            throw DudleyException(msgPrefix+"add_var(FaceElements_Id)");
+        if (!(ids->put(m_faceElements->Id, num_FaceElements)))
+            throw DudleyException(msgPrefix+"put(FaceElements_Id)");
+
+        // FaceElements_Tag
+        if (!(ids = dataFile.add_var("FaceElements_Tag", ncInt, ncdims[4])))
+            throw DudleyException(msgPrefix+"add_var(FaceElements_Tag)");
+        if (!(ids->put(m_faceElements->Tag, num_FaceElements)))
+            throw DudleyException(msgPrefix+"put(FaceElements_Tag)");
+
+        // FaceElements_Owner
+        if (!(ids = dataFile.add_var("FaceElements_Owner", ncInt, ncdims[4])))
+            throw DudleyException(msgPrefix+"add_var(FaceElements_Owner)");
+        if (!(ids->put(m_faceElements->Owner, num_FaceElements)))
+            throw DudleyException(msgPrefix+"put(FaceElements_Owner)");
+
+        // FaceElements_Color
+        if (!(ids = dataFile.add_var("FaceElements_Color", ncIdxType, ncdims[4])))
+            throw DudleyException(msgPrefix+"add_var(FaceElements_Color)");
+        if (!(ids->put(m_faceElements->Color, num_FaceElements)))
+            throw DudleyException(msgPrefix+"put(FaceElements_Color)");
+
+        // FaceElements_Nodes
+        if (!(ids = dataFile.add_var("FaceElements_Nodes", ncIdxType, ncdims[4], ncdims[8])))
+            throw DudleyException(msgPrefix+"add_var(FaceElements_Nodes)");
+        if (!(ids->put(m_faceElements->Nodes, num_FaceElements, num_FaceElements_numNodes)))
+            throw DudleyException(msgPrefix+"put(FaceElements_Nodes)");
+    }
+
+    // // // // // Points // // // // //
+    if (num_Points > 0) {
+        // Points_Id
+        if (!(ids = dataFile.add_var("Points_Id", ncIdxType, ncdims[6])))
+            throw DudleyException(msgPrefix+"add_var(Points_Id)");
+        if (!(ids->put(m_points->Id, num_Points)))
+            throw DudleyException(msgPrefix+"put(Points_Id)");
+
+        // Points_Tag
+        if (!(ids = dataFile.add_var("Points_Tag", ncInt, ncdims[6])))
+            throw DudleyException(msgPrefix+"add_var(Points_Tag)");
+        if (!(ids->put(m_points->Tag, num_Points)))
+            throw DudleyException(msgPrefix+"put(Points_Tag)");
+
+        // Points_Owner
+        if (!(ids = dataFile.add_var("Points_Owner", ncInt, ncdims[6])))
+            throw DudleyException(msgPrefix+"add_var(Points_Owner)");
+        if (!(ids->put(m_points->Owner, num_Points)))
+            throw DudleyException(msgPrefix+"put(Points_Owner)");
+
+        // Points_Color
+        if (!(ids = dataFile.add_var("Points_Color", ncIdxType, ncdims[6])))
+            throw DudleyException(msgPrefix+"add_var(Points_Color)");
+        if (!(ids->put(m_points->Color, num_Points)))
+            throw DudleyException(msgPrefix+"put(Points_Color)");
+
+        // Points_Nodes
+        if (!(ids = dataFile.add_var("Points_Nodes", ncIdxType, ncdims[6])))
+            throw DudleyException(msgPrefix+"add_var(Points_Nodes)");
+        if (!(ids->put(m_points->Nodes, num_Points)))
+            throw DudleyException(msgPrefix+"put(Points_Nodes)");
+    }
+
+    // // // // // TagMap // // // // //
+    if (num_Tags > 0) {
+        // Temp storage to gather node IDs
+        vector<int> Tags_keys;
+
+        // Copy tag data into temp arrays
+        TagMap::const_iterator it;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
+            Tags_keys.push_back(it->second);
+        }
+
+        // Tags_keys
+        if (!(ids = dataFile.add_var("Tags_keys", ncInt, ncdims[10])))
+            throw DudleyException(msgPrefix+"add_var(Tags_keys)");
+        if (!(ids->put(&Tags_keys[0], num_Tags)))
+            throw DudleyException(msgPrefix+"put(Tags_keys)");
+
+        // Tags_names_*
+        // These names form an array of strings and should be stored as such,
+        // but the NetCDF manual does not explain how to store a string array,
+        // so one attribute per string is written instead.
+        int i = 0;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++, i++) {
+            stringstream ss;
+            ss << "Tags_name_" << i;
+            const string name(ss.str());
+            if (!dataFile.add_att(name.c_str(), it->first.c_str()))
+                throw DudleyException(msgPrefix+"add_att(Tags_names_X)");
+        }
+    }
+
+    // Send the token to the next MPI process so it can take its turn
+#ifdef ESYS_MPI
+    if (mpi_rank < mpi_size-1)
+        MPI_Send(&num_Tags, 0, MPI_INT, mpi_rank+1, 81800, getMPIComm());
+#endif
+
+    // NetCDF file is closed by destructor of NcFile object
+
+#else
+    throw DudleyException("DudleyDomain::dump: not configured with netCDF. "
+                          "Please contact your installation manager.");
+#endif // ESYS_HAVE_NETCDF
+}
+
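dump() above writes one netCDF file per MPI rank. A minimal round-trip sketch
from Python, assuming a netCDF-enabled build and assuming the dudley module
exposes LoadMesh the same way finley does ("dudley_dom.nc" is a placeholder
path):

    from esys.dudley import Rectangle, LoadMesh

    dom = Rectangle(n0=4, n1=4)
    dom.dump("dudley_dom.nc")         # one file per rank under MPI
    dom2 = LoadMesh("dudley_dom.nc")  # rebuild the domain from the dump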
+string DudleyDomain::getDescription() const
+{
+    return "DudleyMesh";
+}
+
+string DudleyDomain::functionSpaceTypeAsString(int functionSpaceType) const
+{
+    FunctionSpaceNamesMapType::iterator loc;
+    loc = m_functionSpaceTypeNames.find(functionSpaceType);
+    if (loc == m_functionSpaceTypeNames.end()) {
+        return "Invalid function space type code.";
+    } else {
+        return loc->second;
+    }
+}
+
+bool DudleyDomain::isValidFunctionSpaceType(int functionSpaceType) const
+{
+    FunctionSpaceNamesMapType::iterator loc;
+    loc = m_functionSpaceTypeNames.find(functionSpaceType);
+    return (loc != m_functionSpaceTypeNames.end());
+}
+
+void DudleyDomain::setFunctionSpaceTypeNames()
+{
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                DegreesOfFreedom,"Dudley_DegreesOfFreedom [Solution(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Nodes,"Dudley_Nodes [ContinuousFunction(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Elements,"Dudley_Elements [Function(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedElements,"Dudley_Reduced_Elements [ReducedFunction(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                FaceElements,"Dudley_Face_Elements [FunctionOnBoundary(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedFaceElements,"Dudley_Reduced_Face_Elements [ReducedFunctionOnBoundary(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Points,"Dudley_Points [DiracDeltaFunctions(domain)]"));
+}
+
+int DudleyDomain::getContinuousFunctionCode() const
+{
+    return Nodes;
+}
+
+int DudleyDomain::getReducedContinuousFunctionCode() const
+{
+    return Nodes;
+}
+
+int DudleyDomain::getFunctionCode() const
+{
+    return Elements;
+}
+
+int DudleyDomain::getReducedFunctionCode() const
+{
+    return ReducedElements;
+}
+
+int DudleyDomain::getFunctionOnBoundaryCode() const
+{
+    return FaceElements;
+}
+
+int DudleyDomain::getReducedFunctionOnBoundaryCode() const
+{
+    return ReducedFaceElements;
+}
+
+int DudleyDomain::getFunctionOnContactZeroCode() const
+{
+    throw DudleyException("Dudley does not support contact elements.");
+}
+
+int DudleyDomain::getReducedFunctionOnContactZeroCode() const
+{
+    throw DudleyException("Dudley does not support contact elements.");
+}
+
+int DudleyDomain::getFunctionOnContactOneCode() const
+{
+    throw DudleyException("Dudley does not support contact elements.");
+}
+
+int DudleyDomain::getReducedFunctionOnContactOneCode() const
+{
+    throw DudleyException("Dudley does not support contact elements.");
+}
+
+int DudleyDomain::getSolutionCode() const
+{
+    return DegreesOfFreedom;
+}
+
+int DudleyDomain::getReducedSolutionCode() const
+{
+    return DegreesOfFreedom;
+}
+
+int DudleyDomain::getDiracDeltaFunctionsCode() const
+{
+    return Points;
+}
+
+//
+// Return the number of data points summed across all MPI processes
+//
+dim_t DudleyDomain::getNumDataPointsGlobal() const
+{
+    return m_nodes->getGlobalNumNodes();
+}
+
+//
+// return the number of data points per sample and the number of samples
+// needed to represent data on parts of the mesh.
+//
+pair<int,dim_t> DudleyDomain::getDataShape(int functionSpaceCode) const
+{
+    int numDataPointsPerSample = 0;
+    dim_t numSamples = 0;
+    switch (functionSpaceCode) {
+        case Nodes:
+            numDataPointsPerSample = 1;
+            numSamples = m_nodes->getNumNodes();
+        break;
+        case Elements:
+            if (m_elements) {
+                numSamples = m_elements->numElements;
+                numDataPointsPerSample = m_elements->numLocalDim + 1;
+            }
+        break;
+        case ReducedElements:
+            if (m_elements) {
+                numSamples = m_elements->numElements;
+                numDataPointsPerSample = (m_elements->numLocalDim==0) ? 0 : 1;
+            }
+        break;
+        case FaceElements:
+            if (m_faceElements) {
+                numSamples = m_faceElements->numElements;
+                numDataPointsPerSample = m_faceElements->numLocalDim+1;
+            }
+        break;
+        case ReducedFaceElements:
+            if (m_faceElements) {
+                numSamples = m_faceElements->numElements;
+                numDataPointsPerSample = (m_faceElements->numLocalDim==0)? 0:1;
+            }
+        break;
+        case Points:
+            if (m_points) {
+                numSamples = m_points->numElements;
+                numDataPointsPerSample = 1;
+            }
+        break;
+        case DegreesOfFreedom:
+            if (m_nodes) {
+                numSamples = m_nodes->getNumDegreesOfFreedom();
+                numDataPointsPerSample = 1;
+            }
+        break;
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceCode
+                << " for domain " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return pair<int,dim_t>(numDataPointsPerSample, numSamples);
+}
+
+//
+// adds linear PDE of second order into a given stiffness matrix and
+// right hand side
+//
+void DudleyDomain::addPDEToSystem(
+        escript::AbstractSystemMatrix& mat, escript::Data& rhs,
+        const escript::Data& A, const escript::Data& B, const escript::Data& C,
+        const escript::Data& D, const escript::Data& X, const escript::Data& Y,
+        const escript::Data& d, const escript::Data& y,
+        const escript::Data& d_contact, const escript::Data& y_contact,
+        const escript::Data& d_dirac, const escript::Data& y_dirac) const
+{
+    if (!d_contact.isEmpty() || !y_contact.isEmpty())
+        throw DudleyException("Dudley does not support contact elements");
+
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(&mat);
+    if (tm) {
+        tm->resumeFill();
+    }
+#endif
+
+    Assemble_PDE(m_nodes, m_elements, mat.getPtr(), rhs, A, B, C, D, X, Y);
+    Assemble_PDE(m_nodes, m_faceElements, mat.getPtr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(), d,
+                 escript::Data(), y);
+    Assemble_PDE(m_nodes, m_points, mat.getPtr(), rhs, escript::Data(),
+                 escript::Data(), escript::Data(), d_dirac,
+                 escript::Data(), y_dirac);
+
+#ifdef ESYS_HAVE_TRILINOS
+    if (tm) {
+        tm->fillComplete(true);
+    }
+#endif
+}
+
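addPDEToSystem() is the assembly entry point that escript's LinearPDE class
drives; the coefficient names A, B, C, D, X, Y, d, y match the Python-level
coefficients. A hedged user-level sketch:

    from esys.dudley import Rectangle
    from esys.escript import kronecker, whereZero
    from esys.escript.linearPDEs import LinearPDE

    dom = Rectangle(n0=10, n1=10)
    x = dom.getX()
    pde = LinearPDE(dom)
    # -div(A*grad(u)) + D*u = Y, with u fixed to 0 where x0 = 0
    pde.setValue(A=kronecker(dom), D=1., Y=1., q=whereZero(x[0]), r=0.)
    u = pde.getSolution()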
+void DudleyDomain::addPDEToLumpedSystem(escript::Data& mat,
+                                        const escript::Data& D,
+                                        const escript::Data& d,
+                                        const escript::Data& d_dirac,
+                                        bool useHRZ) const
+{
+    Assemble_LumpedSystem(m_nodes, m_elements, mat, D, useHRZ);
+    Assemble_LumpedSystem(m_nodes, m_faceElements, mat, d, useHRZ);
+    Assemble_LumpedSystem(m_nodes, m_points, mat, d_dirac, useHRZ);
+}
+
+//
+// adds linear PDE of second order into the right hand side only
+//
+void DudleyDomain::addPDEToRHS(escript::Data& rhs, const escript::Data& X,
+          const escript::Data& Y, const escript::Data& y,
+          const escript::Data& y_contact, const escript::Data& y_dirac) const
+{
+    if (!y_contact.isEmpty())
+        throw DudleyException("Dudley does not support y_contact");
+
+    Assemble_PDE(m_nodes, m_elements, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), X, Y);
+
+    Assemble_PDE(m_nodes, m_faceElements, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), escript::Data(), y);
+
+    Assemble_PDE(m_nodes, m_points, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), escript::Data(), y_dirac);
+}
+
+//
+// adds PDE of second order into a transport problem
+//
+void DudleyDomain::addPDEToTransportProblem(
+        escript::AbstractTransportProblem& tp, escript::Data& source,
+        const escript::Data& M, const escript::Data& A, const escript::Data& B,
+        const escript::Data& C, const escript::Data& D, const escript::Data& X,
+        const escript::Data& Y, const escript::Data& d, const escript::Data& y,
+        const escript::Data& d_contact, const escript::Data& y_contact,
+        const escript::Data& d_dirac, const escript::Data& y_dirac) const
+{
+    if (!d_contact.isEmpty())
+        throw DudleyException("Dudley does not support d_contact");
+    if (!y_contact.isEmpty())
+        throw DudleyException("Dudley does not support y_contact");
+
+#ifdef ESYS_HAVE_PASO
+    paso::TransportProblem* ptp = dynamic_cast<paso::TransportProblem*>(&tp);
+    if (!ptp)
+        throw ValueError("Dudley only supports Paso transport problems.");
+
+    source.expand();
+
+    escript::ASM_ptr mm(boost::static_pointer_cast<escript::AbstractSystemMatrix>(
+                ptp->borrowMassMatrix()));
+    escript::ASM_ptr tm(boost::static_pointer_cast<escript::AbstractSystemMatrix>(
+                ptp->borrowTransportMatrix()));
+
+    Assemble_PDE(m_nodes, m_elements, mm, source, escript::Data(),
+                 escript::Data(), escript::Data(), M, escript::Data(),
+                 escript::Data());
+    Assemble_PDE(m_nodes, m_elements, tm, source, A, B, C, D, X, Y);
+    Assemble_PDE(m_nodes, m_faceElements, tm, source, escript::Data(),
+                 escript::Data(), escript::Data(), d, escript::Data(), y);
+    Assemble_PDE(m_nodes, m_points, tm, source, escript::Data(),
+                 escript::Data(), escript::Data(), d_dirac, escript::Data(),
+                 y_dirac);
+#else
+    throw DudleyException("Transport problems require the Paso library which "
+                          "is not available.");
+#endif
+}
+
+//
+// interpolates data between different function spaces
+//
+void DudleyDomain::interpolateOnDomain(escript::Data& target,
+                                      const escript::Data& in) const
+{
+    if (*in.getFunctionSpace().getDomain() != *this)
+        throw ValueError("Illegal domain of interpolant.");
+    if (*target.getFunctionSpace().getDomain() != *this)
+        throw ValueError("Illegal domain of interpolation target.");
+
+    switch (in.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            switch (target.getFunctionSpace().getTypeCode()) {
+                case Nodes:
+                case DegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                break;
+                case Elements:
+                case ReducedElements:
+                    Assemble_interpolate(m_nodes, m_elements, in, target);
+                break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                break;
+                case Points:
+                    Assemble_interpolate(m_nodes, m_points, in, target);
+                break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Dudley does not know anything "
+                          "about function space type "
+                          << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+        break;
+        case Elements:
+            if (target.getFunctionSpace().getTypeCode() == Elements) {
+                Assemble_CopyElementData(m_elements, target, in);
+            } else if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
+                Assemble_AverageElementData(m_elements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on elements possible.");
+            }
+            break;
+        case ReducedElements:
+            if (target.getFunctionSpace().getTypeCode() == ReducedElements) {
+                Assemble_CopyElementData(m_elements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on elements "
+                                 "with reduced integration order possible.");
+            }
+            break;
+        case FaceElements:
+            if (target.getFunctionSpace().getTypeCode() == FaceElements) {
+                Assemble_CopyElementData(m_faceElements, target, in);
+            } else if (target.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+                Assemble_AverageElementData(m_faceElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on face elements possible.");
+            }
+            break;
+        case ReducedFaceElements:
+            if (target.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+                Assemble_CopyElementData(m_faceElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on face "
+                         "elements with reduced integration order possible.");
+            }
+            break;
+        case Points:
+            if (target.getFunctionSpace().getTypeCode() == Points) {
+                Assemble_CopyElementData(m_points, target, in);
+            } else {
+                throw ValueError("No interpolation with data on points possible.");
+            }
+            break;
+        case DegreesOfFreedom:
+            switch (target.getFunctionSpace().getTypeCode()) {
+                case DegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                break;
+
+                case Nodes:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in);
+                        temp.expand();
+                        Assemble_CopyNodalData(m_nodes, target, temp);
+                    } else {
+                        Assemble_CopyNodalData(m_nodes, target, in);
+                    }
+                break;
+                case Elements:
+                case ReducedElements:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in, continuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_elements, temp, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_elements, in, target);
+                    }
+                break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in, continuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_faceElements, temp, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                    }
+                break;
+                case Points:
+                    if (getMPISize() > 1) {
+                        //escript::Data temp(in, continuousFunction(*this));
+                    } else {
+                        Assemble_interpolate(m_nodes, m_points, in, target);
+                    }
+                break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Dudley does not know anything "
+                          "about function space type "
+                       << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+            break;
+        default:
+            stringstream ss;
+            ss << "interpolateOnDomain: Dudley does not know anything about "
+                "function space type " << in.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
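The switch above is what escript's interpolate() resolves to on a dudley
domain. A short sketch of the corresponding user calls (standard escript API):

    from esys.dudley import Rectangle
    from esys.escript import Function, ReducedFunction, interpolate

    dom = Rectangle(n0=8, n1=8)
    u = dom.getX()[0]                                # data on Nodes
    u_el = interpolate(u, Function(dom))             # Nodes -> Elements
    u_red = interpolate(u_el, ReducedFunction(dom))  # Elements -> ReducedElements (averaged)
    # interpolating u_el back to ContinuousFunction(dom) raises ValueError,
    # matching the "No interpolation with data on elements possible" branch above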
+//
+// copies the locations of sample points into x
+//
+void DudleyDomain::setToX(escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToX: Illegal domain of data point locations");
+
+    // in case of appropriate function space we can do the job directly:
+    if (arg.getFunctionSpace().getTypeCode() == Nodes) {
+        Assemble_NodeCoordinates(m_nodes, arg);
+    } else {
+        escript::Data tmp_data = Vector(0., continuousFunction(*this), true);
+        Assemble_NodeCoordinates(m_nodes, tmp_data);
+        // this is then interpolated onto arg:
+        interpolateOnDomain(arg, tmp_data);
+    }
+}
+
+//
+// return the normal vectors at the location of data points as a Data object
+//
+void DudleyDomain::setToNormal(escript::Data& normal) const
+{
+    if (*normal.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToNormal: Illegal domain of normal locations");
+
+    if (normal.getFunctionSpace().getTypeCode() == FaceElements ||
+            normal.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+        Assemble_getNormal(m_nodes, m_faceElements, normal);
+    } else {
+        stringstream ss;
+        ss << "setToNormal: Illegal function space type "
+           << normal.getFunctionSpace().getTypeCode();
+        throw ValueError(ss.str());
+    }
+}
+
+//
+// interpolates data to other domain
+//
+void DudleyDomain::interpolateAcross(escript::Data& /*target*/,
+                                    const escript::Data& /*source*/) const
+{
+    throw escript::NotImplementedError("Dudley does not allow interpolation "
+                                       "across domains.");
+}
+
+//
+// calculates the integral of a function defined on arg
+//
+void DudleyDomain::setToIntegrals(vector<double>& integrals,
+                                  const escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToIntegrals: Illegal domain of integration kernel");
+
+    switch (arg.getFunctionSpace().getTypeCode()) {
+        case Nodes: // fall through
+        case DegreesOfFreedom:
+        {
+            escript::Data temp(arg, escript::function(*this));
+            Assemble_integrate(m_nodes, m_elements, temp, integrals);
+        }
+        break;
+        case Elements: // fall through
+        case ReducedElements:
+            Assemble_integrate(m_nodes, m_elements, arg, integrals);
+        break;
+        case FaceElements: // fall through
+        case ReducedFaceElements:
+            Assemble_integrate(m_nodes, m_faceElements, arg, integrals);
+        break;
+        case Points:
+            throw ValueError("Integral of data on points is not supported.");
+        default:
+            stringstream ss;
+            ss << "setToIntegrals: Dudley does not know anything about "
+                "function space type " << arg.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// calculates the gradient of arg
+//
+void DudleyDomain::setToGradient(escript::Data& grad, const escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToGradient: Illegal domain of gradient argument");
+    if (*grad.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToGradient: Illegal domain of gradient");
+
+    escript::Data nodeData;
+    if (getMPISize() > 1) {
+        if (arg.getFunctionSpace().getTypeCode() == DegreesOfFreedom) {
+            nodeData = escript::Data(arg, continuousFunction(*this));
+        } else {
+            nodeData = arg;
+        }
+    } else {
+        nodeData = arg;
+    }
+    switch (grad.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            throw ValueError("Gradient at nodes is not supported.");
+        case Elements:
+        case ReducedElements:
+            Assemble_gradient(m_nodes, m_elements, grad, nodeData);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            Assemble_gradient(m_nodes, m_faceElements, grad, nodeData);
+            break;
+        case Points:
+            throw ValueError("Gradient at points is not supported.");
+        case DegreesOfFreedom:
+            throw ValueError("Gradient at degrees of freedom is not supported.");
+        default:
+            stringstream ss;
+            ss << "Gradient: Dudley does not know anything about function "
+                  "space type " << arg.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
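setToIntegrals() and setToGradient() back the integrate() and grad() functions
on the Python side. A brief hedged example:

    from esys.dudley import Rectangle
    from esys.escript import Function, Scalar, grad, integrate

    dom = Rectangle(n0=8, n1=8, l0=1.0, l1=1.0)
    x = dom.getX()
    u = x[0] * x[1]
    g = grad(u)                                  # gradient lives on Elements
    area = integrate(Scalar(1., Function(dom)))  # integral of 1 over the domain, ~ l0*l1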
+//
+// returns the size of elements
+//
+void DudleyDomain::setToSize(escript::Data& size) const
+{
+    switch (size.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            throw ValueError("Size of nodes is not supported.");
+        case Elements:
+        case ReducedElements:
+            Assemble_getSize(m_nodes, m_elements, size);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            Assemble_getSize(m_nodes, m_faceElements, size);
+            break;
+        case Points:
+            throw ValueError("Size of point elements is not supported.");
+        case DegreesOfFreedom:
+            throw ValueError("Size of degrees of freedom is not supported.");
+        default:
+            stringstream ss;
+            ss << "setToSize: Dudley does not know anything about function "
+                  "space type " << size.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// sets the location of nodes
+//
+void DudleyDomain::setNewX(const escript::Data& newX)
+{
+    if (*newX.getFunctionSpace().getDomain() != *this)
+        throw DudleyException("Illegal domain of new point locations");
+
+    if (newX.getFunctionSpace() == continuousFunction(*this)) {
+        m_nodes->setCoordinates(newX);
+    } else {
+        throw ValueError("As of escript version 3.3 setNewX only accepts "
+                         "ContinuousFunction arguments. Please interpolate.");
+    }
+}
+
+bool DudleyDomain::ownSample(int fs_code, index_t id) const
+{
+#ifdef ESYS_MPI
+    if (getMPISize() > 1) {
+        if (fs_code == Nodes) {
+            const index_t myFirstNode = m_nodes->getFirstNode();
+            const index_t myLastNode = m_nodes->getLastNode();
+            const index_t k = m_nodes->borrowGlobalNodesIndex()[id];
+            return (myFirstNode <= k && k < myLastNode);
+        } else {
+            throw ValueError("ownSample: unsupported function space type");
+        }
+    }
+#endif
+    return true;
+}
+
+//
+// creates a stiffness matrix and initializes it with zeros
+//
+escript::ASM_ptr DudleyDomain::newSystemMatrix(int row_blocksize,
+                            const escript::FunctionSpace& row_functionspace,
+                            int column_blocksize,
+                            const escript::FunctionSpace& column_functionspace,
+                            int type) const
+{
+    // is the domain right?
+    if (*row_functionspace.getDomain() != *this)
+        throw ValueError("domain of row function space does not match the domain of matrix generator.");
+    if (*column_functionspace.getDomain() != *this)
+        throw ValueError("domain of column function space does not match the domain of matrix generator.");
+
+    // is the function space type right?
+    if (row_functionspace.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for system matrix rows.");
+    }
+    if (column_functionspace.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for system matrix columns.");
+    }
+
+    // generate matrix
+    if (type & (int)SMT_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        const_TrilinosGraph_ptr graph(getTrilinosGraph());
+        bool isComplex = (type & (int)SMT_COMPLEX);
+        bool unroll = (type & (int)SMT_UNROLL);
+        escript::ASM_ptr sm(new TrilinosMatrixAdapter(m_mpiInfo, row_blocksize,
+                    row_functionspace, graph, isComplex, unroll));
+        return sm;
+#else
+        throw DudleyException("newSystemMatrix: dudley was not compiled "
+                "with Trilinos support so the Trilinos solver stack cannot be "
+                "used.");
+#endif
+    } else if (type & (int)SMT_PASO) {
+#ifdef ESYS_HAVE_PASO
+        paso::SystemMatrixPattern_ptr pattern(getPasoPattern());
+        paso::SystemMatrix_ptr sm(new paso::SystemMatrix(type, pattern,
+                  row_blocksize, column_blocksize, false, row_functionspace,
+                  column_functionspace));
+        return sm;
+#else
+        throw DudleyException("newSystemMatrix: dudley was not compiled "
+                "with Paso support so the Paso solver stack cannot be used.");
+#endif
+    } else {
+        throw DudleyException("newSystemMatrix: unknown matrix type ID");
+    }
+}
+
+//
+// creates a TransportProblem
+//
+escript::ATP_ptr DudleyDomain::newTransportProblem(int blocksize,
+                                             const escript::FunctionSpace& fs,
+                                             int type) const
+{
+    // is the domain right?
+    if (*fs.getDomain() != *this)
+        throw ValueError("domain of function space does not match the domain of transport problem generator.");
+    // is the function space type right
+    if (fs.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for transport problem.");
+    }
+
+#ifdef ESYS_HAVE_PASO
+    // generate transport problem
+    paso::SystemMatrixPattern_ptr pattern(getPasoPattern());
+    paso::TransportProblem_ptr transportProblem(new paso::TransportProblem(
+                                              pattern, blocksize, fs));
+    return transportProblem;
+#else
+    throw DudleyException("Transport problems require the Paso library which "
+                          "is not available.");
+#endif
+}
+
+//
+// returns true if data on functionSpaceCode is considered cell-centered
+//
+bool DudleyDomain::isCellOriented(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+        case DegreesOfFreedom:
+            return false;
+        case Elements:
+        case FaceElements:
+        case Points:
+        case ReducedElements:
+        case ReducedFaceElements:
+            return true;
+    }
+    stringstream ss;
+    ss << "isCellOriented: Dudley does not know anything about "
+          "function space type " << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+bool
+DudleyDomain::commonFunctionSpace(const vector<int>& fs, int& resultcode) const
+{
+    if (fs.empty())
+        return false;
+    // The idea is to use equivalence classes, i.e. types which can be
+    // interpolated back and forth
+    //    class 1: DOF <-> Nodes
+    //    class 3: Points
+    //    class 4: Elements
+    //    class 5: ReducedElements
+    //    class 6: FaceElements
+    //    class 7: ReducedFaceElements
+
+    // There is also a set of lines. Interpolation is possible down a line but
+    // not between lines.
+    // class 1 belongs to all lines so it isn't considered.
+    //    line 0: class 3
+    //    line 1: class 4,5
+    //    line 2: class 6,7
+
+    // For classes with multiple members (class 1) we have vars to record
+    // if there is at least one instance -> hasnodes is true if we have at
+    // least one instance of Nodes.
+    vector<int> hasclass(8);
+    vector<int> hasline(3);
+    bool hasnodes = false;
+    for (int i = 0; i < fs.size(); ++i) {
+        switch (fs[i]) {
+            case Nodes:
+                hasnodes = true; // fall through
+            case DegreesOfFreedom:
+                hasclass[1] = 1;
+                break;
+            case Points:
+                hasline[0] = 1;
+                hasclass[3] = 1;
+                break;
+            case Elements:
+                hasclass[4] = 1;
+                hasline[1] = 1;
+                break;
+            case ReducedElements:
+                hasclass[5] = 1;
+                hasline[1] = 1;
+                break;
+            case FaceElements:
+                hasclass[6] = 1;
+                hasline[2] = 1;
+                break;
+            case ReducedFaceElements:
+                hasclass[7] = 1;
+                hasline[2] = 1;
+                break;
+            default:
+                return false;
+        }
+    }
+    int totlines = hasline[0]+hasline[1]+hasline[2];
+    // fail if we have more than one leaf group
+    if (totlines > 1)
+        // there are at least two branches we can't interpolate between
+        return false;
+
+    if (totlines == 1) {
+        if (hasline[0] == 1) // we have points
+            resultcode = Points;
+        else if (hasline[1] == 1) {
+            if (hasclass[5] == 1)
+                resultcode=ReducedElements;
+            else
+                resultcode=Elements;
+        } else if (hasline[2] == 1) {
+            if (hasclass[7] == 1)
+                resultcode=ReducedFaceElements;
+            else
+                resultcode=FaceElements;
+        }
+    } else { // totlines==0
+        // something from class 1
+        resultcode = (hasnodes ? Nodes : DegreesOfFreedom);
+    }
+    return true;
+}
+
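commonFunctionSpace() is consulted when escript has to bring the operands of a
binary operation onto a single function space; with the classes and lines
above, data on Nodes combined with data on ReducedElements ends up on
ReducedElements. A hedged illustration:

    from esys.dudley import Rectangle
    from esys.escript import ReducedFunction, interpolate

    dom = Rectangle(n0=8, n1=8)
    u = dom.getX()[0]                         # Nodes
    v = interpolate(u, ReducedFunction(dom))  # ReducedElements
    w = u + v   # u is interpolated to ReducedElements before the addition
    print(w.getFunctionSpace())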
+bool DudleyDomain::probeInterpolationOnDomain(int functionSpaceType_source,
+                                              int functionSpaceType_target) const
+{
+    switch(functionSpaceType_source) {
+        case Nodes:
+            switch (functionSpaceType_target) {
+                case Nodes:
+                case DegreesOfFreedom:
+                case Elements:
+                case ReducedElements:
+                case FaceElements:
+                case ReducedFaceElements:
+                case Points:
+                    return true;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Dudley does not know "
+                        "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+        case Elements:
+            return (functionSpaceType_target == Elements ||
+                    functionSpaceType_target == ReducedElements);
+        case ReducedElements:
+            return (functionSpaceType_target == ReducedElements);
+        case FaceElements:
+            return (functionSpaceType_target == FaceElements ||
+                    functionSpaceType_target == ReducedFaceElements);
+        case ReducedFaceElements:
+            return (functionSpaceType_target == ReducedFaceElements);
+        case Points:
+            return (functionSpaceType_target == Points);
+        case DegreesOfFreedom:
+            switch (functionSpaceType_target) {
+                case DegreesOfFreedom:
+                case Nodes:
+                case Elements:
+                case ReducedElements:
+                case Points:
+                case FaceElements:
+                case ReducedFaceElements:
+                    return true;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Dudley does not know "
+                          "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+            break;
+    }
+    stringstream ss;
+    ss << "Interpolation On Domain: Dudley does not know anything "
+          "about function space type " << functionSpaceType_source;
+    throw ValueError(ss.str());
+}
+
+signed char DudleyDomain::preferredInterpolationOnDomain(
+        int functionSpaceType_source, int functionSpaceType_target) const
+{
+    if (probeInterpolationOnDomain(functionSpaceType_source, functionSpaceType_target))
+        return 1;
+    if (probeInterpolationOnDomain(functionSpaceType_target, functionSpaceType_source))
+        return -1;
+
+    return 0;
+}
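+
+// Editorial sketch (not part of the source): given the probe logic above,
+// this convention works out to, for example,
+//   preferredInterpolationOnDomain(Nodes, Elements)  ->  1  (interpolate source to target)
+//   preferredInterpolationOnDomain(Elements, Nodes)  -> -1  (only the reverse direction works)
+//   preferredInterpolationOnDomain(Points, Elements) ->  0  (no interpolation possible)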
+
+bool DudleyDomain::probeInterpolationAcross(int /*source*/,
+        const AbstractDomain& /*targetDomain*/, int /*target*/) const
+{
+    return false;
+}
+
+bool DudleyDomain::operator==(const AbstractDomain& other) const
+{
+    const DudleyDomain* temp = dynamic_cast<const DudleyDomain*>(&other);
+    if (temp) {
+        return (m_nodes == temp->m_nodes &&
+                m_elements == temp->m_elements &&
+                m_faceElements == temp->m_faceElements &&
+                m_points == temp->m_points);
+    }
+    return false;
+}
+
+bool DudleyDomain::operator!=(const AbstractDomain& other) const
+{
+    return !(operator==(other));
+}
+
+int DudleyDomain::getSystemMatrixTypeId(const bp::object& options) const
+{
+    const escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy>(options);
+
+    int package = sb.getPackage();
+    escript::SolverOptions method = sb.getSolverMethod();
+#ifdef ESYS_HAVE_TRILINOS
+    bool isDirect = escript::isDirectSolver(method);
+#endif
+
+    // the configuration of dudley should have ensured that we have either
+    // Paso or Trilinos, so here's how we prioritize
+#if defined(ESYS_HAVE_PASO) && defined(ESYS_HAVE_TRILINOS)
+    // we have Paso & Trilinos so use Trilinos for parallel direct solvers and
+    // for complex problems
+    if (package == escript::SO_DEFAULT) {
+        if ((method == escript::SO_METHOD_DIRECT && getMPISize() > 1)
+                || isDirect
+                || sb.isComplex()) {
+            package = escript::SO_PACKAGE_TRILINOS;
+        }
+    }
+#endif
+#ifdef ESYS_HAVE_PASO
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_PASO;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_TRILINOS;
+#endif
+    if (package == escript::SO_PACKAGE_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        int type = (int)SMT_TRILINOS;
+        if (sb.isComplex())
+            type |= (int)SMT_COMPLEX;
+        // This is required because MueLu (AMG) and Amesos2 (direct) do not
+        // support block matrices at this point. Remove if they ever do...
+        if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_AMG ||
+                sb.getPreconditioner() == escript::SO_PRECONDITIONER_ILUT ||
+                isDirect) {
+            type |= (int)SMT_UNROLL;
+        }
+        return type;
+#else
+        throw DudleyException("Trilinos requested but not built with Trilinos.");
+#endif
+    }
+#ifdef ESYS_HAVE_PASO
+    if (sb.isComplex()) {
+        throw NotImplementedError("Paso does not support complex-valued matrices");
+    }
+    return (int)SMT_PASO | paso::SystemMatrix::getSystemMatrixTypeId(
+                method, sb.getPreconditioner(), sb.getPackage(),
+                sb.isSymmetric(), m_mpiInfo);
+#else
+    throw DudleyException("Unable to find a working solver library!");
+#endif
+}
+
+int DudleyDomain::getTransportTypeId(int solver, int preconditioner,
+                                    int package, bool symmetry) const
+{
+#ifdef ESYS_HAVE_PASO
+    return paso::TransportProblem::getTypeId(solver, preconditioner, package,
+                                             symmetry, getMPI());
+#else
+    throw DudleyException("Transport solvers require Paso but dudley was not "
+                          "compiled with Paso!");
+#endif
+}
+
+escript::Data DudleyDomain::getX() const
+{
+    return continuousFunction(*this).getX();
+}
+
+escript::Data DudleyDomain::getNormal() const
+{
+    return functionOnBoundary(*this).getNormal();
+}
+
+escript::Data DudleyDomain::getSize() const
+{
+    return escript::function(*this).getSize();
+}
+
+const index_t* DudleyDomain::borrowSampleReferenceIDs(int functionSpaceType) const
+{
+    index_t* out = NULL;
+    switch (functionSpaceType) {
+        case Nodes:
+            out = m_nodes->Id;
+            break;
+        case Elements:
+            out = m_elements->Id;
+            break;
+        case ReducedElements:
+            out = m_elements->Id;
+            break;
+        case FaceElements:
+            out = m_faceElements->Id;
+            break;
+        case ReducedFaceElements:
+            out = m_faceElements->Id;
+            break;
+        case Points:
+            out = m_points->Id;
+            break;
+        case DegreesOfFreedom:
+            out = m_nodes->degreesOfFreedomId;
+            break;
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceType
+               << " for domain: " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return out;
+}
+
+int DudleyDomain::getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const
+{
+    int out = 0;
+    switch (functionSpaceType) {
+        case Nodes:
+            out = m_nodes->Tag[sampleNo];
+            break;
+        case Elements:
+        case ReducedElements:
+            out = m_elements->Tag[sampleNo];
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            out = m_faceElements->Tag[sampleNo];
+            break;
+        case Points:
+            out = m_points->Tag[sampleNo];
+            break;
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags.");
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceType
+               << " for domain: " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return out;
+}
+
+
+void DudleyDomain::setTags(int functionSpaceType, int newTag,
+                           const escript::Data& mask) const
+{
+    switch (functionSpaceType) {
+        case Nodes:
+            m_nodes->setTags(newTag, mask);
+            break;
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags");
+        case Elements: // fall through
+        case ReducedElements:
+            m_elements->setTags(newTag, mask);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            m_faceElements->setTags(newTag, mask);
+            break;
+        case Points:
+            m_points->setTags(newTag, mask);
+            break;
+        default:
+            stringstream ss;
+            ss << "Dudley does not know anything about function space type "
+               << functionSpaceType;
+            throw ValueError(ss.str());
+    }
+}
+
+void DudleyDomain::setTagMap(const string& name, int tag)
+{
+    m_tagMap[name] = tag;
+}
+
+int DudleyDomain::getTag(const string& name) const
+{
+    TagMap::const_iterator it = m_tagMap.find(name);
+    if (it == m_tagMap.end()) {
+        stringstream ss;
+        ss << "getTag: unknown tag name " << name << ".";
+        throw escript::ValueError(ss.str());
+    }
+    return it->second;
+}
+
+bool DudleyDomain::isValidTagName(const string& name) const
+{
+    return (m_tagMap.count(name) > 0);
+}
+
+string DudleyDomain::showTagNames() const
+{
+    stringstream ss;
+    TagMap::const_iterator it = m_tagMap.begin();
+    while (it != m_tagMap.end()) {
+        ss << it->first;
+        ++it;
+        if (it != m_tagMap.end())
+            ss << ", ";
+    }
+    return ss.str();
+}
+
+int DudleyDomain::getNumberOfTagsInUse(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+            return m_nodes->tagsInUse.size();
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags");
+        case Elements: // fall through
+        case ReducedElements:
+            return m_elements->tagsInUse.size();
+        case FaceElements: // fall through
+        case ReducedFaceElements:
+            return m_faceElements->tagsInUse.size();
+        case Points:
+            return m_points->tagsInUse.size();
+    }
+    stringstream ss;
+    ss << "Dudley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+const int* DudleyDomain::borrowListOfTagsInUse(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+            if (m_nodes->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_nodes->tagsInUse[0];
+        case DegreesOfFreedom:
+            throw DudleyException("DegreesOfFreedom does not support tags");
+        case Elements: // fall through
+        case ReducedElements:
+            if (m_elements->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_elements->tagsInUse[0];
+        case FaceElements: // fall through
+        case ReducedFaceElements:
+            if (m_faceElements->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_faceElements->tagsInUse[0];
+        case Points:
+            if (m_points->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_points->tagsInUse[0];
+    }
+    stringstream ss;
+    ss << "Dudley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+
+bool DudleyDomain::canTag(int functionSpaceCode) const
+{
+    switch(functionSpaceCode) {
+        case Nodes:
+        case Elements:
+        case ReducedElements:
+        case FaceElements:
+        case ReducedFaceElements:
+        case Points:
+            return true;
+        default:
+            return false;
+    }
+}
+
+DudleyDomain::StatusType DudleyDomain::getStatus() const
+{
+    return m_nodes->status;
+}
+
+int DudleyDomain::getApproximationOrder(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+        case DegreesOfFreedom:
+            return 1;
+        case Elements:
+        case FaceElements:
+        case Points:
+            return 2;
+        case ReducedElements:
+        case ReducedFaceElements:
+            return 0;
+    }
+    stringstream ss;
+    ss << "Dudley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+escript::Data DudleyDomain::randomFill(
+                                const escript::DataTypes::ShapeType& shape,
+                                const escript::FunctionSpace& what, long seed,
+                                const bp::tuple& filter) const
+{
+    escript::Data towipe(0, shape, what, true);
+    // since we just made this object, no sharing is possible and we don't
+    // need to check for exclusive write
+    escript::DataTypes::RealVectorType& dv(towipe.getExpandedVectorReference());
+    escript::randomFillArray(seed, &dv[0], dv.size());
+    return towipe;
+}
+
+/// prepares the mesh for further use
+void DudleyDomain::prepare(bool optimize)
+{
+    // first step is to distribute the elements according to a global
+    // distribution of DOF
+    IndexVector distribution(m_mpiInfo->size + 1);
+
+    // first we create dense labeling for the DOFs
+    dim_t newGlobalNumDOFs = m_nodes->createDenseDOFLabeling();
+
+    // create a distribution of the global DOFs and determine the MPI rank
+    // controlling the DOFs on this processor
+    m_mpiInfo->setDistribution(0, newGlobalNumDOFs - 1, &distribution[0]);
+
+    // now the mesh is re-distributed according to the distribution vector
+    // this will redistribute the Nodes and Elements including overlap and
+    // will create an element colouring but will not create any mappings
+    // (see later in this function)
+    distributeByRankOfDOF(distribution);
+
+    // at this stage we are able to start an optimization of the DOF
+    // distribution using ParaMetis. On return distribution is altered and
+    // new DOF IDs have been assigned
+    if (optimize && m_mpiInfo->size > 1) {
+        optimizeDOFDistribution(distribution);
+        distributeByRankOfDOF(distribution);
+    }
+    // the local labelling of the degrees of freedom is optimized
+    if (optimize) {
+        optimizeDOFLabeling(distribution);
+    }
+
+    // rearrange elements with the aim of bringing elements closer to memory
+    // locations of the nodes (distributed shared memory!):
+    optimizeElementOrdering();
+
+    // create the global indices
+    IndexVector nodeDistribution(m_mpiInfo->size + 1);
+
+    m_nodes->createDenseNodeLabeling(nodeDistribution, distribution);
+    // create the missing mappings
+    createMappings(distribution, nodeDistribution);
+
+    updateTagList();
+}
+
+/// tries to reduce the number of colours for all element files
+void DudleyDomain::createColoring(const index_t* node_localDOF_map)
+{
+    m_elements->createColoring(m_nodes->getNumNodes(), node_localDOF_map);
+    m_faceElements->createColoring(m_nodes->getNumNodes(), node_localDOF_map);
+    m_points->createColoring(m_nodes->getNumNodes(), node_localDOF_map);
+}
+
+/// redistributes elements to minimize communication during assemblage
+void DudleyDomain::optimizeElementOrdering()
+{
+    m_elements->optimizeOrdering();
+    m_faceElements->optimizeOrdering();
+    m_points->optimizeOrdering();
+}
+
+/// regenerates list of tags in use for node file and element files
+void DudleyDomain::updateTagList()
+{
+    m_nodes->updateTagList();
+    m_elements->updateTagList();
+    m_faceElements->updateTagList();
+    m_points->updateTagList();
+}
+
+} // end of namespace
+
diff --git a/dudley/src/DudleyDomain.h b/dudley/src/DudleyDomain.h
new file mode 100644
index 0000000..760284d
--- /dev/null
+++ b/dudley/src/DudleyDomain.h
@@ -0,0 +1,743 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __DUDLEY_DOMAIN_H__
+#define __DUDLEY_DOMAIN_H__
+
+/****************************************************************************
+
+   Dudley: Domain
+
+   A mesh is built from nodes and elements which describe the domain, surface,
+   and point sources (the latter are needed to establish links with other
+   codes, in particular particle codes). The nodes are stored in a NodeFile
+   and elements in ElementFiles. Dudley domains have three ElementFiles
+   containing the elements, surface and point sources, respectively.
+   Notice that the surface elements do not necessarily cover the entire
+   surface of the domain.
+
+   The element type is either Tri3 or Tet4 depending on dimensionality
+   and also determines the type of surface elements to be used.
+
+   The numbering of the nodes starts with 0.
+
+   Important: it is assumed that every node appears in at least one element or
+   surface element and that any node used in an element, surface element or as
+   a point is specified in the NodeFile, see also resolveNodeIds.
+
+   All nodes and elements are tagged. Tags allow nodes and elements to be
+   grouped. A typical application is to mark surface elements on a
+   certain portion of the domain with the same tag. All these surface
+   elements can then be assigned the same value e.g. for the pressure.
+
+*****************************************************************************/
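+
+/* Editorial usage sketch (assumed workflow, not part of the upstream docs):
+   tag keys are usually accessed through the name map declared further down
+   in this class, e.g.
+
+       domain->setTagMap("top_surface", 2);       // give tag key 2 a name
+       int tag = domain->getTag("top_surface");   // -> 2
+       domain->setTags(FaceElements, tag, mask);  // tag faces where mask > 0
+
+   where `domain` is a DudleyDomain* and `mask` is an escript::Data object
+   defined on the face elements of this domain.
+*/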
+
+#include <dudley/Dudley.h>
+#include <dudley/ElementFile.h>
+#include <dudley/NodeFile.h>
+#include <dudley/Util.h>
+
+#include <escript/AbstractContinuousDomain.h>
+#include <escript/FunctionSpace.h>
+#include <escript/FunctionSpaceFactory.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrixPattern.h>
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/types.h>
+#endif
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace dudley {
+
+typedef std::map<std::string, int> TagMap;
+
+enum SystemMatrixType {
+    SMT_PASO = 1<<8,
+    SMT_TRILINOS = 1<<10,
+    SMT_COMPLEX = 1<<16,
+    SMT_UNROLL = 1<<17
+};
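+
+// Editorial note (sketch): these values are bit flags; getSystemMatrixTypeId()
+// combines them, e.g. a Trilinos matrix for a complex-valued problem is
+// reported as (SMT_TRILINOS | SMT_COMPLEX), so callers can test
+//     if (type & SMT_TRILINOS) { /* build a Trilinos matrix */ }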
+
+/**
+    \brief
+    DudleyDomain implements the AbstractContinuousDomain interface for the
+    Dudley library.
+*/
+class DudleyDomain : public escript::AbstractContinuousDomain
+{
+public:
+    /**
+     \brief
+     recovers domain from a dump file
+     \param filename the name of the file
+    */
+    static escript::Domain_ptr load(const std::string& filename);
+
+    /**
+     \brief
+     reads a mesh from a fly file. For MPI parallel runs the mesh is fanned
+     out to multiple processes.
+     \param mpiInfo the MPI information structure
+     \param filename the name of the file
+     \param optimize whether to optimize the node labels
+    */
+    static escript::Domain_ptr read(escript::JMPI mpiInfo,
+                                    const std::string& filename, bool optimize);
+
+    /**
+     \brief
+     reads a gmsh mesh file.
+     \param mpiInfo the MPI information structure
+     \param filename the name of the gmsh file
+     \param numDim spatial dimensionality
+     \param optimize whether to optimize the node labels 
+    */
+    static escript::Domain_ptr readGmsh(escript::JMPI mpiInfo,
+                                        const std::string& filename, int numDim,
+                                        bool optimize);
+
+    /**
+     \brief
+     Creates a 2-dimensional rectangular domain.
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param l0 Input - length of domain in first dimension (width)
+     \param l1 Input - length of domain in second dimension (height)
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr create2D(dim_t NE0, dim_t NE1, double l0,
+                                        double l1, bool optimize,
+                                        escript::JMPI jmpi);
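+
+    /*
+     Editorial usage sketch (assumes an escript::JMPI instance `mpi`; not part
+     of the upstream documentation):
+
+         escript::Domain_ptr dom = DudleyDomain::create2D(10, 10, 1.0, 1.0, true, mpi);
+
+     creates a 1.0 x 1.0 domain with NE0=NE1=10, meshed with the Tri3
+     elements described in the header comment above.
+    */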
+
+    /**
+     \brief
+     Creates a 3-dimensional rectangular domain.
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param NE2 Input - number of elements in third dimension
+     \param l0 Input - length of domain in first dimension (width)
+     \param l1 Input - length of domain in second dimension (height)
+     \param l2 Input - length of domain in third dimension (depth)
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr create3D(dim_t NE0, dim_t NE1, dim_t NE2,
+                                        double l0, double l1, double l2,
+                                        bool optimize, escript::JMPI jmpi);
+
+    /**
+     \brief
+     Constructor for DudleyDomain
+
+     \param name a descriptive name for the domain
+     \param numDim dimensionality of the domain (2 or 3)
+     \param jmpi shared pointer to MPI Information to be used
+    */
+    DudleyDomain(const std::string& name, int numDim, escript::JMPI jmpi);
+
+    /**
+     \brief
+     Copy constructor.
+    */
+    DudleyDomain(const DudleyDomain& in);
+
+    /**
+     \brief
+     Destructor for DudleyDomain
+    */
+    ~DudleyDomain();
+
+    /**
+     \brief
+     returns a pointer to this domain's node file
+    */
+    NodeFile* getNodes() const { return m_nodes; }
+
+    /**
+     \brief
+     replaces the element file by `elements`
+    */
+    void setElements(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's element file
+    */
+    ElementFile* getElements() const { return m_elements; }
+
+    /**
+     \brief
+     replaces the face element file by `elements`
+    */
+    void setFaceElements(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's face element file
+    */
+    ElementFile* getFaceElements() const { return m_faceElements; }
+
+    /**
+     \brief
+     replaces the point element file by `elements`
+    */
+    void setPoints(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's point (nodal) element file
+    */
+    ElementFile* getPoints() const { return m_points; }
+
+    /**
+     \brief
+     returns a reference to the MPI information wrapper for this domain
+    */
+    virtual escript::JMPI getMPI() const { return m_mpiInfo; }
+
+    /**
+     \brief
+     returns the number of processors used for this domain
+    */
+    virtual int getMPISize() const { return m_mpiInfo->size; }
+
+    /**
+     \brief
+     returns the MPI rank of this processor
+    */
+    virtual int getMPIRank() const { return m_mpiInfo->rank; }
+
+    /**
+     \brief
+     If compiled for MPI then execute an MPI_Barrier, else do nothing
+    */
+    virtual void MPIBarrier() const;
+
+    /**
+     \brief
+     returns true if on MPI processor 0, else false
+    */
+    virtual bool onMasterProcessor() const { return getMPIRank() == 0; }
+
+    MPI_Comm getMPIComm() const { return m_mpiInfo->comm; }
+
+    /**
+     \brief
+     writes the current mesh to a file with the given name in the fly file
+     format.
+     \param fileName Input - The name of the file to write to.
+    */
+    void write(const std::string& fileName) const;
+
+    /**
+     \brief
+     prints information about the mesh to standard output
+     \param full whether to include coordinate values and ids
+    */
+    void Print_Mesh_Info(bool full=false) const;
+
+    /**
+     \brief
+     dumps the mesh to a file with the given name.
+     \param fileName Input - The name of the file
+    */
+    void dump(const std::string& fileName) const;
+
+    /**
+     \brief
+     Return the tag key for the given sample number.
+     \param functionSpaceType Input - The function space type.
+     \param sampleNo Input - The sample number.
+    */
+    int getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const;
+
+    /**
+     \brief
+     Return the reference numbers of the samples for the given function space type.
+     \param functionSpaceType Input - The function space type.
+    */
+    const index_t* borrowSampleReferenceIDs(int functionSpaceType) const;
+
+    /**
+     \brief
+     Returns true if the given integer is a valid function space type
+     for this domain.
+    */
+    virtual bool isValidFunctionSpaceType(int functionSpaceType) const;
+
+    /**
+     \brief
+     Return a description for this domain
+    */
+    virtual std::string getDescription() const;
+
+    /**
+     \brief
+     Return a description for the given function space type code
+    */
+    virtual std::string functionSpaceTypeAsString(int functionSpaceType) const;
+
+    /**
+     \brief
+     Build the table of function space type names
+    */
+    void setFunctionSpaceTypeNames();
+
+    /**
+     \brief
+     Return a continuous FunctionSpace code
+    */
+    virtual int getContinuousFunctionCode() const;
+
+    /**
+     \brief
+     Return a continuous on reduced order nodes FunctionSpace code
+    */
+    virtual int getReducedContinuousFunctionCode() const;
+
+    /**
+     \brief
+     Return a function FunctionSpace code
+    */
+    virtual int getFunctionCode() const;
+
+    /**
+     \brief
+     Return a function with reduced integration order FunctionSpace code
+    */
+    virtual int getReducedFunctionCode() const;
+
+    /**
+     \brief
+     Return a function on boundary FunctionSpace code
+    */
+    virtual int getFunctionOnBoundaryCode() const;
+
+    /**
+     \brief
+     Return a function on boundary with reduced integration order FunctionSpace code
+    */
+    virtual int getReducedFunctionOnBoundaryCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactZero code
+    */
+    virtual int getFunctionOnContactZeroCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactZero code  with reduced integration order
+    */
+    virtual int getReducedFunctionOnContactZeroCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactOne code
+    */
+    virtual int getFunctionOnContactOneCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactOne code  with reduced integration order
+    */
+    virtual int getReducedFunctionOnContactOneCode() const;
+
+    /**
+     \brief
+     Return a Solution code
+    */
+    virtual int getSolutionCode() const;
+
+    /**
+     \brief
+     Return a ReducedSolution code
+    */
+    virtual int getReducedSolutionCode() const;
+
+    /**
+     \brief
+     Return a DiracDeltaFunctions code
+    */
+    virtual int getDiracDeltaFunctionsCode() const;
+
+    /**
+     \brief
+     maps function space type codes to their names
+    */
+    typedef std::map<int, std::string> FunctionSpaceNamesMapType;
+
+    /**
+     \brief
+     returns the spatial dimension of the domain
+    */
+    virtual int getDim() const { return m_nodes->numDim; }
+
+    /**
+     \brief
+      Returns a status indicator of the domain. The status identifier should be unique over
+      the lifetime of the object but may be updated if changes to the domain happen, e.g.
+      modifications to its geometry.
+    */
+    virtual StatusType getStatus() const;
+
+    /**
+     \brief
+     Return the number of data points summed across all MPI processes
+    */
+    virtual dim_t getNumDataPointsGlobal() const;
+
+    /**
+     \brief
+     Return the number of data points per sample, and the number of samples as a pair.
+     \param functionSpaceCode Input -
+    */
+    virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
+
+    /**
+     \brief
+     copies the locations of the data points into arg. The domain of arg has
+     to match this domain.
+    */
+    virtual void setToX(escript::Data& arg) const;
+
+    /**
+     \brief
+     sets a map from a clear tag name to a tag key
+     \param name Input - tag name.
+     \param tag Input - tag key.
+    */
+    virtual void setTagMap(const std::string& name, int tag);
+
+    /**
+     \brief
+     Return the tag key for tag name.
+     \param name Input - tag name
+    */
+    virtual int getTag(const std::string& name) const;
+
+    /**
+     \brief
+     Returns true if name is a defined tag name.
+     \param name Input - tag name to be checked.
+    */
+    virtual bool isValidTagName(const std::string& name) const;
+
+    /**
+     \brief
+     Returns all tag names in a single string separated by commas
+    */
+    virtual std::string showTagNames() const;
+
+    /**
+     \brief
+     assigns new location to the domain
+    */
+    virtual void setNewX(const escript::Data& arg);
+
+    /**
+     \brief
+     interpolates data given on source onto target where source and target have to be given on the same domain.
+    */
+    virtual void interpolateOnDomain(escript::Data& target,
+                                     const escript::Data& source) const;
+
+    virtual bool probeInterpolationOnDomain(int functionSpaceType_source,
+                                           int functionSpaceType_target) const;
+
+    virtual signed char preferredInterpolationOnDomain(int functionSpaceType_source, int functionSpaceType_target) const;
+
+    /**
+    \brief given a vector of FunctionSpace type codes, passes back a code they can all be interpolated to.
+    \return true if the result is valid, false if not
+    */
+    bool commonFunctionSpace(const std::vector<int>& fs, int& resultcode) const;
+
+    /**
+     \brief
+     interpolates data given on source onto target where source and target are given on different domains.
+    */
+    virtual void interpolateAcross(escript::Data& target, const escript::Data& source) const;
+
+    /**
+     \brief determines whether interpolation from source to target is possible.
+    */
+    virtual bool probeInterpolationAcross(int functionSpaceType_source,
+                                  const escript::AbstractDomain& targetDomain,
+                                  int functionSpaceType_target) const;
+
+    /**
+     \brief
+     copies the surface normals at data points into out. The actual function space to be considered
+     is defined by out. out has to be defined on this.
+    */
+    virtual void setToNormal(escript::Data& out) const;
+
+    /**
+     \brief
+     copies the size of samples into out. The actual function space to be considered
+     is defined by out. out has to be defined on this.
+    */
+    virtual void setToSize(escript::Data& out) const;
+
+    /**
+     \brief
+     copies the gradient of arg into grad. The actual function space to be considered
+     for the gradient is defined by grad. arg and grad have to be defined on this.
+    */
+    virtual void setToGradient(escript::Data& grad, const escript::Data& arg) const;
+
+    /**
+     \brief
+     copies the integrals of the function defined by arg into integrals.
+     arg has to be defined on this.
+    */
+    virtual void setToIntegrals(std::vector<double>& integrals, const escript::Data& arg) const;
+
+    /**
+     \brief
+     return the identifier of the matrix type to be used for the global
+     stiffness matrix when a particular solver, package, preconditioner,
+     and symmetric matrix is used.
+
+     \param options a SolverBuddy instance with the desired options set
+    */
+    virtual int getSystemMatrixTypeId(const boost::python::object& options) const;
+
+    /**
+     \brief
+     return the identifier of the transport problem type to be used when a particular solver, preconditioner, package
+     and symmetric matrix is used.
+     \param solver
+     \param preconditioner
+     \param package
+     \param symmetry
+    */
+    virtual int getTransportTypeId(int solver, int preconditioner, int package,
+                                   bool symmetry) const;
+
+    /**
+     \brief
+     returns true if data on this domain and a function space of type functionSpaceCode has to
+     be considered as cell centered data.
+    */
+    virtual bool isCellOriented(int functionSpaceCode) const;
+
+    virtual bool ownSample(int fsCode, index_t id) const;
+
+    /**
+     \brief
+     adds a PDE onto the stiffness matrix mat and a rhs
+    */
+    virtual void addPDEToSystem(
+                     escript::AbstractSystemMatrix& mat, escript::Data& rhs,
+                     const escript::Data& A, const escript::Data& B,
+                     const escript::Data& C, const escript::Data& D,
+                     const escript::Data& X, const escript::Data& Y,
+                     const escript::Data& d, const escript::Data& y,
+                     const escript::Data& d_contact,
+                     const escript::Data& y_contact,
+                     const escript::Data& d_dirac,
+                     const escript::Data& y_dirac) const;
+
+    /**
+     \brief
+     adds a PDE onto the lumped stiffness matrix mat
+    */
+    virtual void addPDEToLumpedSystem(escript::Data& mat,
+                                      const escript::Data& D,
+                                      const escript::Data& d,
+                                      const escript::Data& d_dirac,
+                                      bool useHRZ) const;
+
+    /**
+     \brief
+     adds the right hand side of a PDE onto rhs
+    */
+    virtual void addPDEToRHS(escript::Data& rhs, const escript::Data& X,
+                             const escript::Data& Y, const escript::Data& y,
+                             const escript::Data& y_contact,
+                             const escript::Data& y_dirac) const;
+
+    /**
+     \brief
+     adds a PDE onto a transport problem
+    */
+    virtual void addPDEToTransportProblem(
+                     escript::AbstractTransportProblem& tp,
+                     escript::Data& source, const escript::Data& M,
+                     const escript::Data& A, const escript::Data& B,
+                     const escript::Data& C, const escript::Data& D,
+                     const escript::Data& X, const escript::Data& Y,
+                     const escript::Data& d, const escript::Data& y,
+                     const escript::Data& d_contact,
+                     const escript::Data& y_contact,
+                     const escript::Data& d_dirac,
+                     const escript::Data& y_dirac) const;
+
+    /**
+     \brief
+     creates a stiffness matrix and initializes it with zeros
+    */
+    escript::ASM_ptr newSystemMatrix(
+                      int row_blocksize,
+                      const escript::FunctionSpace& row_functionspace,
+                      int column_blocksize,
+                      const escript::FunctionSpace& column_functionspace,
+                      int type) const;
+
+    /**
+     \brief
+      creates a TransportProblem
+    */
+    escript::ATP_ptr newTransportProblem(int blocksize,
+                                   const escript::FunctionSpace& functionspace,
+                                   int type) const;
+
+    /**
+     \brief returns locations in the FEM nodes
+    */
+    virtual escript::Data getX() const;
+
+    /**
+     \brief returns boundary normals at the quadrature point on the face
+            elements
+    */
+    virtual escript::Data getNormal() const;
+
+    /**
+     \brief returns the element size
+    */
+    virtual escript::Data getSize() const;
+
+    /**
+     \brief comparison operators
+    */
+    virtual bool operator==(const escript::AbstractDomain& other) const;
+    virtual bool operator!=(const escript::AbstractDomain& other) const;
+
+    /**
+     \brief assigns new tag newTag to all samples of functionspace with a
+            positive value of mask for any of its sample points.
+    */
+    virtual void setTags(int functionSpaceType, int newTag,
+                         const escript::Data& mask) const;
+
+    /**
+      \brief
+       returns the number of tags in use and a pointer to an array of the
+       tags in use, respectively
+    */
+    virtual int getNumberOfTagsInUse(int functionSpaceCode) const;
+
+    virtual const int* borrowListOfTagsInUse(int functionSpaceCode) const;
+
+    /**
+     \brief Checks if this domain allows tags for the specified
+            functionSpace code.
+    */
+    virtual bool canTag(int functionSpaceCode) const;
+
+    /**
+     \brief returns the approximation order used for a function space functionSpaceCode
+    */
+    virtual int getApproximationOrder(int functionSpaceCode) const;
+
+    virtual bool supportsContactElements() const { return false; }
+
+    virtual escript::Data randomFill(const escript::DataTypes::ShapeType& shape,
+                                const escript::FunctionSpace& what, long seed,
+                                const boost::python::tuple& filter) const;
+
+    void createMappings(const std::vector<index_t>& dofDistribution,
+                        const std::vector<index_t>& nodeDistribution);
+
+    /// assigns new node reference numbers to all element files.
+    /// If k is the old node, the new node is newNode[k-offset].
+    void relabelElementNodes(const index_t* newNode, index_t offset);
+
+#ifdef ESYS_HAVE_PASO
+    /// returns a reference to the paso matrix pattern
+    paso::SystemMatrixPattern_ptr getPasoPattern() const;
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// returns a Trilinos CRS graph suitable to build a sparse matrix.
+    esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph() const {
+        return m_nodes->getTrilinosGraph();
+    }
+#endif
+
+private:
+    void prepare(bool optimize);
+
+    /// Initially the element nodes refer to the numbering defined by the
+    /// global id assigned to the nodes in the NodeFile. It is also not ensured
+    /// that all nodes referred to by an element are actually available on the
+    /// process. On return, a local node labeling is used and all nodes are
+    /// available. In particular the numbering of the element nodes is between
+    /// 0 and Nodes->numNodes.
+    /// The function does not create a distribution of the degrees of freedom.
+    void resolveNodeIds();
+
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrixPattern_ptr makePasoPattern() const;
+#endif
+
+    void createColoring(const index_t* dofMap);
+    void distributeByRankOfDOF(const IndexVector& distribution);
+    void markNodes(std::vector<short>& mask, index_t offset) const;
+    void optimizeDOFDistribution(IndexVector& distribution);
+    void optimizeDOFLabeling(const IndexVector& distribution);
+    void optimizeElementOrdering();
+    void updateTagList();
+    void printElementInfo(const ElementFile* e, const std::string& title,
+                          const std::string& defaultType, bool full) const;
+
+    void writeElementInfo(std::ostream& stream, const ElementFile* e,
+                          const std::string& defaultType) const;
+
+    /// MPI information
+    escript::JMPI m_mpiInfo;
+    /// domain description
+    std::string m_name;
+    /// the table of the nodes
+    NodeFile* m_nodes;
+    /// the table of the elements
+    ElementFile* m_elements;
+    /// the table of face elements
+    ElementFile* m_faceElements;
+    /// the table of points (treated as elements of dimension 0)
+    ElementFile* m_points;
+    /// the tag map mapping names to tag keys
+    TagMap m_tagMap;
+#ifdef ESYS_HAVE_PASO
+    // pointer to the sparse matrix pattern
+    mutable paso::SystemMatrixPattern_ptr pasoPattern;
+#endif
+
+    static FunctionSpaceNamesMapType m_functionSpaceTypeNames;
+};
+
+} // end of namespace
+
+#endif // __DUDLEY_DOMAIN_H__
+
diff --git a/dudley/test/MeshAdapterTestCase.h b/dudley/src/DudleyException.h
similarity index 66%
rename from dudley/test/MeshAdapterTestCase.h
rename to dudley/src/DudleyException.h
index 84f3f02..f68f164 100644
--- a/dudley/test/MeshAdapterTestCase.h
+++ b/dudley/src/DudleyException.h
@@ -14,20 +14,19 @@
 *
 *****************************************************************************/
 
+#ifndef __DUDLEY_EXCEPTION_H__
+#define __DUDLEY_EXCEPTION_H__
 
-#if !defined MeshAdapterTestCase_20040705_H
-#define MeshAdapterTestCase_20040705_H
+#include <escript/EsysException.h>
 
-#include <cppunit/TestFixture.h>
-#include <cppunit/TestSuite.h>
+namespace dudley {
 
-class MeshAdapterTestCase : public CppUnit::TestFixture
+class DudleyException : public escript::EsysException
 {
 public:
-  void testAll();
-
-  static CppUnit::TestSuite* suite();
+    DudleyException(const std::string& str) : escript::EsysException(str) {}
 };
 
-#endif
+} // end of namespace
 
+#endif // __DUDLEY_EXCEPTION_H__
diff --git a/dudley/src/DudleyVersion.h b/dudley/src/DudleyVersion.h
index 17a7619..ebccbdf 100644
--- a/dudley/src/DudleyVersion.h
+++ b/dudley/src/DudleyVersion.h
@@ -17,6 +17,7 @@
 #ifndef INC_DUDLEYVERSION
 #define INC_DUDLEYVERSION
 
-char Dudley_Version[] = "$Revision: 6109 $";
+char Dudley_Version[] = "$Revision: 6119 $";
 
 #endif
+
diff --git a/dudley/src/ElementFile.cpp b/dudley/src/ElementFile.cpp
index eb75579..ea0d655 100644
--- a/dudley/src/ElementFile.cpp
+++ b/dudley/src/ElementFile.cpp
@@ -14,175 +14,231 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Dudley: ElementFile */
-
-/*   allocates an element file to hold elements of type id and with integration order order. */
-/*   use Dudley_Mesh_allocElementTable to allocate the element table (Id,Nodes,Tag,Owner). */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "ElementFile.h"
 #include "ShapeTable.h"
 
-/************************************************************************************/
-
-Dudley_ElementFile *Dudley_ElementFile_alloc(Dudley_ElementTypeId etype, esysUtils::JMPI& MPIInfo)
+#include <escript/index.h>
+
+namespace dudley {
+
+ElementFile::ElementFile(ElementTypeId type, escript::JMPI mpiInfo) :
+    MPIInfo(mpiInfo),
+    numElements(0),
+    Id(NULL),
+    Tag(NULL),
+    Owner(NULL),
+    Nodes(NULL),
+    Color(NULL),
+    minColor(0),
+    maxColor(-1),
+    etype(type)
 {
-    Dudley_ElementFile *out;
-
-    if (!Dudley_noError())
-	return NULL;
-
-    /*  allocate the return value */
-
-    out = new Dudley_ElementFile;
-    if (Dudley_checkPtr(out))
-	return NULL;
-    out->numElements = 0;
-    out->Id = NULL;
-    out->Nodes = NULL;
-    out->Tag = NULL;
-    out->Color = NULL;
-    out->minColor = 0;
-    out->maxColor = -1;
-    out->jacobeans = NULL;
-    out->jacobeans_reducedQ = NULL;
-
-    out->Owner = NULL;
-    out->numTagsInUse = 0;
-    out->tagsInUse = NULL;
-
-    out->MPIInfo = MPIInfo;
-
-    out->jacobeans = Dudley_ElementFile_Jacobeans_alloc();
-    out->jacobeans_reducedQ = Dudley_ElementFile_Jacobeans_alloc();
-
-    if (!Dudley_noError())
-    {
-	Dudley_ElementFile_free(out);
-	return NULL;
-    }
-    out->etype = etype;
-    out->numDim = Dims[out->etype];
-    out->numNodes = out->numDim + 1;
-    out->numLocalDim = localDims[out->etype];
-    out->numShapes = out->numLocalDim + 1;
-    out->ename = getElementName(out->etype);
-    return out;
+    jacobians = new ElementFile_Jacobians();
+    jacobians_reducedQ = new ElementFile_Jacobians();
+
+    numDim = Dims[type];
+    numNodes = numDim + 1;
+    numLocalDim = localDims[type];
+    numShapes = numLocalDim + 1;
+    ename = getElementName(type);
 }
 
-/*  deallocates an element file: */
+ElementFile::~ElementFile()
+{
+    freeTable();
+    delete jacobians;
+    delete jacobians_reducedQ;
+}
 
-void Dudley_ElementFile_free(Dudley_ElementFile * in)
+void ElementFile::allocTable(dim_t NE)
 {
-    if (in != NULL)
-    {
-	Dudley_ElementFile_freeTable(in);
-	Dudley_ElementFile_Jacobeans_dealloc(in->jacobeans);
-	Dudley_ElementFile_Jacobeans_dealloc(in->jacobeans_reducedQ);
-	delete in;
+    if (numElements > 0)
+        freeTable();
+
+    numElements = NE;
+    Owner = new int[numElements];
+    Id = new index_t[numElements];
+    Nodes = new index_t[numElements * numNodes];
+    Tag = new int[numElements];
+    Color = new index_t[numElements];
+
+    // this initialization makes sure that data are located on the right
+    // processor
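+    // (editorial note: under OpenMP the thread that first touches a memory
+    // page typically determines its NUMA placement, hence the parallel
+    // initialization below)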
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        for (int i = 0; i < numNodes; i++)
+            Nodes[INDEX2(i, e, numNodes)] = -1;
+        Owner[e] = -1;
+        Id[e] = -1;
+        Tag[e] = -1;
+        Color[e] = -1;
     }
+    maxColor = -1;
+    minColor = 0;
 }
 
-void Dudley_ElementFile_setElementDistribution(Dudley_ElementFile * in, dim_t * distribution)
+void ElementFile::freeTable()
 {
-    dim_t local_num_elements, e, num_elements = 0;
-    Esys_MPI_rank myRank;
-    if (in == NULL)
-    {
-	distribution[0] = num_elements;
+    delete[] Owner;
+    delete[] Id;
+    delete[] Nodes;
+    delete[] Tag;
+    delete[] Color;
+    tagsInUse.clear();
+    numElements = 0;
+    maxColor = -1;
+    minColor = 0;
+}
+
+void ElementFile::copyTable(index_t offset, index_t nodeOffset,
+                            index_t idOffset, const ElementFile* in)
+{
+    const int NN_in = in->numNodes;
+    if (NN_in > numNodes) {
+        throw DudleyException("ElementFile::copyTable: dimensions of element files don't match.");
+    }
+
+    if (MPIInfo->comm != in->MPIInfo->comm) {
+        throw DudleyException("ElementFile::copyTable: MPI communicators of element files don't match.");
     }
-    else
-    {
-	if (in->MPIInfo->size > 1)
-	{
-	    num_elements = 0;
-	    myRank = in->MPIInfo->rank;
-#pragma omp parallel private(local_num_elements)
-	    {
-		local_num_elements = 0;
-#pragma omp for private(e)
-		for (e = 0; e < in->numElements; e++)
-		{
-		    if (in->Owner[e] == myRank)
-			local_num_elements++;
-		}
-#pragma omp critical
-		num_elements += local_num_elements;
-	    }
-#ifdef ESYS_MPI
-	    MPI_Allgather(&num_elements, 1, MPI_INT, distribution, 1, MPI_INT, in->MPIInfo->comm);
-#else
-	    distribution[0] = num_elements;
-#endif
-	}
-	else
-	{
-	    distribution[0] = in->numElements;
-	}
+
+#pragma omp parallel for
+    for (index_t n = 0; n < in->numElements; n++) {
+        Owner[offset + n] = in->Owner[n];
+        Id[offset + n] = in->Id[n] + idOffset;
+        Tag[offset + n] = in->Tag[n];
+        for (int i = 0; i < numNodes; i++)
+            Nodes[INDEX2(i, offset + n, numNodes)] =
+                            in->Nodes[INDEX2(i, n, NN_in)] + nodeOffset;
     }
 }
 
-dim_t Dudley_ElementFile_getGlobalNumElements(Dudley_ElementFile * in)
+void ElementFile::print(const index_t* nodesId) const
 {
-    dim_t size, *distribution = NULL, out, p;
-    if (in == NULL)
-    {
-	return 0;
+    std::cout << "=== " << ename << ":\nnumber of elements=" << numElements
+              << "\ncolor range=[" << minColor << "," << maxColor << "]\n";
+
+    if (numElements > 0) {
+        std::cout << "Id,Tag,Owner,Color,Nodes" << std::endl;
+        for (index_t i = 0; i < numElements; i++) {
+            std::cout << Id[i] << "," << Tag[i] << ","
+                << Owner[i] << "," << Color[i] << ",";
+            for (int j = 0; j < numNodes; j++)
+                std::cout << " " << nodesId[Nodes[INDEX2(j, i, numNodes)]];
+            std::cout << std::endl;
+        }
     }
-    else
-    {
-	size = in->MPIInfo->size;
-	distribution = new  dim_t[size];
-	Dudley_ElementFile_setElementDistribution(in, distribution);
-	out = 0;
-	for (p = 0; p < size; ++p)
-	    out += distribution[p];
-	delete[] distribution;
-	return out;
+}
+
+void ElementFile::gather(const index_t* index, const ElementFile* in)
+{
+    const int NN_in = in->numNodes;
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        const index_t k = index[e];
+        Id[e] = in->Id[k];
+        Tag[e] = in->Tag[k];
+        Owner[e] = in->Owner[k];
+        Color[e] = in->Color[k] + maxColor + 1;
+        for (int j = 0; j < std::min(numNodes, NN_in); j++)
+            Nodes[INDEX2(j, e, numNodes)] = in->Nodes[INDEX2(j, k, NN_in)];
     }
+    minColor = std::min(minColor, in->minColor + maxColor + 1);
+    maxColor = std::max(maxColor, in->maxColor + maxColor + 1);
+}
+
+void ElementFile::swapTable(ElementFile* other)
+{
+    std::swap(numElements, other->numElements);
+    std::swap(Owner, other->Owner);
+    std::swap(Id, other->Id);
+    std::swap(Nodes, other->Nodes);
+    std::swap(Tag, other->Tag);
+    std::swap(Color, other->Color);
+    std::swap(minColor, other->minColor);
+    std::swap(maxColor, other->maxColor);
+    std::swap(tagsInUse, other->tagsInUse);
 }
 
-dim_t Dudley_ElementFile_getMyNumElements(Dudley_ElementFile * in)
+void ElementFile::optimizeOrdering()
 {
-    dim_t size, *distribution = NULL, out;
-    if (in == NULL)
-    {
-	return 0;
+    if (numElements < 1)
+        return;
+
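+    // editorial note: the sort key of each element is its smallest node
+    // reference number; sorting by that key and then gathering in sorted
+    // order stores elements that reference nearby nodes close together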
+    util::ValueAndIndexList item_list(numElements);
+    index_t* index = new index_t[numElements];
+    ElementFile* out = new ElementFile(etype, MPIInfo);
+    out->allocTable(numElements);
+
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        std::pair<index_t,index_t> entry(Nodes[INDEX2(0, e, numNodes)], e);
+        for (int i = 1; i < numNodes; i++)
+            entry.first = std::min(entry.first, Nodes[INDEX2(i, e, numNodes)]);
+        item_list[e] = entry;
     }
-    else
-    {
-	size = in->MPIInfo->size;
-	distribution = new  dim_t[size];
-	Dudley_ElementFile_setElementDistribution(in, distribution);
-	out = distribution[in->MPIInfo->rank];
-	delete[] distribution;
-	return out;
+    util::sortValueAndIndex(item_list);
+
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++)
+        index[e] = item_list[e].second;
+
+    out->gather(index, this);
+    swapTable(out);
+    delete out;
+    delete[] index;
+}
+
+void ElementFile::setTags(int newTag, const escript::Data& mask)
+{
+    const int numQuad = hasReducedIntegrationOrder(mask) ? 1 : numNodes;
+
+    if (1 != mask.getDataPointSize()) {
+        throw DudleyException("ElementFile::setTags: number of components of mask must be 1.");
+    } else if (!mask.numSamplesEqual(numQuad, numElements)) {
+        throw DudleyException("ElementFile::setTags: illegal number of samples of mask Data object");
     }
 
+    if (mask.actsExpanded()) {
+#pragma omp parallel for
+        for (index_t n = 0; n < numElements; n++) {
+            if (mask.getSampleDataRO(n)[0] > 0)
+                Tag[n] = newTag;
+        }
+    } else {
+#pragma omp parallel for
+        for (index_t n = 0; n < numElements; n++) {
+            const double* mask_array = mask.getSampleDataRO(n);
+            bool check = false;
+            for (int q = 0; q < numQuad; q++)
+                check = check || mask_array[q];
+            if (check)
+                Tag[n] = newTag;
+        }
+    }
+    updateTagList();
 }
 
-index_t Dudley_ElementFile_getFirstElement(Dudley_ElementFile * in)
+void ElementFile::markNodes(std::vector<short>& mask, index_t offset) const
 {
-    dim_t size, *distribution = NULL, out, p;
-    if (in == NULL)
-    {
-	return 0;
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++) {
+        for (int i = 0; i < numNodes; i++) {
+            mask[Nodes[INDEX2(i, e, numNodes)] - offset] = 1;
+        }
     }
-    else
-    {
-	size = in->MPIInfo->size;
-	distribution = new  dim_t[size];
-	Dudley_ElementFile_setElementDistribution(in, distribution);
-	out = 0;
-	for (p = 0; p < in->MPIInfo->rank; ++p)
-	    out += distribution[p];
-	delete[] distribution;
-	return out;
+}
+
+void ElementFile::relabelNodes(const index_t* newNode, index_t offset)
+{
+#pragma omp parallel for
+    for (index_t j = 0; j < numElements; j++) {
+        for (int i = 0; i < numNodes; i++) {
+            Nodes[INDEX2(i, j, numNodes)] =
+                              newNode[Nodes[INDEX2(i, j, numNodes)] - offset];
+        }
     }
 }
+
+} // namespace dudley
+
diff --git a/dudley/src/ElementFile.h b/dudley/src/ElementFile.h
index 0682fe1..db2e750 100644
--- a/dudley/src/ElementFile.h
+++ b/dudley/src/ElementFile.h
@@ -14,103 +14,170 @@
 *
 *****************************************************************************/
 
-#ifndef INC_DUDLEY_ELEMENTFILE
-#define INC_DUDLEY_ELEMENTFILE
+#ifndef __DUDLEY_ELEMENTFILE_H__
+#define __DUDLEY_ELEMENTFILE_H__
 
 #include "Dudley.h"
 #include "NodeFile.h"
 #include "ElementType.h"
-#include "escript/DataC.h"
-
-#ifdef ESYS_MPI
-#include "esysUtils/Esys_MPI.h"
-#endif
-
-typedef struct {
-    Dudley_Status_t status;	/* status of mesh when jacobeans where updated last time */
-    dim_t numDim;		/* spatial dimension */
-    dim_t numQuad;		/* number of quadrature nodes used to calculate jacobeans */
-    dim_t numShapes;		/* number of shape functions */
-    dim_t numElements;		/* number of elements */
-    double *absD;		/* used to compute volume */
-    double quadweight;		/* used to compute volume */
-    double *DSDX;		/* derivatives of shape functions in global coordinates at quadrature points */
-} Dudley_ElementFile_Jacobeans;
-
-struct Dudley_ElementFile {
-    esysUtils::JMPI MPIInfo;
-    Esys_MPI_rank *Owner;
-
-    dim_t numElements;		/* number of elements. */
-
-    index_t *Id;		/* Id[i] is the id nmber of
-				   node i. this number is not
-				   used but useful when
-				   elements are resorted. in
-				   the entire code the term
-				   'element id' refers to i
-				   but nor to Id[i] if not
-				   explicitly stated
-				   otherwise. */
-
-    index_t *Tag;		/* Tag[i] is the tag of element i. */
-
-    index_t *tagsInUse;		/* array of tags which are actually used */
-    dim_t numTagsInUse;		/* number of tags used */
-
-    dim_t numNodes;		/* number of nodes per element */
-    index_t *Nodes;		/* Nodes[INDEX(k, i, numNodes)]
-				   is the k-the node in the
-				   i-the element. note that
-				   in the way the nodes are
-				   ordered Nodes[INDEX(k, i, numNodes)
-				   is k-the node of element i
-				   when refering to the
-				   linear version of the
-				   mesh. */
-    index_t minColor;		/* minimum color */
-    index_t maxColor;		/* maximum color */
-    index_t *Color;		/* assigns each element a color. elements with the same color     
-				   are don't share a node so they can be processed simultaneously 
-				   at anytime Color must provide a valid value. In any case one can set  
-				   Color[e]=e  for all e */
-
-    Dudley_ElementFile_Jacobeans *jacobeans;	/* jacobeans of the shape function used for solution approximation */
-    Dudley_ElementFile_Jacobeans *jacobeans_reducedQ;	/* jacobeans of the shape function used for solution approximation for reduced integration order */
-    dim_t numDim;		/* spatial dimension of the domain */
-    dim_t numLocalDim;		/* dimension of the element eg 2 for A line in 2D or 3D */
-    Dudley_ElementTypeId etype;	/* element type */
-    const char *ename;		/* name of element type */
-    dim_t numShapes;		/* number of shape functions */
+#include "Util.h"
+
+namespace dudley {
+
+struct ElementFile_Jacobians
+{
+    ElementFile_Jacobians();
+    ~ElementFile_Jacobians();
+
+    /// status of mesh when jacobians were updated last time
+    int status;
+    /// number of spatial dimensions
+    int numDim;
+    /// number of quadrature nodes used to calculate jacobians
+    int numQuad;
+    /// number of shape functions
+    int numShapes;
+    /// number of elements
+    dim_t numElements;
+    /// used to compute volume
+    double *absD;
+    /// used to compute volume
+    double quadweight;
+    /// derivatives of shape functions in global coordinates at quadrature
+    /// points
+    double* DSDX;
 };
 
-typedef struct Dudley_ElementFile Dudley_ElementFile;
-Dudley_ElementFile *Dudley_ElementFile_alloc(Dudley_ElementTypeId etype, esysUtils::JMPI& MPIInfo);
-void Dudley_ElementFile_free(Dudley_ElementFile *);
-void Dudley_ElementFile_allocTable(Dudley_ElementFile *, dim_t);
-void Dudley_ElementFile_freeTable(Dudley_ElementFile *);
-void Dudley_ElementFile_setElementDistribution(Dudley_ElementFile * in, dim_t * distribution);
-dim_t Dudley_ElementFile_getGlobalNumElements(Dudley_ElementFile * in);
-dim_t Dudley_ElementFile_getMyNumElements(Dudley_ElementFile * in);
-index_t Dudley_ElementFile_getFirstElement(Dudley_ElementFile * in);
-void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile * self, Esys_MPI_rank * mpiRankOfDOF, index_t * Id);
-
-void Dudley_ElementFile_createColoring(Dudley_ElementFile * in, dim_t numNodes, dim_t * degreeOfFreedom);
-void Dudley_ElementFile_optimizeOrdering(Dudley_ElementFile ** in);
-void Dudley_ElementFile_setNodeRange(dim_t *, dim_t *, Dudley_ElementFile *);
-void Dudley_ElementFile_relableNodes(dim_t *, dim_t, Dudley_ElementFile *);
-void Dudley_ElementFile_markNodes(dim_t *, dim_t, dim_t, Dudley_ElementFile *, bool);
-void Dudley_ElementFile_scatter(dim_t *, Dudley_ElementFile *, Dudley_ElementFile *);
-void Dudley_ElementFile_gather(dim_t *, Dudley_ElementFile *, Dudley_ElementFile *);
-void Dudley_ElementFile_copyTable(dim_t, Dudley_ElementFile *, dim_t, dim_t, Dudley_ElementFile *);
-void Dudley_ElementFile_markDOFsConnectedToRange(index_t * mask, index_t offset, index_t marker, index_t firstDOF,
-						 index_t lastDOF, index_t * dofIndex, Dudley_ElementFile * in,
-						 bool useLinear);
-
-void Dudley_ElementFile_setTags(Dudley_ElementFile *, const int, const escript::Data *);
-Dudley_ElementFile_Jacobeans *Dudley_ElementFile_Jacobeans_alloc(void);
-void Dudley_ElementFile_Jacobeans_dealloc(Dudley_ElementFile_Jacobeans *);
-Dudley_ElementFile_Jacobeans *Dudley_ElementFile_borrowJacobeans(Dudley_ElementFile *, Dudley_NodeFile *, bool);
-void Dudley_ElementFile_setTagsInUse(Dudley_ElementFile * in);
-
-#endif				/* #ifndef INC_DUDLEY_ELEMENTFILE */
+class ElementFile
+{
+public:
+    ElementFile(ElementTypeId etype, escript::JMPI mpiInfo);
+    ~ElementFile();
+
+    /// allocates the element table within an element file to hold NE elements
+    void allocTable(dim_t NE);
+
+    /// deallocates the element table within an element file
+    void freeTable();
+
+    /// copies element file `in` into this element file starting from `offset`.
+    /// The elements `offset` to in->numElements+offset-1 will be overwritten.
+    void copyTable(index_t offset, index_t nodeOffset, index_t idOffset,
+                   const ElementFile* in);
+
+    /// prints information about this element file to stdout
+    void print(const index_t* nodesId) const;
+
+    /// redistributes the elements (including overlap) by MPI rank of DOF
+    void distributeByRankOfDOF(const int* mpiRankOfDOF,
+                               const index_t* nodesId);
+
+    /// Tries to reduce the number of colors used to color elements in this
+    /// ElementFile
+    void createColoring(dim_t numNodes, const index_t* degreeOfFreedom);
+
+    /// reorders the elements so that they are stored close to the nodes
+    void optimizeOrdering();
+
+    /// assigns new node reference numbers to the elements.
+    /// If k is the old node, the new node is newNode[k-offset].
+    void relabelNodes(const index_t* newNode, index_t offset);
+
+    void markNodes(std::vector<short>& mask, index_t offset) const;
+
+    /// gathers the elements from the element file `in` using
+    /// index[0:numElements-1]. `index` has to be between 0 and
+    /// in->numElements-1. A conservative assumption on the colouring is made.
+    void gather(const index_t* index, const ElementFile* in);
+
+    /// sets element tags to newTag where mask > 0
+    void setTags(int newTag, const escript::Data& mask);
+
+    ElementFile_Jacobians* borrowJacobians(const NodeFile* nodes,
+                                           bool reducedOrder) const;
+
+    /// returns the minimum and maximum reference number of nodes
+    /// describing the elements
+    inline std::pair<index_t,index_t> getNodeRange() const;
+
+    inline void updateTagList();
+
+private:
+    void swapTable(ElementFile* other);
+
+public:
+    escript::JMPI MPIInfo;
+
+    /// number of elements
+    dim_t numElements;
+
+    /// Id[i] is the id number of element i. This number is used when elements
+    /// are resorted. In the entire code the term 'element id' refers to i and
+    /// not to Id[i] unless explicitly stated otherwise.
+    index_t* Id;
+
+    /// Tag[i] is the tag of element i
+    int* Tag;
+
+    /// Owner[i] contains the rank that owns element i
+    int* Owner;
+
+    /// array of tags which are actually used
+    std::vector<int> tagsInUse;
+
+    /// number of nodes per element
+    int numNodes;
+
+    /// Nodes[INDEX(k, i, numNodes)] is the k-th node in the i-th element.
+    index_t* Nodes;
+
+    /// assigns each element a color such that elements with the same color
+    /// don't share a node and can be processed simultaneously. Color must
+    /// always hold a valid coloring; Color[e]=e for all e is always valid.
+    index_t* Color;
+
+    /// minimum color value
+    index_t minColor;
+
+    /// maximum color value
+    index_t maxColor;
+
+    /// number of spatial dimensions of the domain
+    int numDim;
+
+    /// dimension of the element itself, e.g. 1 for a line element in 2D or 3D
+    int numLocalDim;
+
+    /// element type ID
+    ElementTypeId etype;
+
+    /// name of element type
+    const char *ename;
+
+    /// number of shape functions
+    int numShapes;
+
+private:
+    /// jacobians of the shape function used for solution approximation
+    ElementFile_Jacobians* jacobians;
+
+    /// jacobians of the shape function used for solution approximation for
+    /// reduced integration order
+    ElementFile_Jacobians* jacobians_reducedQ;
+};
+
+inline std::pair<index_t,index_t> ElementFile::getNodeRange() const
+{
+    return util::getMinMaxInt(numNodes, numElements, Nodes);
+}
+
+inline void ElementFile::updateTagList()
+{
+    util::setValuesInUse(Tag, numElements, tagsInUse, MPIInfo);
+}
+
+
+} // namespace dudley
+
+#endif // __DUDLEY_ELEMENTFILE_H__
+
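
The header above replaces the free-function Dudley_ElementFile API with the dudley::ElementFile class. For orientation, a minimal sketch of how a caller migrates from the removed C calls to the new members; the element-type enumerator and the surrounding function are illustrative only and not part of this commit:

    #include "ElementFile.h"

    using namespace dudley;

    void buildElements(escript::JMPI mpiInfo, const NodeFile* nodes, dim_t NE)
    {
        // was: Dudley_ElementFile_alloc(etype, mpiInfo)
        ElementFile* elements = new ElementFile(Dudley_Tri3, mpiInfo);

        // was: Dudley_ElementFile_allocTable(elements, NE)
        elements->allocTable(NE);

        // ... fill elements->Nodes, Id, Tag and Owner here ...

        // was: Dudley_ElementFile_optimizeOrdering(&elements); the member
        // version reorders in place instead of reallocating the object
        elements->optimizeOrdering();

        // was: Dudley_ElementFile_borrowJacobeans(elements, nodes, FALSE);
        // the result is borrowed (cached inside the ElementFile), not owned
        ElementFile_Jacobians* J = elements->borrowJacobians(nodes, false);
        (void)J;

        delete elements;   // was: Dudley_ElementFile_free(elements)
    }
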
diff --git a/dudley/src/ElementFile_allocTable.cpp b/dudley/src/ElementFile_allocTable.cpp
deleted file mode 100644
index 03c4e55..0000000
--- a/dudley/src/ElementFile_allocTable.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: ElementFile */
-
-/*   allocates and deallocates element table                  */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-#include "Util.h"
-
-/**************************************************************************************************/
-
-/*  allocates the element table within an element file to hold numElements: */
-
-void Dudley_ElementFile_allocTable(Dudley_ElementFile * in, dim_t numElements)
-{
-    index_t *Id2 = NULL, *Nodes2 = NULL, *Tag2 = NULL, *Color2 = NULL;
-    Esys_MPI_rank *Owner2 = NULL;
-    dim_t numNodes, e, i;
-
-    Dudley_resetError();
-    /*  allocate memory: */
-    numNodes = in->numNodes;
-    Owner2 = new  Esys_MPI_rank[numElements];
-    Id2 = new  index_t[numElements];
-    Nodes2 = new  index_t[numElements * in->numNodes];
-    Tag2 = new  index_t[numElements];
-    Color2 = new  index_t[numElements];
-
-    /*  if fine, deallocate the old table and replace by new: */
-
-    if (Dudley_checkPtr(Owner2) || Dudley_checkPtr(Id2) || Dudley_checkPtr(Nodes2) ||
-	Dudley_checkPtr(Tag2) || Dudley_checkPtr(Color2))
-    {
-	delete[] Owner2;
-	delete[] Nodes2;
-	delete[] Id2;
-	delete[] Tag2;
-	delete[] Color2;
-    }
-    else
-    {
-	Dudley_ElementFile_freeTable(in);
-	in->Owner = Owner2;
-	in->numElements = numElements;
-	in->Id = Id2;
-	in->Nodes = Nodes2;
-	in->Tag = Tag2;
-	in->Color = Color2;
-
-	/* this initialization makes sure that data are located on the right processor */
-
-#pragma omp parallel for private(e,i) schedule(static)
-	for (e = 0; e < numElements; e++)
-	{
-	    for (i = 0; i < numNodes; i++)
-		in->Nodes[INDEX2(i, e, numNodes)] = -1;
-	    in->Owner[e] = -1;
-	    in->Id[e] = -1;
-	    in->Tag[e] = -1;
-	    in->Color[e] = -1;
-	}
-	in->maxColor = -1;
-	in->minColor = 0;
-    }
-    return;
-}
-
-void Dudley_ElementFile_setTagsInUse(Dudley_ElementFile * in)
-{
-    index_t *tagsInUse = NULL;
-    dim_t numTagsInUse;
-    if (in != NULL)
-    {
-	Dudley_Util_setValuesInUse(in->Tag, in->numElements, &numTagsInUse, &tagsInUse, in->MPIInfo);
-	if (Dudley_noError())
-	{
-	    delete[] in->tagsInUse;
-	    in->tagsInUse = tagsInUse;
-	    in->numTagsInUse = numTagsInUse;
-	}
-    }
-}
-
-/*  deallocates the element table within an element file: */
-
-void Dudley_ElementFile_freeTable(Dudley_ElementFile * in)
-{
-    delete[] in->Owner;
-    delete[] in->Id;
-    delete[] in->Nodes;
-    delete[] in->Tag;
-    delete[] in->Color;
-    delete[] in->tagsInUse;
-    in->numTagsInUse = 0;
-    in->numElements = 0;
-    in->maxColor = -1;
-    in->minColor = 0;
-}
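
The allocation logic above moves into ElementFile::allocTable()/freeTable() declared in the new header. A minimal sketch of the member version, assuming it keeps the removed code's behaviour (new[]/delete[] plus first-touch initialisation to -1) and lets std::bad_alloc propagate instead of using Dudley_checkPtr:

    void ElementFile::allocTable(dim_t NE)
    {
        freeTable();                  // drop any existing table first
        numElements = NE;
        Owner = new int[NE];
        Id    = new index_t[NE];
        Tag   = new int[NE];
        Color = new index_t[NE];
        Nodes = new index_t[NE * numNodes];

        // first-touch initialisation keeps the data on the right processor
    #pragma omp parallel for
        for (index_t e = 0; e < NE; e++) {
            for (int i = 0; i < numNodes; i++)
                Nodes[INDEX2(i, e, numNodes)] = -1;
            Owner[e] = -1;
            Id[e]    = -1;
            Tag[e]   = -1;
            Color[e] = -1;
        }
        maxColor = -1;
        minColor = 0;
    }
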
diff --git a/dudley/src/ElementFile_copyTable.cpp b/dudley/src/ElementFile_copyTable.cpp
deleted file mode 100644
index 41c2dac..0000000
--- a/dudley/src/ElementFile_copyTable.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: ElementFile                                                      */
-
-/* copies element file in into element file out starting from offset          */
-/* the elements offset to in->numElements+offset-1 in out will be overwritten */
-
-/************************************************************************************/
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/**************************************************************************************************/
-
-void Dudley_ElementFile_copyTable(index_t offset, Dudley_ElementFile * out, index_t node_offset, index_t idOffset,
-				  Dudley_ElementFile * in)
-{
-    dim_t i, n;
-    dim_t NN, NN_in;
-    if (in == NULL)
-	return;
-    NN = out->numNodes;
-    NN_in = in->numNodes;
-    if (NN_in > NN)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_ElementFile_copyTable: dimensions of element files don't match.");
-    }
-    if (out->MPIInfo->comm != in->MPIInfo->comm)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_ElementFile_copyTable: MPI communicators of element files don't match.");
-    }
-    if (Dudley_noError())
-    {
-#pragma omp parallel for private(i,n) schedule(static)
-	for (n = 0; n < in->numElements; n++)
-	{
-	    out->Owner[offset + n] = out->Owner[n];
-	    out->Id[offset + n] = in->Id[n] + idOffset;
-	    out->Tag[offset + n] = in->Tag[n];
-	    for (i = 0; i < NN; i++)
-		out->Nodes[INDEX2(i, offset + n, NN)] = in->Nodes[INDEX2(i, n, NN_in)] + node_offset;
-	}
-    }
-}
diff --git a/dudley/src/ElementFile_createColoring.cpp b/dudley/src/ElementFile_createColoring.cpp
index c202b75..ad51357 100644
--- a/dudley/src/ElementFile_createColoring.cpp
+++ b/dudley/src/ElementFile_createColoring.cpp
@@ -14,100 +14,74 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-/*                                                                                                         */
-/*   Dudley: ElementFile                                                                                   */
-/*                                                                                                         */
-/*   This routine tries to reduce the number of colors used to color elements in the Dudley_ElementFile in */
-/*                                                                                                         */
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "ElementFile.h"
 #include "Util.h"
 
-/************************************************************************************/
+#include <escript/index.h>
+
+namespace dudley {
 
-void Dudley_ElementFile_createColoring(Dudley_ElementFile * in, dim_t numNodes, index_t * degreeOfFreedom)
+void ElementFile::createColoring(dim_t nNodes, const index_t* dofMap)
 {
-    dim_t e, i, numUncoloredElements, n, len, NN;
-    index_t *maskDOF, min_id, max_id;
-    bool independent;
+    if (numElements < 1)
+        return;
+
+    //const std::pair<index_t,index_t> idRange(util::getMinMaxInt(
+    //                                        1, dofMap.size(), &dofMap[0]));
+    const std::pair<index_t,index_t> idRange(util::getMinMaxInt(
+                                            1, nNodes, dofMap));
+
+    const int NN = numNodes;
+    const dim_t len = idRange.second - idRange.first + 1;
 
-    if (in == NULL)
-	return;
-    if (in->numElements < 1)
-	return;
-    NN = in->numNodes;
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++)
+        Color[e] = -1;
 
-    min_id = Dudley_Util_getMinInt(1, numNodes, degreeOfFreedom);
-    max_id = Dudley_Util_getMaxInt(1, numNodes, degreeOfFreedom);
-    len = max_id - min_id + 1;
-    maskDOF = new index_t[len];
-    if (!Dudley_checkPtr(maskDOF))
-    {
-#pragma omp parallel for private(e) schedule(static)
-	for (e = 0; e < in->numElements; e++)
-	    in->Color[e] = -1;
-	numUncoloredElements = in->numElements;
-	in->minColor = 0;
-	in->maxColor = in->minColor - 1;
-	while (numUncoloredElements > 0)
-	{
-	    /* initialize the mask marking nodes used by a color */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < len; n++)
-		maskDOF[n] = -1;
-	    numUncoloredElements = 0;
-	    /* OMP ? */
-	    for (e = 0; e < in->numElements; e++)
-	    {
-		if (in->Color[e] < 0)
-		{
-		    /* find out if element e is independent from the elements already colored: */
-		    independent = TRUE;
-		    for (i = 0; i < NN; i++)
-		    {
+    dim_t numUncoloredElements = numElements;
+    minColor = 0;
+    maxColor = -1;
+    index_t* maskDOF = new index_t[len];
+    while (numUncoloredElements > 0) {
+        // initialize the mask marking nodes used by a color
+#pragma omp parallel for
+        for (index_t n = 0; n < len; n++)
+            maskDOF[n] = -1;
+        numUncoloredElements = 0;
+
+        for (index_t e = 0; e < numElements; e++) {
+            if (Color[e] < 0) {
+                // find out if element e is independent from the elements
+                // already colored:
+                bool independent = true;
+                for (int i = 0; i < NN; i++) {
 #ifdef BOUNDS_CHECK
-			if (in->Nodes[INDEX2(i, e, NN)] < 0 || in->Nodes[INDEX2(i, e, NN)] >= numNodes)
-			{
-			    printf("BOUNDS_CHECK %s %d i=%d e=%d NN=%d min_id=%d in->Nodes[INDEX2...]=%d\n", __FILE__,
-				   __LINE__, i, e, NN, min_id, in->Nodes[INDEX2(i, e, NN)]);
-			    exit(1);
-			}
-			if ((degreeOfFreedom[in->Nodes[INDEX2(i, e, NN)]] - min_id) >= len
-			    || (degreeOfFreedom[in->Nodes[INDEX2(i, e, NN)]] - min_id) < 0)
-			{
-			    printf("BOUNDS_CHECK %s %d i=%d e=%d NN=%d min_id=%d dof=%d\n", __FILE__, __LINE__, i, e,
-				   NN, min_id, degreeOfFreedom[in->Nodes[INDEX2(i, e, NN)]] - min_id);
-			    exit(1);
-			}
+                    ESYS_ASSERT(Nodes[INDEX2(i, e, NN)] >= 0, "BOUNDS_CHECK");
+                    ESYS_ASSERT(Nodes[INDEX2(i, e, NN)] < nNodes, "BOUNDS_CHECK");
+                    ESYS_ASSERT(dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first < len, "BOUNDS_CHECK");
+                    ESYS_ASSERT(dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first >= 0, "BOUNDS_CHECK");
 #endif
-			if (maskDOF[degreeOfFreedom[in->Nodes[INDEX2(i, e, NN)]] - min_id] > 0)
-			{
-			    independent = FALSE;
-			    break;
-			}
-		    }
-		    /* if e is independent a new color is assigned and the nodes are marked as being used */
-		    if (independent)
-		    {
-			for (i = 0; i < NN; i++)
-			    maskDOF[degreeOfFreedom[in->Nodes[INDEX2(i, e, NN)]] - min_id] = 1;
-			in->Color[e] = in->maxColor + 1;
-		    }
-		    else
-		    {
-			numUncoloredElements++;
-		    }
-		}
-
-	    }
-	    in->maxColor++;
-	}			/* end of while loop */
-    }
-    /* all done : */
+                    if (maskDOF[dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first] > 0)
+                    {
+                        independent = false;
+                        break;
+                    }
+                }
+                // if e is independent a new color is assigned and the nodes
+                // are marked as being used
+                if (independent) {
+                    for (int i = 0; i < NN; i++)
+                        maskDOF[dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first] = 1;
+                    Color[e] = maxColor + 1;
+                } else {
+                    numUncoloredElements++;
+                }
+            }
+        } // for all elements
+        maxColor++;
+    } // end of while loop
     delete[] maskDOF;
 }
+
+} // namespace dudley
+
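
The purpose of the colouring shows up at the call sites: two elements with the same colour share no degree of freedom, so each colour class can be processed in parallel without write conflicts. A hedged usage sketch; assembleElement() stands in for whatever per-element kernel the caller runs:

    void assembleByColor(const dudley::ElementFile* elements)
    {
        // process elements colour by colour; within a colour no two elements
        // touch the same degree of freedom, so the inner loop is race-free
        for (index_t color = elements->minColor; color <= elements->maxColor; ++color) {
    #pragma omp parallel for
            for (index_t e = 0; e < elements->numElements; ++e) {
                if (elements->Color[e] == color)
                    assembleElement(e);   // hypothetical per-element work
            }
        }
    }
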
diff --git a/dudley/src/ElementFile_distributeByRankOfDOF.cpp b/dudley/src/ElementFile_distributeByRankOfDOF.cpp
index 66af4d7..cb55f71 100644
--- a/dudley/src/ElementFile_distributeByRankOfDOF.cpp
+++ b/dudley/src/ElementFile_distributeByRankOfDOF.cpp
@@ -14,248 +14,169 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Dudley: ElementFile: this will redistribute the Elements including overlap by */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "ElementFile.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
-/************************************************************************************/
+#include <escript/index.h>
+
+namespace dudley {
 
-void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile * self, Esys_MPI_rank * mpiRankOfDOF, index_t * Id)
+void ElementFile::distributeByRankOfDOF(const int* mpiRankOfDOF,
+                                        const index_t* nodesId)
 {
-    size_t size_size;
-    Esys_MPI_rank myRank, p, *Owner_buffer = NULL, loc_proc_mask_max;
-    dim_t e, j, i, size, *send_count = NULL, *recv_count = NULL, *newOwner = NULL, *loc_proc_mask =
-	NULL, *loc_send_count = NULL, newNumElements, numElementsInBuffer, numNodes, NN;
-    index_t *send_offset = NULL, *recv_offset = NULL, *Id_buffer = NULL, *Tag_buffer = NULL, *Nodes_buffer = NULL, k;
-    bool *proc_mask = NULL;
+    const int size = MPIInfo->size;
+    if (size > 1) {
 #ifdef ESYS_MPI
-    dim_t numRequests = 0;
-    MPI_Request *mpi_requests = NULL;
-    MPI_Status *mpi_stati = NULL;
-#endif
-    if (self == NULL)
-	return;
-    myRank = self->MPIInfo->rank;
-    size = self->MPIInfo->size;
-    size_size = size * sizeof(dim_t);
-    numNodes = self->numNodes;
-    NN = self->numNodes;
-    if (size > 1)
-    {
-#ifdef ESYS_MPI
-	mpi_requests = new  MPI_Request[8 * size];
-	mpi_stati = new  MPI_Status[8 * size];
-	Dudley_checkPtr(mpi_requests);
-	Dudley_checkPtr(mpi_stati);
-#endif
+        const int myRank = MPIInfo->rank;
+        int numRequests = 0;
+        std::vector<MPI_Request> mpi_requests(8 * size);
+        std::vector<MPI_Status> mpi_stati(8 * size);
 
-	/* count the number elements that have to be send to each processor (send_count) 
-	   and define a new element owner as the processor with the largest number of DOFs and the smallest id */
-	send_count = new  dim_t[size];
-	recv_count = new  dim_t[size];
-	newOwner = new  Esys_MPI_rank[self->numElements];
-	if (!(Dudley_checkPtr(send_count) || Dudley_checkPtr(recv_count) || Dudley_checkPtr(newOwner)))
-	{
-	    memset(send_count, 0, size_size);
-#pragma omp parallel private(p,loc_proc_mask,loc_send_count)
-	    {
-		loc_proc_mask = new  dim_t[size];
-		loc_send_count = new  dim_t[size];
-		memset(loc_send_count, 0, size_size);
-#pragma omp for private(e,j,loc_proc_mask_max) schedule(static)
-		for (e = 0; e < self->numElements; e++)
-		{
-		    if (self->Owner[e] == myRank)
-		    {
-			newOwner[e] = myRank;
-			memset(loc_proc_mask, 0, size_size);
-			for (j = 0; j < numNodes; j++)
-			{
-			    p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
-			    loc_proc_mask[p]++;
-			}
-			loc_proc_mask_max = 0;
-			for (p = 0; p < size; ++p)
-			{
-			    if (loc_proc_mask[p] > 0)
-				loc_send_count[p]++;
-			    if (loc_proc_mask[p] > loc_proc_mask_max)
-			    {
-				newOwner[e] = p;
-				loc_proc_mask_max = loc_proc_mask[p];
-			    }
-			}
-		    }
-		    else
-		    {
-			newOwner[e] = -1;
-		    }
-		}
+        // count the number of elements that have to be sent to each processor
+        // (send_count) and define a new element owner as the processor with
+        // the largest number of DOFs and the smallest id
+        std::vector<dim_t> send_count(size);
+        std::vector<dim_t> recv_count(size);
+        int* newOwner = new int[numElements];
+#pragma omp parallel
+        {
+            std::vector<dim_t> loc_proc_mask(size);
+            std::vector<dim_t> loc_send_count(size);
+#pragma omp for
+            for (index_t e = 0; e < numElements; e++) {
+                if (Owner[e] == myRank) {
+                    newOwner[e] = myRank;
+                    loc_proc_mask.assign(size, 0);
+                    for (int j = 0; j < numNodes; j++) {
+                        const int p = mpiRankOfDOF[Nodes[INDEX2(j, e, numNodes)]];
+                        loc_proc_mask[p]++;
+                    }
+                    dim_t loc_proc_mask_max = 0;
+                    for (int p = 0; p < size; ++p) {
+                        if (loc_proc_mask[p] > 0)
+                            loc_send_count[p]++;
+                        if (loc_proc_mask[p] > loc_proc_mask_max) {
+                            newOwner[e] = p;
+                            loc_proc_mask_max = loc_proc_mask[p];
+                        }
+                    }
+                } else {
+                    newOwner[e] = -1;
+                }
+            }
 #pragma omp critical
-		{
-		    for (p = 0; p < size; ++p)
-			send_count[p] += loc_send_count[p];
-		}
-		delete[] loc_proc_mask;
-		delete[] loc_send_count;
-	    }
-#ifdef ESYS_MPI
-	    MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, self->MPIInfo->comm);
-#else
-	    for (p = 0; p < size; ++p)
-		recv_count[p] = send_count[p];
-#endif
-	    /* get the new number of elements for this processor */
-	    newNumElements = 0;
-	    for (p = 0; p < size; ++p)
-		newNumElements += recv_count[p];
+            {
+                for (int p = 0; p < size; ++p)
+                    send_count[p] += loc_send_count[p];
+            }
+        } // end parallel section
+        MPI_Alltoall(&send_count[0], 1, MPI_DIM_T, &recv_count[0], 1,
+                     MPI_DIM_T, MPIInfo->comm);
+        // get the new number of elements for this processor
+        dim_t newNumElements = 0;
+        dim_t numElementsInBuffer = 0;
+        for (int p = 0; p < size; ++p) {
+            newNumElements += recv_count[p];
+            numElementsInBuffer += send_count[p];
+        }
 
-	    /* get the new number of elements for this processor */
-	    numElementsInBuffer = 0;
-	    for (p = 0; p < size; ++p)
-		numElementsInBuffer += send_count[p];
-	    /* allocate buffers */
-	    Id_buffer = new  index_t[numElementsInBuffer];
-	    Tag_buffer = new  index_t[numElementsInBuffer];
-	    Owner_buffer = new  Esys_MPI_rank[numElementsInBuffer];
-	    Nodes_buffer = new  index_t[numElementsInBuffer * NN];
-	    send_offset = new  index_t[size];
-	    recv_offset = new  index_t[size];
-	    proc_mask = new  bool[size];
-	    if (!(Dudley_checkPtr(Id_buffer) || Dudley_checkPtr(Tag_buffer) || Dudley_checkPtr(Owner_buffer) ||
-		  Dudley_checkPtr(Nodes_buffer) || Dudley_checkPtr(send_offset) || Dudley_checkPtr(recv_offset) ||
-		  Dudley_checkPtr(proc_mask)))
-	    {
+        std::vector<index_t> Id_buffer(numElementsInBuffer);
+        std::vector<int> Tag_buffer(numElementsInBuffer);
+        std::vector<int> Owner_buffer(numElementsInBuffer);
+        std::vector<index_t> Nodes_buffer(numElementsInBuffer * numNodes);
+        std::vector<index_t> send_offset(size);
+        std::vector<index_t> recv_offset(size);
+        std::vector<unsigned char> proc_mask(size);
 
-		/* calculate the offsets for the processor buffers */
-		recv_offset[0] = 0;
-		for (p = 0; p < size - 1; ++p)
-		    recv_offset[p + 1] = recv_offset[p] + recv_count[p];
-		send_offset[0] = 0;
-		for (p = 0; p < size - 1; ++p)
-		    send_offset[p + 1] = send_offset[p] + send_count[p];
+        // calculate the offsets for the processor buffers
+        for (int p = 0; p < size - 1; ++p) {
+            recv_offset[p + 1] = recv_offset[p] + recv_count[p];
+            send_offset[p + 1] = send_offset[p] + send_count[p];
+        }
 
-		memset(send_count, 0, size_size);
-		/* copy element into buffers. proc_mask makes sure that an 
-		 * element is copied once only for each processor */
-		for (e = 0; e < self->numElements; e++)
-		{
-		    if (self->Owner[e] == myRank)
-		    {
-			memset(proc_mask, TRUE, size*sizeof(bool));
-			for (j = 0; j < numNodes; j++)
-			{
-			    p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
-			    if (proc_mask[p])
-			    {
-				k = send_offset[p] + send_count[p];
-				Id_buffer[k] = self->Id[e];
-				Tag_buffer[k] = self->Tag[e];
-				Owner_buffer[k] = newOwner[e];
-				for (i = 0; i < numNodes; i++)
-				    Nodes_buffer[INDEX2(i, k, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
-				send_count[p]++;
-				proc_mask[p] = FALSE;
-			    }
-			}
-		    }
-		}
-		/* allocate new tables */
-		Dudley_ElementFile_allocTable(self, newNumElements);
+        send_count.assign(size, 0);
+        // copy elements into buffers. proc_mask makes sure that an element is
+        // copied once only for each processor
+        for (index_t e = 0; e < numElements; e++) {
+            if (Owner[e] == myRank) {
+                proc_mask.assign(size, 1);
+                for (int j = 0; j < numNodes; j++) {
+                    const int p = mpiRankOfDOF[Nodes[INDEX2(j, e, numNodes)]];
+                    if (proc_mask[p]) {
+                        const index_t k = send_offset[p] + send_count[p];
+                        Id_buffer[k] = Id[e];
+                        Tag_buffer[k] = Tag[e];
+                        Owner_buffer[k] = newOwner[e];
+                        for (int i = 0; i < numNodes; i++)
+                            Nodes_buffer[INDEX2(i, k, numNodes)] =
+                                         nodesId[Nodes[INDEX2(i, e, numNodes)]];
+                        send_count[p]++;
+                        proc_mask[p] = 0;
+                    }
+                }
+            }
+        }
+        // allocate new tables
+        allocTable(newNumElements);
 
-		/* start to receive new elements */
-		for (p = 0; p < size; ++p)
-		{
-		    if (recv_count[p] > 0)
-		    {
-#ifdef ESYS_MPI
-			MPI_Irecv(&(self->Id[recv_offset[p]]), recv_count[p],
-				  MPI_INT, p, self->MPIInfo->msg_tag_counter + myRank,
-				  self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Irecv(&(self->Tag[recv_offset[p]]), recv_count[p],
-				  MPI_INT, p, self->MPIInfo->msg_tag_counter + size + myRank,
-				  self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Irecv(&(self->Owner[recv_offset[p]]), recv_count[p],
-				  MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + myRank,
-				  self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Irecv(&(self->Nodes[recv_offset[p] * NN]), recv_count[p] * NN,
-				  MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + myRank,
-				  self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-#endif
-		    }
-		}
-		/* now the buffers can be send away */
-		for (p = 0; p < size; ++p)
-		{
-		    if (send_count[p] > 0)
-		    {
-#ifdef ESYS_MPI
-			MPI_Issend(&(Id_buffer[send_offset[p]]), send_count[p],
-				   MPI_INT, p, self->MPIInfo->msg_tag_counter + p,
-				   self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Issend(&(Tag_buffer[send_offset[p]]), send_count[p],
-				   MPI_INT, p, self->MPIInfo->msg_tag_counter + size + p,
-				   self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Issend(&(Owner_buffer[send_offset[p]]), send_count[p],
-				   MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + p,
-				   self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-			MPI_Issend(&(Nodes_buffer[send_offset[p] * NN]), send_count[p] * NN,
-				   MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + p,
-				   self->MPIInfo->comm, &mpi_requests[numRequests]);
-			numRequests++;
-#endif
-
-		    }
-		}
-		ESYS_MPI_INC_COUNTER(*(self->MPIInfo), 4 * size);
-		/* wait for the requests to be finalized */
-#ifdef ESYS_MPI
-		MPI_Waitall(numRequests, mpi_requests, mpi_stati);
+        // start to receive new elements
+        for (int p = 0; p < size; ++p) {
+            if (recv_count[p] > 0) {
+                MPI_Irecv(&Id[recv_offset[p]], recv_count[p], MPI_DIM_T, p,
+                          MPIInfo->counter() + myRank, MPIInfo->comm,
+                          &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Irecv(&Tag[recv_offset[p]], recv_count[p], MPI_INT, p,
+                          MPIInfo->counter() + size + myRank, MPIInfo->comm,
+                          &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Irecv(&Owner[recv_offset[p]], recv_count[p], MPI_INT, p,
+                          MPIInfo->counter() + 2 * size + myRank,
+                          MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Irecv(&Nodes[recv_offset[p] * numNodes],
+                          recv_count[p] * numNodes, MPI_DIM_T, p,
+                          MPIInfo->counter() + 3 * size + myRank,
+                          MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+            }
+        }
+        // now the buffers can be sent away
+        for (int p = 0; p < size; ++p) {
+            if (send_count[p] > 0) {
+                MPI_Issend(&Id_buffer[send_offset[p]], send_count[p],
+                           MPI_DIM_T, p, MPIInfo->counter() + p,
+                           MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Issend(&Tag_buffer[send_offset[p]], send_count[p],
+                           MPI_INT, p, MPIInfo->counter() + size + p,
+                           MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Issend(&Owner_buffer[send_offset[p]], send_count[p],
+                           MPI_INT, p, MPIInfo->counter() + 2 * size + p,
+                           MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+                MPI_Issend(&Nodes_buffer[send_offset[p] * numNodes],
+                           send_count[p] * numNodes, MPI_DIM_T, p,
+                           MPIInfo->counter() + 3 * size + p,
+                           MPIInfo->comm, &mpi_requests[numRequests]);
+                numRequests++;
+            }
+        }
+        MPIInfo->incCounter(4 * size);
+        // wait for the requests to be finalized
+        MPI_Waitall(numRequests, &mpi_requests[0], &mpi_stati[0]);
+        delete[] newOwner;
 #endif
-	    }
-	    /* clear buffer */
-	    delete[] Id_buffer;
-	    delete[] Tag_buffer;
-	    delete[] Owner_buffer;
-	    delete[] Nodes_buffer;
-	    delete[] send_offset;
-	    delete[] recv_offset;
-	    delete[] proc_mask;
-	}
-#ifdef ESYS_MPI
-	delete[] mpi_requests;
-	delete[] mpi_stati;
-#endif
-	delete[] send_count;
-	delete[] recv_count;
-	delete[] newOwner;
-    }
-    else
-    {
-#pragma omp for private(e,i) schedule(static)
-	for (e = 0; e < self->numElements; e++)
-	{
-	    self->Owner[e] = myRank;
-	    for (i = 0; i < numNodes; i++)
-		self->Nodes[INDEX2(i, e, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
-	}
+    } else { // single rank
+#pragma omp parallel for
+        for (index_t e = 0; e < numElements; e++) {
+            Owner[e] = 0;
+            for (int i = 0; i < numNodes; i++)
+                Nodes[INDEX2(i, e, numNodes)] =
+                                     nodesId[Nodes[INDEX2(i, e, numNodes)]];
+        }
     }
-    return;
 }
+
+} // namespace dudley
+
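
The rewritten routine still exchanges four separate message streams per rank pair (Id, Tag, Owner, Nodes), kept apart through the shared message-tag counter. A small sketch of the tag layout used above, assuming `counter` is the value of MPIInfo->counter() before the exchange and `size` the communicator size:

    // stream s (0 = Id, 1 = Tag, 2 = Owner, 3 = Nodes) sent to rank `dest`
    // uses tag counter + s*size + dest, so all 4*size tags of one exchange
    // are distinct; MPIInfo->incCounter(4*size) afterwards prevents reuse
    inline int streamTag(int counter, int stream, int size, int dest)
    {
        return counter + stream * size + dest;
    }
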
diff --git a/dudley/src/ElementFile_gather.cpp b/dudley/src/ElementFile_gather.cpp
deleted file mode 100644
index e6b5dcd..0000000
--- a/dudley/src/ElementFile_gather.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (SUCCESS)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Element File */
-
-/*   gathers the Element File out from the  Element File in using index[0:out->elements-1].  */
-/*   index has to be between 0 and in->elements-1. */
-/*   a conservative assumption on the coloring is made */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_gather(index_t * index, Dudley_ElementFile * in, Dudley_ElementFile * out)
-{
-    index_t k;
-    dim_t e, j;
-    dim_t NN_in = in->numNodes;
-    dim_t NN_out = out->numNodes;
-    if (in != NULL)
-    {
-	/*OMP */
-#pragma omp parallel for private(e,k,j) schedule(static)
-	for (e = 0; e < out->numElements; e++)
-	{
-	    k = index[e];
-	    out->Id[e] = in->Id[k];
-	    out->Tag[e] = in->Tag[k];
-	    out->Owner[e] = in->Owner[k];
-	    out->Color[e] = in->Color[k] + out->maxColor + 1;
-	    for (j = 0; j < MIN(NN_out, NN_in); j++)
-		out->Nodes[INDEX2(j, e, NN_out)] = in->Nodes[INDEX2(j, k, NN_in)];
-	}
-	out->minColor = MIN(out->minColor, in->minColor + out->maxColor + 1);
-	out->maxColor = MAX(out->maxColor, in->maxColor + out->maxColor + 1);
-    }
-}
diff --git a/dudley/src/ElementFile_jacobeans.cpp b/dudley/src/ElementFile_jacobeans.cpp
deleted file mode 100644
index 3f5c603..0000000
--- a/dudley/src/ElementFile_jacobeans.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-#include "Assemble.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "ShapeTable.h"
-
-/************************************************************************************/
-
-Dudley_ElementFile_Jacobeans *Dudley_ElementFile_Jacobeans_alloc(void)
-{
-    Dudley_ElementFile_Jacobeans *out = new Dudley_ElementFile_Jacobeans;
-    if (Dudley_checkPtr(out))
-    {
-	return NULL;
-    }
-    else
-    {
-	out->status = DUDLEY_INITIAL_STATUS - 1;
-	out->numDim = 0;
-	out->numQuad = 0;
-	out->numElements = 0;
-	out->absD = NULL;
-	out->quadweight = 0;
-	out->DSDX = NULL;
-	return out;
-    }
-}
-
-/************************************************************************************/
-
-void Dudley_ElementFile_Jacobeans_dealloc(Dudley_ElementFile_Jacobeans * in)
-{
-    if (in != NULL)
-    {
-	delete[] in->DSDX;
-	delete[] in->absD;
-	delete in;
-    }
-}
-
-/************************************************************************************/
-
-Dudley_ElementFile_Jacobeans *Dudley_ElementFile_borrowJacobeans(Dudley_ElementFile * self, Dudley_NodeFile * nodes,
-								 bool reducedIntegrationOrder)
-{
-    Dudley_ElementFile_Jacobeans *out = NULL;
-
-    dim_t numNodes = self->numNodes;
-
-    if (reducedIntegrationOrder)
-    {
-	out = self->jacobeans_reducedQ;
-    }
-    else
-    {
-	out = self->jacobeans;
-    }
-    if (out->status < nodes->status)
-    {
-	out->numDim = nodes->numDim;
-	out->numQuad = QuadNums[self->numDim][!reducedIntegrationOrder];
-	out->numShapes = self->numDim + 1;
-	out->numElements = self->numElements;
-	if (out->DSDX == NULL)
-	    out->DSDX = new  double[(out->numElements) * (out->numShapes) * (out->numDim) * (out->numQuad)];
-	if (out->absD == NULL)
-	    out->absD = new  double[out->numElements];
-	if (!(Dudley_checkPtr(out->DSDX) || Dudley_checkPtr(out->absD)))
-	{
-	  /*========================== dim = 1 ============================================== */
-	    if (out->numDim == 1)
-	    {
-		Dudley_setError(SYSTEM_ERROR, "Dudley does not support 1D domains.");
-	  /*========================== dim = 2 ============================================== */
-	    }
-	    else if (out->numDim == 2)
-	    {
-		if (self->numLocalDim == 0)
-		{
-		    Dudley_setError(SYSTEM_ERROR,
-				    "Dudley_ElementFile_borrowJacobeans: 2D does not support local dimension 0.");
-		}
-		else if (self->numLocalDim == 1)
-		{
-		    Dudley_Assemble_jacobeans_2D_M1D_E1D(nodes->Coordinates, out->numQuad, self->numElements, numNodes,
-						  self->Nodes, out->DSDX, out->absD, &(out->quadweight), self->Id);
-		}
-		else if (self->numLocalDim == 2)
-		{
-		    Dudley_Assemble_jacobeans_2D(nodes->Coordinates, out->numQuad, self->numElements, numNodes, self->Nodes,
-					  out->DSDX, out->absD, &(out->quadweight), self->Id);
-		}
-		else
-		{
-		    Dudley_setError(SYSTEM_ERROR,
-				    "Dudley_ElementFile_borrowJacobeans: local dimension in a 2D domain has to be 1 or 2.");
-		}
-	  /*========================== dim = 3 ============================================== */
-	    }
-	    else if (out->numDim == 3)
-	    {
-		if (self->numLocalDim == 0)
-		{
-		    Dudley_setError(SYSTEM_ERROR,
-				    "Dudley_ElementFile_borrowJacobeans: 3D does not support local dimension 0.");
-		}
-		else if (self->numLocalDim == 2)
-		{
-		    Dudley_Assemble_jacobeans_3D_M2D_E2D(nodes->Coordinates, out->numQuad, self->numElements, numNodes,
-						  self->Nodes, out->DSDX, out->absD, &(out->quadweight), self->Id);
-		}
-		else if (self->numLocalDim == 3)
-		{
-		    Dudley_Assemble_jacobeans_3D(nodes->Coordinates, out->numQuad, self->numElements, numNodes, self->Nodes,
-					  out->DSDX, out->absD, &(out->quadweight), self->Id);
-		}
-		else
-		{
-		    Dudley_setError(SYSTEM_ERROR,
-				    "Dudley_ElementFile_borrowJacobeans: local dimension in a 3D domain has to be 2 or 3.");
-		}
-	    }
-	    else
-	    {
-		Dudley_setError(SYSTEM_ERROR,
-				"Dudley_ElementFile_borrowJacobeans: spatial dimension has to be 1, 2 or 3.");
-	    }
-	}
-	if (Dudley_noError())
-	{
-	    out->status = nodes->status;
-	}
-	else
-	{
-	    out = NULL;
-	}
-
-    }
-
-    return out;
-}
diff --git a/dudley/src/ElementFile_jacobians.cpp b/dudley/src/ElementFile_jacobians.cpp
new file mode 100644
index 0000000..7d71439
--- /dev/null
+++ b/dudley/src/ElementFile_jacobians.cpp
@@ -0,0 +1,94 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "ElementFile.h"
+#include "Assemble.h"
+#include "ShapeTable.h"
+
+namespace dudley {
+
+ElementFile_Jacobians::ElementFile_Jacobians() :
+    status(DUDLEY_INITIAL_STATUS - 1),
+    numDim(0),
+    numQuad(0),
+    numElements(0),
+    absD(NULL),
+    quadweight(0),
+    DSDX(NULL)
+{
+}
+
+ElementFile_Jacobians::~ElementFile_Jacobians()
+{
+    delete[] DSDX;
+    delete[] absD;
+}
+
+ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodes,
+                                           bool reducedIntegrationOrder) const
+{
+    ElementFile_Jacobians* out =
+                (reducedIntegrationOrder ? jacobians_reducedQ : jacobians);
+
+    if (out->status < nodes->status) {
+        out->numDim = nodes->numDim;
+        out->numQuad = QuadNums[numDim][!reducedIntegrationOrder];
+        out->numShapes = numDim + 1;
+        out->numElements = numElements;
+        if (out->DSDX == NULL)
+            out->DSDX = new double[out->numElements * out->numShapes * out->numDim * out->numQuad];
+        if (out->absD == NULL)
+            out->absD = new double[out->numElements];
+
+        /*========================== dim = 2 ============================= */
+        if (out->numDim == 2) {
+            if (numLocalDim == 1) {
+                Assemble_jacobians_2D_M1D_E1D(nodes->Coordinates, out->numQuad,
+                        numElements, numNodes, Nodes, out->DSDX, out->absD,
+                        &out->quadweight, Id);
+            } else if (numLocalDim == 2) {
+                Assemble_jacobians_2D(nodes->Coordinates, out->numQuad,
+                        numElements, numNodes, Nodes, out->DSDX, out->absD,
+                        &out->quadweight, Id);
+            } else {
+                throw DudleyException("ElementFile::borrowJacobians: local "
+                                "dimension in a 2D domain has to be 1 or 2.");
+            }
+        /*========================== dim = 3 ============================= */
+        } else if (out->numDim == 3) {
+            if (numLocalDim == 2) {
+                Assemble_jacobians_3D_M2D_E2D(nodes->Coordinates, out->numQuad,
+                        numElements, numNodes, Nodes, out->DSDX, out->absD,
+                        &out->quadweight, Id);
+            } else if (numLocalDim == 3) {
+                Assemble_jacobians_3D(nodes->Coordinates, out->numQuad,
+                        numElements, numNodes, Nodes, out->DSDX, out->absD,
+                        &out->quadweight, Id);
+            } else {
+                throw DudleyException("ElementFile::borrowJacobians: local "
+                                "dimension in a 3D domain has to be 2 or 3.");
+            }
+        } else {
+            throw DudleyException("ElementFile::borrowJacobians: number of "
+                                  "spatial dimensions has to be 2 or 3.");
+        }
+        out->status = nodes->status;
+    }
+    return out;
+}
+
+} // namespace dudley
+
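
borrowJacobians() caches its result inside the ElementFile and only recomputes when the node file's status has advanced, so callers treat the returned pointer as borrowed. A hedged usage sketch:

    void useJacobians(const dudley::ElementFile* elements,
                      const dudley::NodeFile* nodes)
    {
        // first call computes jacobians for the current node coordinates
        dudley::ElementFile_Jacobians* J = elements->borrowJacobians(nodes, false);

        // while nodes->status is unchanged, further calls return the same
        // cached object; the pointer is borrowed, the ElementFile owns it
        dudley::ElementFile_Jacobians* J2 = elements->borrowJacobians(nodes, false);
        // here J == J2

        // reduced integration order uses a separately cached set
        dudley::ElementFile_Jacobians* Jred = elements->borrowJacobians(nodes, true);
        (void)J; (void)J2; (void)Jred;
    }
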
diff --git a/dudley/src/ElementFile_markNodes.cpp b/dudley/src/ElementFile_markNodes.cpp
deleted file mode 100644
index b8dd2d6..0000000
--- a/dudley/src/ElementFile_markNodes.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: ElementFile */
-
-/*   mark the used nodes with offset: */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_markNodes(index_t * mask, index_t offset, dim_t numNodes, Dudley_ElementFile * in,
-				  bool useLinear)
-{
-    dim_t i, NN, e;
-    if (in != NULL)
-    {
-	NN = in->numNodes;
-#pragma omp parallel for private(e,i) schedule(static)
-	for (e = 0; e < in->numElements; e++)
-	{
-	    for (i = 0; i < NN; i++)
-	    {
-		mask[in->Nodes[INDEX2(i, e, NN)] - offset] = 1;
-	    }
-	}
-    }
-}
-
-void Dudley_ElementFile_markDOFsConnectedToRange(index_t * mask, index_t offset, index_t marker, index_t firstDOF,
-						 index_t lastDOF, index_t * dofIndex, Dudley_ElementFile * in,
-						 bool useLinear)
-{
-    dim_t i, NN, e, j;
-    index_t color;
-    register index_t k;
-
-    if (in != NULL)
-    {
-	NN = in->numNodes;
-	for (color = in->minColor; color <= in->maxColor; color++)
-	{
-#pragma omp parallel for private(e,i,j,k) schedule(static)
-	    for (e = 0; e < in->numElements; e++)
-	    {
-		if (in->Color[e] == color)
-		{
-		    for (i = 0; i < NN; i++)
-		    {
-			k = dofIndex[in->Nodes[INDEX2(i, e, NN)]];
-			if ((firstDOF <= k) && (k < lastDOF))
-			{
-			    for (j = 0; j < NN; j++)
-				mask[dofIndex[in->Nodes[INDEX2(j, e, NN)]] - offset] = marker;
-			    break;
-			}
-		    }
-		}
-	    }
-	}
-    }
-}
diff --git a/dudley/src/ElementFile_optimizeOrdering.cpp b/dudley/src/ElementFile_optimizeOrdering.cpp
deleted file mode 100644
index 77070a6..0000000
--- a/dudley/src/ElementFile_optimizeOrdering.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-/*                                                                                                         */
-/*   Dudley: ElementFile                                                                                   */
-/*                                                                                                         */
-/*  reorders the elements in the element file such that the elements are stored close to the nodes         */
-/*                                                                                                         */
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Util.h"
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_optimizeOrdering(Dudley_ElementFile ** in)
-{
-    Dudley_Util_ValueAndIndex *item_list = NULL;
-    Dudley_ElementFile *out = NULL;
-    dim_t e, i, NN;
-    index_t *index = NULL;
-    if (*in != NULL)
-    {
-	if ((*in)->numElements < 1)
-	    return;
-	NN = (*in)-> /*referenceElementSet-> */ numNodes;
-	item_list = new  Dudley_Util_ValueAndIndex[(*in)->numElements];
-	index = new  index_t[(*in)->numElements];
-	if (!(Dudley_checkPtr(item_list) || Dudley_checkPtr(index)))
-	{
-
-	    out = Dudley_ElementFile_alloc((*in)->etype /*referenceElementSet */ , (*in)->MPIInfo);
-	    if (Dudley_noError())
-	    {
-		Dudley_ElementFile_allocTable(out, (*in)->numElements);
-		if (Dudley_noError())
-		{
-#pragma omp parallel for private(e,i) schedule(static)
-		    for (e = 0; e < (*in)->numElements; e++)
-		    {
-			item_list[e].index = e;
-			item_list[e].value = (*in)->Nodes[INDEX2(0, e, NN)];
-			for (i = 1; i < NN; i++)
-			    item_list[e].value = MIN(item_list[e].value, (*in)->Nodes[INDEX2(i, e, NN)]);
-		    }
-		    Dudley_Util_sortValueAndIndex((*in)->numElements, item_list);
-#pragma omp parallel for private(e) schedule(static)
-		    for (e = 0; e < (*in)->numElements; e++)
-			index[e] = item_list[e].index;
-		    Dudley_ElementFile_gather(index, *in, out);
-		    Dudley_ElementFile_free(*in);
-		    *in = out;
-		}
-		else
-		{
-		    Dudley_ElementFile_free(out);
-		}
-	    }
-	}
-	delete[] item_list;
-	delete[] index;
-    }
-}
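
optimizeOrdering() also becomes a member (see the header), built on gather() and the new private swapTable() helper instead of allocating a replacement ElementFile through a pointer-to-pointer. A sketch of the idea, assuming std::sort over (smallest node, element index) pairs replaces Dudley_Util_sortValueAndIndex:

    #include <algorithm>
    #include <vector>

    void ElementFile::optimizeOrdering()
    {
        if (numElements < 1)
            return;

        // key each element by the smallest node reference it uses
        std::vector<std::pair<index_t,index_t> > keys(numElements);
    #pragma omp parallel for
        for (index_t e = 0; e < numElements; e++) {
            index_t smallest = Nodes[INDEX2(0, e, numNodes)];
            for (int i = 1; i < numNodes; i++)
                smallest = std::min(smallest, Nodes[INDEX2(i, e, numNodes)]);
            keys[e] = std::make_pair(smallest, e);
        }
        std::sort(keys.begin(), keys.end());

        std::vector<index_t> index(numElements);
        for (index_t e = 0; e < numElements; e++)
            index[e] = keys[e].second;

        // gather into a scratch file in the new order, then swap tables
        ElementFile* out = new ElementFile(etype, MPIInfo);
        out->allocTable(numElements);
        out->gather(&index[0], this);
        swapTable(out);
        delete out;
    }
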
diff --git a/dudley/src/ElementFile_relableNodes.cpp b/dudley/src/ElementFile_relableNodes.cpp
deleted file mode 100644
index 0e92ff0..0000000
--- a/dudley/src/ElementFile_relableNodes.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: ElementFile */
-
-/*                                                                      */
-/*   assigns new node reference numbers to elements in element file in. */
-/*   if k is the old node, the new node is newNode[k-offset].           */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_relableNodes(index_t * newNode, index_t offset, Dudley_ElementFile * in)
-{
-    dim_t i, j, NN;
-
-    if (in != NULL)
-    {
-	NN = in->numNodes;
-#pragma omp parallel for private(j,i) schedule(static)
-	for (j = 0; j < in->numElements; j++)
-	{
-	    for (i = 0; i < NN; i++)
-	    {
-		in->Nodes[INDEX2(i, j, NN)] = newNode[in->Nodes[INDEX2(i, j, NN)] - offset];
-	    }
-	}
-    }
-}
diff --git a/dudley/src/ElementFile_scatter.cpp b/dudley/src/ElementFile_scatter.cpp
deleted file mode 100644
index 072be8a..0000000
--- a/dudley/src/ElementFile_scatter.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************
-*                                                                                            
-*   Dudley: ElementFile                                                                      
-*                                                                                            
-*   scatter the ElementFile in into the  ElementFile out using index[0:out->numElements-1].  
-*   index has to be between 0 and in->numElements-1.                                         
-*   a conservative assumption on the coloring is made                                         
-*                                                                                            
-************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_scatter(index_t * index, Dudley_ElementFile * in, Dudley_ElementFile * out)
-{
-    index_t k;
-    dim_t e, j;
-    if (in != NULL)
-    {
-	dim_t NN_in = in->numNodes;
-	dim_t NN_out = out->numNodes;
-	/*OMP */
-#pragma omp parallel for private(e,k,j) schedule(static)
-	for (e = 0; e < in->numElements; e++)
-	{
-	    k = index[e];
-	    out->Owner[k] = in->Owner[e];
-	    out->Id[k] = in->Id[e];
-	    out->Tag[k] = in->Tag[e];
-	    out->Color[k] = in->Color[e] + out->maxColor + 1;
-	    for (j = 0; j < MIN(NN_out, NN_in); j++)
-		out->Nodes[INDEX2(j, k, NN_out)] = in->Nodes[INDEX2(j, e, NN_in)];
-	}
-	out->minColor = MIN(out->minColor, in->minColor + out->maxColor + 1);
-	out->maxColor = MAX(out->maxColor, in->maxColor + out->maxColor + 1);
-    }
-}
diff --git a/dudley/src/ElementFile_setCoordinates.cpp b/dudley/src/ElementFile_setCoordinates.cpp
deleted file mode 100644
index 3ad658e..0000000
--- a/dudley/src/ElementFile_setCoordinates.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: sets new coordinates for elements */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_setCoordinates(Dudley_ElementFile * self, escript::Data* newX)
-{
-    /* self->volume_is_valid=FALSE;   
-       self->DSDV_is_valid=FALSE;    
-       self->DSLinearDV_is_valid=FALSE; 
-       self->X_is_valid=FALSE;          */
-}
-
diff --git a/dudley/src/ElementFile_setNodeRange.cpp b/dudley/src/ElementFile_setNodeRange.cpp
deleted file mode 100644
index 11fa165..0000000
--- a/dudley/src/ElementFile_setNodeRange.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-/*                                                                                            */
-/*   Dudley: ElementFile                                                                      */
-/*                                                                                            */
-/*   returns the maximum and minimum node reference number of nodes describing the elements:; */
-/*                                                                                            */
-/*                                                                                            */
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-#include "Util.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_setNodeRange(index_t * min_id, index_t * max_id, Dudley_ElementFile * in)
-{
-    if (in != NULL)
-    {
-	*min_id = Dudley_Util_getMinInt(in->numNodes, in->numElements, in->Nodes);
-	*max_id = Dudley_Util_getMaxInt(in->numNodes, in->numElements, in->Nodes);
-    }
-    else
-    {
-	*min_id = INDEX_T_MAX;
-	*max_id = -INDEX_T_MAX;
-    }
-}
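
The output-parameter function above is replaced by the inline ElementFile::getNodeRange() from the header, which returns both bounds as a std::pair. A short usage sketch, given an ElementFile* elements:

    const std::pair<index_t,index_t> range = elements->getNodeRange();
    const index_t minNode = range.first;   // was *min_id
    const index_t maxNode = range.second;  // was *max_id
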
diff --git a/dudley/src/ElementFile_setTags.cpp b/dudley/src/ElementFile_setTags.cpp
deleted file mode 100644
index b32f7f2..0000000
--- a/dudley/src/ElementFile_setTags.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*	 Dudley: Mesh: ElementFile */
-
-/*	set tags to newTag where mask>0 */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "ElementFile.h"
-#include "Util.h"
-#include "Assemble.h"
-
-/************************************************************************************/
-
-void Dudley_ElementFile_setTags(Dudley_ElementFile * self, const int newTag, const escript::Data* mask)
-{
-    register dim_t n, q;
-    dim_t numElements, numQuad;
-    register __const double *mask_array;
-    register bool check;
-    Dudley_resetError();
-    if (self == NULL)
-	return;
-    numElements = self->numElements;
-
-    numQuad = Dudley_Assemble_reducedIntegrationOrder(mask) ? 1 : (self->numDim + 1);
-    if (1 != getDataPointSize(mask))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_ElementFile_setTags: number of components of mask is 1.");
-    }
-    else if (!numSamplesEqual(mask, numQuad, numElements))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_ElementFile_setTags: illegal number of samples of mask Data object");
-    }
-
-    /* now we can start */
-
-    if (Dudley_noError())
-    {
-	if (isExpanded(mask))
-	{
-#pragma omp parallel private(n,check,mask_array)
-	    {
-#pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		{
-		    mask_array = getSampleDataRO(mask, n);
-		    if (mask_array[0] > 0)
-			self->Tag[n] = newTag;
-		}
-	    }
-	}
-	else
-	{
-#pragma omp parallel private(q,n,check,mask_array)
-	    {
-#pragma omp for schedule(static)
-		for (n = 0; n < numElements; n++)
-		{
-		    mask_array = getSampleDataRO(mask, n);
-		    check = FALSE;
-		    for (q = 0; q < numQuad; q++)
-			check = check || mask_array[q];
-		    if (check)
-			self->Tag[n] = newTag;
-		}
-	    }
-	}
-	Dudley_ElementFile_setTagsInUse(self);
-    }
-}
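
The function removed here applied a simple rule: an element receives the new tag wherever the mask Data object is positive at any of its quadrature points, and the tags-in-use list is refreshed afterwards. The core of that rule as a standalone sketch on plain arrays (hypothetical names; the real code reads escript::Data samples):

    #include <cstddef>
    #include <vector>

    // 'mask' holds numQuad values per element, stored contiguously per element.
    void setTagsWherePositive(std::vector<int>& tags,
                              const std::vector<double>& mask,
                              std::size_t numQuad, int newTag)
    {
        const std::size_t numElements = tags.size();
    #pragma omp parallel for
        for (std::size_t e = 0; e < numElements; e++) {
            bool hit = false;
            for (std::size_t q = 0; q < numQuad; q++)
                hit = hit || (mask[e * numQuad + q] > 0.0);
            if (hit)
                tags[e] = newTag;   // tag this element
        }
    }
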
diff --git a/dudley/src/ElementType.cpp b/dudley/src/ElementType.cpp
deleted file mode 100644
index eb89b0a..0000000
--- a/dudley/src/ElementType.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2010-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include <string.h>
-#include "ElementType.h"
-
-Dudley_ElementTypeId eltTypeFromString(const char *s)
-{
-    if (strcmp(s, "Point1") == 0)
-    {
-	return Dudley_Point1;
-    }
-    else if (strcmp(s, "Line2") == 0)
-    {
-	return Dudley_Line2;
-    }
-    else if (strcmp(s, "Tri3") == 0)
-    {
-	return Dudley_Tri3;
-    }
-    else if (strcmp(s, "Tet4") == 0)
-    {
-	return Dudley_Tet4;
-    }
-    else if (strcmp(s, "Line2Face") == 0)
-    {
-	return Dudley_Line2Face;
-    }
-    else if (strcmp(s, "Tri3Face") == 0)
-    {
-	return Dudley_Tri3Face;
-    }
-    else if (strcmp(s, "Tet4Face") == 0)
-    {
-	return Dudley_Tet4Face;
-    }
-    else
-	return Dudley_NoRef;
-}
diff --git a/dudley/src/ElementType.h b/dudley/src/ElementType.h
index 18bf316..c2748e2 100644
--- a/dudley/src/ElementType.h
+++ b/dudley/src/ElementType.h
@@ -14,8 +14,12 @@
 *
 *****************************************************************************/
 
-#ifndef DUDLEY_TYPEID_H
-#define DUDLEY_TYPEID_H
+#ifndef __DUDLEY_ELEMENTTYPE_H__
+#define __DUDLEY_ELEMENTTYPE_H__
+
+#include <string>
+
+namespace dudley {
 
 typedef enum {
     Dudley_Point1 = 0,
@@ -25,8 +29,30 @@ typedef enum {
     Dudley_Line2Face = 4,
     Dudley_Tri3Face = 5,
     Dudley_Tet4Face = 6,
-    Dudley_NoRef = 7			/* marks end of list */
-} Dudley_ElementTypeId;
+    Dudley_NoRef = 7      // marks end of list
+} ElementTypeId;
+
+inline ElementTypeId eltTypeFromString(const std::string& s)
+{
+    if (s == "Point1")
+        return Dudley_Point1;
+    else if (s == "Line2")
+        return Dudley_Line2;
+    else if (s == "Tri3")
+        return Dudley_Tri3;
+    else if (s == "Tet4")
+        return Dudley_Tet4;
+    else if (s == "Line2Face")
+        return Dudley_Line2Face;
+    else if (s == "Tri3Face")
+        return Dudley_Tri3Face;
+    else if (s == "Tet4Face")
+        return Dudley_Tet4Face;
+    else
+        return Dudley_NoRef;
+}
+
+}
+
+#endif // __DUDLEY_ELEMENTTYPE_H__
 
-Dudley_ElementTypeId eltTypeFromString(const char *s);
-#endif
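
With the move into the header the lookup becomes an inline function in namespace dudley that takes a std::string rather than a const char*. A hypothetical caller, for example while parsing an element block of a mesh file, would use it along these lines:

    #include <string>
    #include "ElementType.h"   // dudley::eltTypeFromString, dudley::ElementTypeId

    bool isKnownElementName(const std::string& name)
    {
        return dudley::eltTypeFromString(name) != dudley::Dudley_NoRef;
    }
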
diff --git a/dudley/src/IndexList.cpp b/dudley/src/IndexList.cpp
index 3f85c47..9385ba2 100644
--- a/dudley/src/IndexList.cpp
+++ b/dudley/src/IndexList.cpp
@@ -14,130 +14,78 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/****************************************************************************/
 
 /* Dudley: Converting an element list into a matrix shape     */
 
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+/****************************************************************************/
 
 #include "IndexList.h"
+#include "ElementFile.h"
 
-/* Translate from distributed/local array indices to global indices */
+#include <escript/index.h>
 
-/************************************************************************************/
-/* inserts the contributions from the element matrices of elements
-   into the row index col. If symmetric is set, only the upper
-   triangle of the matrix is stored. */
+namespace dudley {
 
-void Dudley_IndexList_insertElements(IndexList* index_list,
-                                     Dudley_ElementFile * elements,
-                                     bool reduce_row_order, index_t* row_map,
-                                     bool reduce_col_order, index_t* col_map)
+/* Translate from distributed/local array indices to global indices */
+
+/// inserts the contributions from the element matrices of elements
+/// into the row-wise index lists (the non-zero columns of each row).
+void IndexList_insertElements(IndexList* index_list,
+                              const ElementFile* elements, const index_t* map)
 {
-    /* index_list is an array of linked lists. Each entry is a row (DOF) and contains the indices to the non-zero columns */
-    index_t color;
-    dim_t e, kr, kc, NN_row, NN_col, icol, irow, NN;
-    if (elements != NULL)
-    {
-    NN = elements->numNodes;
-    NN_col = (elements->numShapes);
-    NN_row = (elements->numShapes);
+    // index_list is an array of linked lists. Each entry is a row (DOF) and
+    // contains the indices to the non-zero columns
+    if (!elements)
+        return;
 
-    for (color = elements->minColor; color <= elements->maxColor; color++)
-    {
-#pragma omp for private(e,irow,kr,kc,icol) schedule(static)
-        for (e = 0; e < elements->numElements; e++)
-        {
-        if (elements->Color[e] == color)
-        {
-            for (kr = 0; kr < NN_row; kr++)
-            {
-            irow = row_map[elements->Nodes[INDEX2(kr, e, NN)]];
-            for (kc = 0; kc < NN_col; kc++)
-            {
-                icol = col_map[elements->Nodes[INDEX2(kc, e, NN)]];
-                index_list[irow].insertIndex(icol);
-            }
-            }
-        }
-        }
-    }
-    }
-}
+    const int NN = elements->numNodes;
+    // number of element nodes for both column and row
+    const int NN_rowcol = elements->numShapes;
 
-void Dudley_IndexList_insertElementsWithRowRange(IndexList* index_list,
-        index_t firstRow, index_t lastRow, Dudley_ElementFile* elements,
-        index_t* row_map, index_t* col_map)
-{
-/* this does not resolve macro elements */
-    index_t color;
-    dim_t e, kr, kc, icol, irow, NN;
-    if (elements != NULL)
-    {
-    NN = elements->numNodes;
-    for (color = elements->minColor; color <= elements->maxColor; color++)
-    {
-#pragma omp for private(e,irow,kr,kc,icol) schedule(static)
-        for (e = 0; e < elements->numElements; e++)
-        {
-        if (elements->Color[e] == color)
-        {
-            for (kr = 0; kr < NN; kr++)
-            {
-            irow = row_map[elements->Nodes[INDEX2(kr, e, NN)]];
-            if ((firstRow <= irow) && (irow < lastRow))
-            {
-                irow -= firstRow;
-                for (kc = 0; kc < NN; kc++)
-                {
-                icol = col_map[elements->Nodes[INDEX2(kc, e, NN)]];
-                index_list[irow].insertIndex(icol);
+    for (index_t color = elements->minColor; color <= elements->maxColor; color++) {
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            if (elements->Color[e] == color) {
+                for (int kr = 0; kr < NN_rowcol; kr++) {
+                    const index_t irow = map[elements->Nodes[INDEX2(kr, e, NN)]];
+                    for (int kc = 0; kc < NN_rowcol; kc++) {
+                        const index_t icol = map[elements->Nodes[INDEX2(kc, e, NN)]];
+                        index_list[irow].insertIndex(icol);
+                    }
                 }
             }
-            }
         }
-        }
-    }
     }
 }
 
-void Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(
-        IndexList* index_list, index_t firstRow, index_t lastRow,
-        Dudley_ElementFile* elements, index_t* row_map, index_t* col_map)
+void IndexList_insertElementsWithRowRangeNoMainDiagonal(IndexList* indexList,
+                              index_t firstRow, index_t lastRow,
+                              const ElementFile* elements, const index_t* map)
 {
-    /* this does not resolve macro elements */
-    index_t color;
-    dim_t e, kr, kc, icol, irow, NN, irow_loc;
-    if (elements != NULL)
-    {
-    NN = elements->numNodes;
-    for (color = elements->minColor; color <= elements->maxColor; color++)
-    {
-#pragma omp for private(e,irow,kr,kc,icol,irow_loc) schedule(static)
-        for (e = 0; e < elements->numElements; e++)
-        {
-        if (elements->Color[e] == color)
-        {
-            for (kr = 0; kr < NN; kr++)
-            {
-            irow = row_map[elements->Nodes[INDEX2(kr, e, NN)]];
-            if ((firstRow <= irow) && (irow < lastRow))
-            {
-                irow_loc = irow - firstRow;
-                for (kc = 0; kc < NN; kc++)
-                {
-                icol = col_map[elements->Nodes[INDEX2(kc, e, NN)]];
-                if (icol != irow)
-                    index_list[irow_loc].insertIndex(icol);
+    if (!elements)
+        return;
+
+    const int NN = elements->numNodes;
+    for (index_t color = elements->minColor; color <= elements->maxColor; color++) {
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            if (elements->Color[e] == color) {
+                for (int kr = 0; kr < NN; kr++) {
+                    const index_t irow = map[elements->Nodes[INDEX2(kr, e, NN)]];
+                    if (firstRow <= irow && irow < lastRow) {
+                        const index_t irow_loc = irow - firstRow;
+                        for (int kc = 0; kc < NN; kc++) {
+                            const index_t icol = map[elements->Nodes[INDEX2(kc, e, NN)]];
+                            if (icol != irow)
+                                indexList[irow_loc].insertIndex(icol);
+                        }
+                    }
                 }
             }
-            }
-        }
         }
     }
-    }
 }
 
+} // namespace dudley
+
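
The rewritten insertElements keeps the original idea: the sparsity pattern of the system matrix is built row by row, each row holding the column indices it couples to, and every element contributes all pairs of its mapped nodes. A standalone sketch of that idea with a std::set per row (hypothetical names; the real code uses escript::IndexList and relies on the element colouring to stay race-free under OpenMP):

    #include <cstddef>
    #include <set>
    #include <vector>

    typedef std::vector< std::set<long> > Pattern;   // one column set per matrix row

    // 'pattern' must already be sized to the number of rows (DOFs).
    void insertElements(Pattern& pattern,
                        const std::vector<long>& elementNodes, // numElements*NN node ids
                        std::size_t NN,                        // nodes per element
                        const std::vector<long>& map)          // node id -> row/column index
    {
        const std::size_t numElements = elementNodes.size() / NN;
        for (std::size_t e = 0; e < numElements; e++)
            for (std::size_t kr = 0; kr < NN; kr++) {
                const long row = map[elementNodes[e * NN + kr]];
                for (std::size_t kc = 0; kc < NN; kc++)
                    pattern[row].insert(map[elementNodes[e * NN + kc]]);
            }
    }
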
diff --git a/dudley/src/IndexList.h b/dudley/src/IndexList.h
index 6677ed9..6a407a1 100644
--- a/dudley/src/IndexList.h
+++ b/dudley/src/IndexList.h
@@ -14,37 +14,35 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/****************************************************************************/
 
 /* Dudley: Converting an element list into a matrix shape     */
 
-/************************************************************************************/
+/****************************************************************************/
 
-#ifndef INC_DUDLEY_INDEXLIST
-#define INC_DUDLEY_INDEXLIST
+#ifndef __DUDLEY_INDEXLIST_H__
+#define __DUDLEY_INDEXLIST_H__
 
 #include "Dudley.h"
-#include "ElementFile.h"
-#include "Mesh.h"
 
-#include "esysUtils/IndexList.h"
+#include <escript/IndexList.h>
 
-using esysUtils::IndexList;
+namespace dudley {
 
-/* structure to build system matrix */
+using escript::IndexList;
 
-void Dudley_IndexList_insertElements(IndexList* index_list,
-                     Dudley_ElementFile * elements,
-                     bool reduce_row_order, index_t * row_map,
-                     bool reduce_col_order, index_t * col_map);
-void Dudley_IndexList_insertElementsWithRowRange(IndexList* index_list,
-                     index_t firstRow, index_t lastRow,
-                     Dudley_ElementFile* elements, index_t* row_map,
-                     index_t* col_map);
-void Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(
-                     IndexList* index_list, index_t firstRow,
-                     index_t lastRow, Dudley_ElementFile* elements,
-                     index_t* row_map, index_t* col_map);
+// helpers to build system matrix
 
-#endif /* #ifndef INC_DUDLEY_INDEXLIST */
+class ElementFile;
+
+void IndexList_insertElements(IndexList* indexlist, const ElementFile* elements,
+                              const index_t* map);
+
+void IndexList_insertElementsWithRowRangeNoMainDiagonal(IndexList* index_list,
+                            index_t firstRow, index_t lastRow,
+                            const ElementFile* elements, const index_t* map);
+
+} // namespace dudley
+
+#endif // __DUDLEY_INDEXLIST_H__
 
diff --git a/dudley/src/Mesh.cpp b/dudley/src/Mesh.cpp
deleted file mode 100644
index 4f22057..0000000
--- a/dudley/src/Mesh.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-/*   allocates a Mesh with name name for elements of type id using an integration order. If order is negative, */
-/*   the most appropriate order is selected independently. */
-
-Dudley_Mesh *Dudley_Mesh_alloc(char *name, dim_t numDim, esysUtils::JMPI& mpi_info)
-{
-    Dudley_Mesh *out;
-
-    /*  allocate the return value */
-
-    out = new Dudley_Mesh;
-    if (Dudley_checkPtr(out))
-	return NULL;
-    out->Name = NULL;
-    out->Nodes = NULL;
-    out->Elements = NULL;
-    out->FaceElements = NULL;
-    out->Points = NULL;
-    out->TagMap = NULL;
-    out->reference_counter = 0;
-    out->MPIInfo = mpi_info;
-
-    if (!Dudley_noError())
-    {
-	Dudley_Mesh_free(out);
-	return NULL;
-    }
-    /*   copy name: */
-
-    out->Name = new  char[strlen(name) + 1];
-    if (Dudley_checkPtr(out->Name))
-    {
-	Dudley_Mesh_free(out);
-	return NULL;
-    }
-    strcpy(out->Name, name);
-
-    /*   allocate node table: */
-    out->Nodes = Dudley_NodeFile_alloc(numDim, mpi_info);
-    if (!Dudley_noError())
-    {
-	Dudley_Mesh_free(out);
-	return NULL;
-    }
-    out->approximationOrder = -1;
-    out->reducedApproximationOrder = -1;
-    out->integrationOrder = -1;
-    out->reducedIntegrationOrder = -1;
-
-    out->Elements = NULL;
-    out->FaceElements = NULL;
-    out->Points = NULL;
-    out->reference_counter++;
-    return out;
-}
-
-/* returns a reference to Dudley_Mesh in */
-
-Dudley_Mesh *Dudley_Mesh_reference(Dudley_Mesh * in)
-{
-    if (in != NULL)
-	++(in->reference_counter);
-    return in;
-}
-
-/*   frees a mesh: */
-
-void Dudley_Mesh_free(Dudley_Mesh * in)
-{
-    if (in != NULL)
-    {
-	in->reference_counter--;
-	if (in->reference_counter < 1)
-	{
-	    delete[] in->Name;
-	    Dudley_NodeFile_free(in->Nodes);
-	    Dudley_ElementFile_free(in->FaceElements);
-	    Dudley_ElementFile_free(in->Elements);
-	    Dudley_ElementFile_free(in->Points);
-	    Dudley_TagMap_free(in->TagMap);
-	    delete in;
-	}
-    }
-}
-
-/************************************************************************************/
-
-/*  returns the spatial dimension of the mesh: */
-
-dim_t Dudley_Mesh_getDim(Dudley_Mesh * in)
-{
-    return in->Nodes->numDim;
-}
-
-void Dudley_Mesh_setElements(Dudley_Mesh * self, Dudley_ElementFile * elements)
-{
-    Dudley_ElementFile_free(self->Elements);
-    self->Elements = elements;
-}
-
-void Dudley_Mesh_setFaceElements(Dudley_Mesh * self, Dudley_ElementFile * elements)
-{
-    Dudley_ElementFile_free(self->FaceElements);
-    self->FaceElements = elements;
-}
-
-void Dudley_Mesh_setPoints(Dudley_Mesh * self, Dudley_ElementFile * elements)
-{
-    Dudley_ElementFile_free(self->Points);
-    self->Points = elements;
-}
-
-int Dudley_Mesh_getStatus(Dudley_Mesh * in)
-{
-    if (in == NULL)
-    {
-	return -1;
-    }
-    else if (in->Nodes == NULL)
-    {
-	return -1;
-    }
-    else
-    {
-	return in->Nodes->status;
-    }
-}
-
-void Dudley_Mesh_setOrders(Dudley_Mesh * in)
-{
-    in->approximationOrder = 1;	/* order of shapeFunctions is always 1 in Dudley */
-    in->reducedApproximationOrder = 1;
-    in->integrationOrder = 2;
-    in->reducedIntegrationOrder = 0;
-
-}
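
The Mesh.cpp removed above managed Dudley_Mesh lifetime by hand: Dudley_Mesh_alloc started a reference counter, Dudley_Mesh_reference incremented it, and Dudley_Mesh_free released the node and element tables only once the counter dropped below one. The same ownership idiom expressed with standard shared ownership, purely for orientation (illustrative types, not the upstream replacement):

    #include <memory>
    #include <string>

    struct Mesh {                              // illustrative stand-in only
        std::string name;
        explicit Mesh(const std::string& n) : name(n) {}
    };
    typedef std::shared_ptr<Mesh> Mesh_ptr;

    // make_shared plays the role of Dudley_Mesh_alloc(); copying a Mesh_ptr
    // plays the role of Dudley_Mesh_reference(); destruction of the last copy
    // plays the role of Dudley_Mesh_free().
    Mesh_ptr makeMesh(const std::string& name)
    {
        return std::make_shared<Mesh>(name);
    }
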
diff --git a/dudley/src/Mesh.h b/dudley/src/Mesh.h
deleted file mode 100644
index a52640f..0000000
--- a/dudley/src/Mesh.h
+++ /dev/null
@@ -1,166 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#ifndef INC_DUDLEY_MESH
-#define INC_DUDLEY_MESH
-
-/************************************************************************************/
-
-/* Dudley: Mesh */
-
-/* A mesh is built from nodes and elements which are describing the
-   domain, the surface and point sources. (the latter are needed to
-   establish links with other codes, in particular to particle
-   codes). The nodes are stored a Dudley_NodeFile and elements in a
-   Dudley_ElementFile. A Dudley_NodeFile and three Dudley_ElementFile
-   containing the elements describing the domain, surface and point
-   sources respectively. Notice that the surface elements do not
-   necessaryly cover the entire surface of the domain. */
-
-/* The element type is fixed by the reference element, see
-   ReferenceElement.h. The numbering of the nodes starts with 0. */
-
-/* Important: it is assumed that every node is appearing in at least
-   one element or surface element and that any node used in an
-   element, surface element or as a point is specified in the
-   Dudley_Node, see also Dudley_resolveNodeIds. */
-
-/* In some cases it is useful to refer to a mesh entirly built from
-   order 1 (=linear) elements. The linear version of the mesh can be
-   accessed by referning to the first few nodes of each element
-   (thanks to the way the nodes are ordered). As the numbering of
-   these nodes is not continuous a relabeling vectors are introduced
-   in the Dudley_NodeFile. This feature is not fully implemented
-   yet. */
-
-/* allnodes and elements are tagged. the tag allows to group nodes and
-   elements. A typical application is to mark surface elements on a
-   certain portion of the domain with the same tag. All these surface
-   elements can then assigned the same value eg. for the pressure. */
-
-/* Thespacial dimension is determined by the type of elements
-   used. The spacial dimension should be accessed by the function
-   Dudley_Mesh_getDim. Notice that the element type also determines
-   the type of surface elements to be used. */
-
-/************************************************************************************/
-
-#include "Dudley.h"
-#include "NodeFile.h"
-#include "ElementFile.h"
-#include "TagMap.h"
-#include "Util.h"
-#include "paso/SystemMatrixPattern.h"
-#include "escript/DataC.h"
-
-#ifdef ESYS_MPI
-#include "esysUtils/Esys_MPI.h"
-#endif
-
-/************************************************************************************/
-
-/*  this struct holds a mesh: */
-
-struct Dudley_Mesh {
-    char *Name;			/* the name of the mesh */
-    dim_t reference_counter;	/* counts the number of references to the mesh; */
-    dim_t approximationOrder;
-    dim_t reducedApproximationOrder;
-    dim_t integrationOrder;
-    dim_t reducedIntegrationOrder;
-    Dudley_NodeFile *Nodes;	/* the table of the nodes */
-    Dudley_ElementFile *Elements;	/* the table of the elements */
-    Dudley_ElementFile *FaceElements;	/* the table of the face elements */
-    Dudley_ElementFile *Points;	/* the table of points (treated as elements of dimension 0) */
-    Dudley_TagMap *TagMap;	/* the tag map mapping names to tag keys */
-
-    /* pointer to the sparse matrix pattern */
-
-    paso::SystemMatrixPattern_ptr FullFullPattern;
-    paso::SystemMatrixPattern_ptr FullReducedPattern;
-    paso::SystemMatrixPattern_ptr ReducedFullPattern;
-    paso::SystemMatrixPattern_ptr ReducedReducedPattern;
-    esysUtils::JMPI MPIInfo;
-};
-
-typedef struct Dudley_Mesh Dudley_Mesh;
-
-/* these structures are used for matching surfaces elements: */
-
-struct Dudley_Mesh_findMatchingFaces_center {
-    index_t refId;
-    double x[MAX_numDim];
-};
-typedef struct Dudley_Mesh_findMatchingFaces_center Dudley_Mesh_findMatchingFaces_center;
-
-/************************************************************************************/
-
-/*  interfaces: */
-Dudley_Mesh *Dudley_Mesh_alloc(char *name, dim_t numDim, esysUtils::JMPI& mpi_info);
-Dudley_Mesh *Dudley_Mesh_reference(Dudley_Mesh *);
-dim_t Dudley_Mesh_getDim(Dudley_Mesh *);
-void Dudley_Mesh_free(Dudley_Mesh *);
-
-void Dudley_Mesh_addTagMap(Dudley_Mesh * mesh_p, const char *name, index_t tag_key);
-index_t Dudley_Mesh_getTag(Dudley_Mesh * mesh_p, const char *name);
-bool Dudley_Mesh_isValidTagName(Dudley_Mesh * mesh_p, const char *name);
-void Dudley_Mesh_distributeByRankOfDOF(Dudley_Mesh * in, dim_t * distribution);
-paso::SystemMatrixPattern_ptr Dudley_getPattern(Dudley_Mesh * mesh, bool reduce_row_order, bool reduce_col_order);
-paso::SystemMatrixPattern_ptr Dudley_makePattern(Dudley_Mesh * mesh, bool reduce_row_order, bool reduce_col_order);
-void Dudley_Mesh_write(Dudley_Mesh *, char *);
-void Dudley_Mesh_dump(Dudley_Mesh * in, char *fname);
-void Dudley_PrintMesh_Info(Dudley_Mesh *, bool);
-Dudley_Mesh *Dudley_Mesh_load(char *fname);
-Dudley_Mesh *Dudley_Mesh_read(char *, index_t, index_t, bool);
-Dudley_Mesh *Dudley_Mesh_readGmsh(char *, index_t, index_t, index_t, bool, bool);
-void Dudley_Mesh_setOrders(Dudley_Mesh * in);
-
-void Dudley_Mesh_setCoordinates(Dudley_Mesh *, const escript::Data*);
-void Dudley_Mesh_setElements(Dudley_Mesh * self, Dudley_ElementFile * elements);
-void Dudley_Mesh_setFaceElements(Dudley_Mesh * self, Dudley_ElementFile * elements);
-void Dudley_Mesh_setPoints(Dudley_Mesh * self, Dudley_ElementFile * elements);
-
-void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution);
-void Dudley_Mesh_prepare(Dudley_Mesh * in, bool optimize);
-void Dudley_Mesh_createColoring(Dudley_Mesh * in, index_t * node_localDOF_map);
-void Dudley_Mesh_optimizeElementOrdering(Dudley_Mesh * in);
-void Dudley_Mesh_resolveNodeIds(Dudley_Mesh *);
-void Dudley_Mesh_createMappings(Dudley_Mesh * in, index_t * dof_distribution, index_t * node_distribution);
-void Dudley_Mesh_createNodeFileMappings(Dudley_Mesh * in, dim_t numReducedNodes, index_t * indexReducedNodes,
-					index_t * dof_first_component, index_t * nodes_first_component);
-void Dudley_Mesh_markDOFsConnectedToRange(index_t * mask, index_t offset, index_t marker, index_t firstDOF,
-					  index_t lastDOF, Dudley_Mesh * in, bool useLinear);
-
-void Dudley_Mesh_optimizeDOFLabeling(Dudley_Mesh *, dim_t *);
-
-Dudley_Mesh *Dudley_Mesh_merge(dim_t, Dudley_Mesh **);
-
-void Dudley_Mesh_relableElementNodes(int *, int, Dudley_Mesh *);
-void Dudley_Mesh_markNodes(int *, int, Dudley_Mesh *, bool);
-
-void Dudley_Mesh_glueFaces(Dudley_Mesh * self, double safety_factor, double tolerance, bool);
-void Dudley_Mesh_joinFaces(Dudley_Mesh * self, double safety_factor, double tolerance, bool);
-
-int Dudley_Mesh_findMatchingFaces_compar(const void *, const void *);
-void Dudley_Mesh_findMatchingFaces(Dudley_NodeFile *, Dudley_ElementFile *, double, double, int *, int *, int *, int *);
-void Dudley_Mesh_print(Dudley_Mesh * in);
-void Dudley_Mesh_optimizeNodeLabeling(Dudley_Mesh * mesh_p);
-dim_t Dudley_Mesh_FindMinDegreeNode(paso::SystemMatrixPattern_ptr pattern_p, index_t * available, index_t indicator);
-index_t Dudley_Mesh_getDegree(paso::SystemMatrixPattern_ptr pattern_p, index_t * label);
-void Dudley_Mesh_setTagsInUse(Dudley_Mesh * in);
-int Dudley_Mesh_getStatus(Dudley_Mesh * in);
-
-#endif				/* #ifndef INC_DUDLEY_MESH */
diff --git a/dudley/src/Mesh_createNodeFileMappings.cpp b/dudley/src/Mesh_createNodeFileMappings.cpp
deleted file mode 100644
index 3fd4fdc..0000000
--- a/dudley/src/Mesh_createNodeFileMappings.cpp
+++ /dev/null
@@ -1,523 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: NodeFile : creates the mappings using the indexReducedNodes */
-/*                 no distribution is happening                          */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-#define UNUSED -1
-
-/************************************************************************************/
-
-void Dudley_Mesh_createDOFMappingAndCoupling(Dudley_Mesh * in, bool use_reduced_elements)
-{
-    index_t min_DOF, max_DOF, *shared = NULL, *offsetInShared = NULL, *locDOFMask =
-	NULL, i, k, myFirstDOF, myLastDOF, *nodeMask = NULL, firstDOF, lastDOF, *globalDOFIndex, *wanted_DOFs = NULL;
-    dim_t mpiSize, len_loc_dof, numNeighbors, n, lastn, numNodes, *rcv_len = NULL, *snd_len = NULL, count;
-    Esys_MPI_rank myRank, p, p_min, p_max, *neighbor = NULL;
-    paso::SharedComponents_ptr rcv_shcomp, snd_shcomp;
-    Dudley_NodeMapping *this_mapping = NULL;
-    paso::Connector_ptr this_connector;
-    paso::Distribution_ptr dof_distribution;
-    esysUtils::JMPI& mpi_info = in->MPIInfo;
-#ifdef ESYS_MPI
-    MPI_Request *mpi_requests = NULL;
-    MPI_Status *mpi_stati = NULL;
-#else
-    int *mpi_requests = NULL, *mpi_stati = NULL;
-#endif
-
-    numNodes = in->Nodes->numNodes;
-    if (use_reduced_elements)
-    {
-	dof_distribution = in->Nodes->reducedDegreesOfFreedomDistribution;
-	globalDOFIndex = in->Nodes->globalReducedDOFIndex;
-    }
-    else
-    {
-	dof_distribution = in->Nodes->degreesOfFreedomDistribution;
-	globalDOFIndex = in->Nodes->globalDegreesOfFreedom;
-    }
-    myFirstDOF = dof_distribution->getFirstComponent();
-    myLastDOF = dof_distribution->getLastComponent();
-
-    mpiSize = mpi_info->size;
-    myRank = mpi_info->rank;
-
-    min_DOF = Dudley_Util_getFlaggedMinInt(1, numNodes, globalDOFIndex, -1);
-    max_DOF = Dudley_Util_getFlaggedMaxInt(1, numNodes, globalDOFIndex, -1);
-
-    if (max_DOF < min_DOF)
-    {
-	min_DOF = myFirstDOF;
-	max_DOF = myLastDOF - 1;
-    }
-
-    p_min = mpiSize;
-    p_max = -1;
-    if (max_DOF >= min_DOF)
-    {
-	for (p = 0; p < mpiSize; ++p)
-	{
-	    if (dof_distribution->first_component[p] <= min_DOF)
-		p_min = p;
-	    if (dof_distribution->first_component[p] <= max_DOF)
-		p_max = p;
-	}
-    }
-
-    len_loc_dof = max_DOF - min_DOF + 1;
-    if (!((min_DOF <= myFirstDOF) && (myLastDOF - 1 <= max_DOF)))
-    {
-	Dudley_setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
-	return;
-    }
-    rcv_len = new  dim_t[mpiSize];
-    snd_len = new  dim_t[mpiSize];
-#ifdef ESYS_MPI
-    mpi_requests = new  MPI_Request[mpiSize * 2];
-    mpi_stati = new  MPI_Status[mpiSize * 2];
-#else
-    mpi_requests = new  int[mpiSize * 2];
-    mpi_stati = new  int[mpiSize * 2];
-#endif
-    wanted_DOFs = new  index_t[numNodes];
-    nodeMask = new  index_t[numNodes];
-    neighbor = new  Esys_MPI_rank[mpiSize];
-    shared = new  index_t[numNodes * (p_max - p_min + 1)];
-    offsetInShared = new  index_t[mpiSize + 1];
-    locDOFMask = new  index_t[len_loc_dof];
-    if (!
-	(Dudley_checkPtr(neighbor) || Dudley_checkPtr(shared) || Dudley_checkPtr(offsetInShared)
-	 || Dudley_checkPtr(locDOFMask) || Dudley_checkPtr(nodeMask) || Dudley_checkPtr(rcv_len)
-	 || Dudley_checkPtr(snd_len) || Dudley_checkPtr(mpi_requests) || Dudley_checkPtr(mpi_stati)
-	 || Dudley_checkPtr(mpi_stati)))
-    {
-
-	memset(rcv_len, 0, sizeof(dim_t) * mpiSize);
-#pragma omp parallel
-	{
-#pragma omp for private(i) schedule(static)
-	    for (i = 0; i < len_loc_dof; ++i)
-		locDOFMask[i] = UNUSED;
-#pragma omp for private(i) schedule(static)
-	    for (i = 0; i < numNodes; ++i)
-		nodeMask[i] = UNUSED;
-#pragma omp for private(i,k) schedule(static)
-	    for (i = 0; i < numNodes; ++i)
-	    {
-		k = globalDOFIndex[i];
-		if (k > -1)
-		{
-		    locDOFMask[k - min_DOF] = UNUSED - 1;
-#ifdef BOUNDS_CHECK
-		    if ((k - min_DOF) >= len_loc_dof)
-		    {
-			printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
-			exit(1);
-		    }
-#endif
-		}
-	    }
-
-#pragma omp for private(i) schedule(static)
-	    for (i = myFirstDOF - min_DOF; i < myLastDOF - min_DOF; ++i)
-	    {
-		locDOFMask[i] = i - myFirstDOF + min_DOF;
-#ifdef BOUNDS_CHECK
-		if (i < 0 || i >= len_loc_dof)
-		{
-		    printf("BOUNDS_CHECK %s %d i=%d\n", __FILE__, __LINE__, i);
-		    exit(1);
-		}
-#endif
-	    }
-	}
-
-	numNeighbors = 0;
-	n = 0;
-	lastn = n;
-	for (p = p_min; p <= p_max; ++p)
-	{
-	    firstDOF = MAX(min_DOF, dof_distribution->first_component[p]);
-	    lastDOF = MIN(max_DOF + 1, dof_distribution->first_component[p + 1]);
-	    if (p != myRank)
-	    {
-		for (i = firstDOF - min_DOF; i < lastDOF - min_DOF; ++i)
-		{
-#ifdef BOUNDS_CHECK
-		    if (i < 0 || i >= len_loc_dof)
-		    {
-			printf("BOUNDS_CHECK %s %d p=%d i=%d\n", __FILE__, __LINE__, p, i);
-			exit(1);
-		    }
-#endif
-		    if (locDOFMask[i] == UNUSED - 1)
-		    {
-			locDOFMask[i] = myLastDOF - myFirstDOF + n;
-			wanted_DOFs[n] = i + min_DOF;
-			++n;
-		    }
-		}
-		if (n > lastn)
-		{
-		    rcv_len[p] = n - lastn;
-		    neighbor[numNeighbors] = p;
-#ifdef BOUNDS_CHECK
-		    if (numNeighbors < 0 || numNeighbors >= mpiSize + 1)
-		    {
-			printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors,
-			       n);
-			exit(1);
-		    }
-#endif
-		    offsetInShared[numNeighbors] = lastn;
-		    numNeighbors++;
-		    lastn = n;
-		}
-	    }
-	}
-#ifdef BOUNDS_CHECK
-	if (numNeighbors < 0 || numNeighbors >= mpiSize + 1)
-	{
-	    printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
-	    exit(1);
-	}
-#endif
-	offsetInShared[numNeighbors] = lastn;
-
-	/* assign new DOF labels to nodes */
-#pragma omp parallel for private(i,k) schedule(static)
-	for (i = 0; i < numNodes; ++i)
-	{
-	    k = globalDOFIndex[i];
-	    if (k > -1)
-		nodeMask[i] = locDOFMask[k - min_DOF];
-	}
-
-	/* now we can set the mapping from nodes to local DOFs */
-	this_mapping = Dudley_NodeMapping_alloc(numNodes, nodeMask, UNUSED);
-	/* define how to get DOF values for controlled bu other processors */
-#ifdef BOUNDS_CHECK
-	for (i = 0; i < offsetInShared[numNeighbors]; ++i)
-	{
-	    if (i < 0 || i >= numNodes * (p_max - p_min + 1))
-	    {
-		printf("BOUNDS_CHECK %s %d i=%d\n", __FILE__, __LINE__, i);
-		exit(1);
-	    }
-	}
-#endif
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < offsetInShared[numNeighbors]; ++i)
-	    shared[i] = myLastDOF - myFirstDOF + i;
-
-	rcv_shcomp.reset(new paso::SharedComponents(myLastDOF - myFirstDOF,
-                numNeighbors, neighbor, shared, offsetInShared, 1, 0,
-                mpi_info));
-
-	/*
-	 *    now we build the sender
-	 */
-#ifdef ESYS_MPI
-	MPI_Alltoall(rcv_len, 1, MPI_INT, snd_len, 1, MPI_INT, mpi_info->comm);
-#else
-	for (p = 0; p < mpiSize; ++p)
-	    snd_len[p] = rcv_len[p];
-#endif
-	count = 0;
-	for (p = 0; p < rcv_shcomp->numNeighbors; p++)
-	{
-#ifdef ESYS_MPI
-	    MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
-		      rcv_shcomp->offsetInShared[p + 1] - rcv_shcomp->offsetInShared[p], MPI_INT,
-		      rcv_shcomp->neighbor[p], mpi_info->msg_tag_counter + myRank, mpi_info->comm,
-		      &mpi_requests[count]);
-#endif
-	    count++;
-	}
-	n = 0;
-	numNeighbors = 0;
-	for (p = 0; p < mpiSize; p++)
-	{
-	    if (snd_len[p] > 0)
-	    {
-#ifdef ESYS_MPI
-		MPI_Irecv(&(shared[n]), snd_len[p],
-			  MPI_INT, p, mpi_info->msg_tag_counter + p, mpi_info->comm, &mpi_requests[count]);
-#endif
-		count++;
-		neighbor[numNeighbors] = p;
-		offsetInShared[numNeighbors] = n;
-		numNeighbors++;
-		n += snd_len[p];
-	    }
-	}
-	mpi_info->incCounter(mpi_info->size);
-	offsetInShared[numNeighbors] = n;
-#ifdef ESYS_MPI
-	MPI_Waitall(count, mpi_requests, mpi_stati);
-#endif
-	/* map global ids to local id's */
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < offsetInShared[numNeighbors]; ++i)
-	{
-	    shared[i] = locDOFMask[shared[i] - min_DOF];
-	}
-
-	snd_shcomp.reset(new paso::SharedComponents(myLastDOF - myFirstDOF,
-                numNeighbors, neighbor, shared, offsetInShared, 1, 0,
-                dof_distribution->mpi_info));
-
-	if (Dudley_noError())
-	    this_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
-	/* assign new DOF labels to nodes */
-    }
-    delete[] rcv_len;
-    delete[] snd_len;
-    delete[] mpi_requests;
-    delete[] mpi_stati;
-    delete[] wanted_DOFs;
-    delete[] nodeMask;
-    delete[] neighbor;
-    delete[] shared;
-    delete[] offsetInShared;
-    delete[] locDOFMask;
-    if (Dudley_noError())
-    {
-	if (use_reduced_elements)
-	{
-	    in->Nodes->reducedDegreesOfFreedomMapping = this_mapping;
-	    in->Nodes->reducedDegreesOfFreedomConnector = this_connector;
-	}
-	else
-	{
-	    in->Nodes->degreesOfFreedomMapping = this_mapping;
-	    in->Nodes->degreesOfFreedomConnector = this_connector;
-	}
-    }
-    else
-    {
-	Dudley_NodeMapping_free(this_mapping);
-
-    }
-}
-
-void Dudley_Mesh_createMappings(Dudley_Mesh * mesh, index_t * dof_distribution, index_t * node_distribution)
-{
-    int i;
-    index_t *maskReducedNodes = NULL, *indexReducedNodes = NULL;
-    dim_t numReducedNodes;
-
-    maskReducedNodes = new  index_t[mesh->Nodes->numNodes];
-    indexReducedNodes = new  index_t[mesh->Nodes->numNodes];
-
-    if (!(Dudley_checkPtr(maskReducedNodes) || Dudley_checkPtr(indexReducedNodes)))
-    {
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < mesh->Nodes->numNodes; ++i)
-	    maskReducedNodes[i] = -1;
-	Dudley_Mesh_markNodes(maskReducedNodes, 0, mesh, TRUE);
-
-	numReducedNodes = Dudley_Util_packMask(mesh->Nodes->numNodes, maskReducedNodes, indexReducedNodes);
-	if (Dudley_noError())
-	    Dudley_Mesh_createNodeFileMappings(mesh, numReducedNodes, indexReducedNodes, dof_distribution,
-					       node_distribution);
-    }
-
-    delete[] maskReducedNodes;
-    delete[] indexReducedNodes;
-}
-
-void Dudley_Mesh_createNodeFileMappings(Dudley_Mesh * in, dim_t numReducedNodes, index_t * indexReducedNodes,
-					index_t * dof_first_component, index_t * nodes_first_component)
-{
-
-    index_t myFirstDOF, myLastDOF, myFirstNode, myLastNode, *reduced_dof_first_component = NULL, *nodeMask = NULL,
-	*reduced_nodes_first_component = NULL, k, *maskMyReducedDOF = NULL, *indexMyReducedDOF =
-	NULL, *maskMyReducedNodes = NULL, *indexMyReducedNodes = NULL;
-    dim_t myNumDOF, myNumNodes, myNumReducedNodes, myNumReducedDOF, globalNumReducedNodes, globalNumReducedDOF, i,
-	mpiSize;
-    Esys_MPI_rank myRank;
-
-    mpiSize = in->Nodes->MPIInfo->size;
-    myRank = in->Nodes->MPIInfo->rank;
-
-    /* mark the nodes used by the reduced mesh */
-
-    reduced_dof_first_component = new  index_t[mpiSize + 1];
-    reduced_nodes_first_component = new  index_t[mpiSize + 1];
-
-    if (!(Dudley_checkPtr(reduced_dof_first_component) || Dudley_checkPtr(reduced_nodes_first_component)))
-    {
-
-	myFirstDOF = dof_first_component[myRank];
-	myLastDOF = dof_first_component[myRank + 1];
-	myNumDOF = myLastDOF - myFirstDOF;
-
-	myFirstNode = nodes_first_component[myRank];
-	myLastNode = nodes_first_component[myRank + 1];
-	myNumNodes = myLastNode - myFirstNode;
-
-	maskMyReducedDOF = new  index_t[myNumDOF];
-	indexMyReducedDOF = new  index_t[myNumDOF];
-	maskMyReducedNodes = new  index_t[myNumNodes];
-	indexMyReducedNodes = new  index_t[myNumNodes];
-
-	if (!
-	    (Dudley_checkPtr(maskMyReducedDOF) || Dudley_checkPtr(indexMyReducedDOF)
-	     || Dudley_checkPtr(maskMyReducedNodes) || Dudley_checkPtr(indexMyReducedNodes)))
-	{
-
-#pragma omp parallel private(i)
-	    {
-#pragma omp for schedule(static)
-		for (i = 0; i < myNumNodes; ++i)
-		    maskMyReducedNodes[i] = -1;
-#pragma omp for schedule(static)
-		for (i = 0; i < myNumDOF; ++i)
-		    maskMyReducedDOF[i] = -1;
-#pragma omp for private(k) schedule(static)
-		for (i = 0; i < numReducedNodes; ++i)
-		{
-		    k = in->Nodes->globalNodesIndex[indexReducedNodes[i]];
-		    if ((k >= myFirstNode) && (myLastNode > k))
-			maskMyReducedNodes[k - myFirstNode] = i;
-		    k = in->Nodes->globalDegreesOfFreedom[indexReducedNodes[i]];
-		    if ((k >= myFirstDOF) && (myLastDOF > k))
-		    {
-			maskMyReducedDOF[k - myFirstDOF] = i;
-		    }
-		}
-	    }
-	    myNumReducedNodes = Dudley_Util_packMask(myNumNodes, maskMyReducedNodes, indexMyReducedNodes);
-	    myNumReducedDOF = Dudley_Util_packMask(myNumDOF, maskMyReducedDOF, indexMyReducedDOF);
-
-#ifdef ESYS_MPI
-	    MPI_Allgather(&myNumReducedNodes, 1, MPI_INT, reduced_nodes_first_component, 1, MPI_INT,
-			  in->Nodes->MPIInfo->comm);
-	    MPI_Allgather(&myNumReducedDOF, 1, MPI_INT, reduced_dof_first_component, 1, MPI_INT,
-			  in->Nodes->MPIInfo->comm);
-#else
-	    reduced_nodes_first_component[0] = myNumReducedNodes;
-	    reduced_dof_first_component[0] = myNumReducedDOF;
-#endif
-	    globalNumReducedNodes = 0;
-	    globalNumReducedDOF = 0;
-	    for (i = 0; i < mpiSize; ++i)
-	    {
-		k = reduced_nodes_first_component[i];
-		reduced_nodes_first_component[i] = globalNumReducedNodes;
-		globalNumReducedNodes += k;
-
-		k = reduced_dof_first_component[i];
-		reduced_dof_first_component[i] = globalNumReducedDOF;
-		globalNumReducedDOF += k;
-	    }
-	    reduced_nodes_first_component[mpiSize] = globalNumReducedNodes;
-	    reduced_dof_first_component[mpiSize] = globalNumReducedDOF;
-	    /* ==== distribution of Nodes =============================== */
-	    in->Nodes->nodesDistribution.reset(new paso::Distribution(in->Nodes->MPIInfo, nodes_first_component, 1, 0));
-
-	    /* ==== distribution of DOFs =============================== */
-	    in->Nodes->degreesOfFreedomDistribution.reset(
-                    new paso::Distribution(in->Nodes->MPIInfo, dof_first_component, 1, 0));
-
-	    /* ==== distribution of reduced Nodes =============================== */
-	    in->Nodes->reducedNodesDistribution.reset(new paso::Distribution(
-                    in->Nodes->MPIInfo, reduced_nodes_first_component, 1, 0));
-
-	    /* ==== distribution of reduced DOF =============================== */
-	    in->Nodes->reducedDegreesOfFreedomDistribution.reset(
-                    new paso::Distribution(in->Nodes->MPIInfo, reduced_dof_first_component, 1, 0));
-	}
-	delete[] maskMyReducedDOF;
-	delete[] indexMyReducedDOF;
-	delete[] maskMyReducedNodes;
-	delete[] indexMyReducedNodes;
-    }
-    delete[] reduced_dof_first_component;
-    delete[] reduced_nodes_first_component;
-
-    nodeMask = new  index_t[in->Nodes->numNodes];
-    if (!Dudley_checkPtr(nodeMask) && Dudley_noError())
-    {
-
-	/* ==== nodes mapping which is a dummy structure ======== */
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < in->Nodes->numNodes; ++i)
-	    nodeMask[i] = i;
-	in->Nodes->nodesMapping = Dudley_NodeMapping_alloc(in->Nodes->numNodes, nodeMask, UNUSED);
-
-	/* ==== mapping between nodes and reduced nodes ========== */
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < in->Nodes->numNodes; ++i)
-	    nodeMask[i] = UNUSED;
-#pragma omp parallel for private(i) schedule(static)
-	for (i = 0; i < numReducedNodes; ++i)
-	    nodeMask[indexReducedNodes[i]] = i;
-	in->Nodes->reducedNodesMapping = Dudley_NodeMapping_alloc(in->Nodes->numNodes, nodeMask, UNUSED);
-    }
-    delete[] nodeMask;
-    /* ==== mapping between nodes and DOFs + DOF connector ========== */
-    if (Dudley_noError())
-        Dudley_Mesh_createDOFMappingAndCoupling(in, FALSE);
-    /* ==== mapping between nodes and reduced DOFs + reduced DOF connector ========== */
-    if (Dudley_noError())
-        Dudley_Mesh_createDOFMappingAndCoupling(in, TRUE);
-
-    /* get the Ids for DOFs and reduced nodes */
-    if (Dudley_noError())
-    {
-#pragma omp parallel private(i)
-	{
-#pragma omp for
-	    for (i = 0; i < in->Nodes->reducedNodesMapping->numTargets; ++i)
-		in->Nodes->reducedNodesId[i] = in->Nodes->Id[in->Nodes->reducedNodesMapping->map[i]];
-#pragma omp for
-	    for (i = 0; i < in->Nodes->degreesOfFreedomMapping->numTargets; ++i)
-		in->Nodes->degreesOfFreedomId[i] = in->Nodes->Id[in->Nodes->degreesOfFreedomMapping->map[i]];
-#pragma omp for
-	    for (i = 0; i < in->Nodes->reducedDegreesOfFreedomMapping->numTargets; ++i)
-		in->Nodes->reducedDegreesOfFreedomId[i] =
-		    in->Nodes->Id[in->Nodes->reducedDegreesOfFreedomMapping->map[i]];
-	}
-    }
-    else
-    {
-	Dudley_NodeMapping_free(in->Nodes->nodesMapping);
-	Dudley_NodeMapping_free(in->Nodes->reducedNodesMapping);
-	Dudley_NodeMapping_free(in->Nodes->degreesOfFreedomMapping);
-	Dudley_NodeMapping_free(in->Nodes->reducedDegreesOfFreedomMapping);
-    in->Nodes->nodesDistribution.reset();
-    in->Nodes->reducedNodesDistribution.reset();
-    in->Nodes->degreesOfFreedomDistribution.reset();
-    in->Nodes->reducedDegreesOfFreedomDistribution.reset();
-    in->Nodes->degreesOfFreedomConnector.reset();
-    in->Nodes->reducedDegreesOfFreedomConnector.reset();
-	in->Nodes->nodesMapping = NULL;
-	in->Nodes->reducedNodesMapping = NULL;
-	in->Nodes->degreesOfFreedomMapping = NULL;
-	in->Nodes->reducedDegreesOfFreedomMapping = NULL;
-    }
-}
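
Much of the mapping code deleted above leans on one small utility, Dudley_Util_packMask: given a mask over all candidate entries, collect the positions that are actually used (mask > -1) into a compact, consecutively numbered list. A minimal sketch of that utility on plain vectors (hypothetical signature):

    #include <cstddef>
    #include <vector>

    // Returns the number of used entries; packed[j] is the original position of
    // the j-th used entry, so j serves as its new contiguous number.
    std::size_t packMask(const std::vector<long>& mask, std::vector<long>& packed)
    {
        packed.clear();
        for (std::size_t i = 0; i < mask.size(); i++)
            if (mask[i] > -1)
                packed.push_back(static_cast<long>(i));
        return packed.size();
    }
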
diff --git a/dudley/src/Mesh_distributeByRankOfDOF.cpp b/dudley/src/Mesh_distributeByRankOfDOF.cpp
index 20a1d15..4c9f888 100644
--- a/dudley/src/Mesh_distributeByRankOfDOF.cpp
+++ b/dudley/src/Mesh_distributeByRankOfDOF.cpp
@@ -14,104 +14,73 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: Mesh: this will redistribute the Nodes and Elements including overlap */
-/*   according to the dof_distribution. It will create an element coloring but will not create any mappings. */
+namespace dudley {
 
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_distributeByRankOfDOF(Dudley_Mesh * self, index_t * dof_distribution)
+/// redistributes the Nodes and Elements including overlap
+/// according to the DOF distribution. It will create an element colouring
+/// but will not create any mappings.
+void DudleyDomain::distributeByRankOfDOF(const std::vector<index_t>& dofDistribution)
 {
-
-    index_t min_dof_id, max_dof_id, *tmp_node_localDOF_map = NULL, *tmp_node_localDOF_mask = NULL;
-    Esys_MPI_rank *mpiRankOfDOF = NULL;
-    register index_t k;
-    dim_t len, n, numDOFs;
-
-    if (self == NULL)
-	return;
-    mpiRankOfDOF = new  Esys_MPI_rank[self->Nodes->numNodes];
-    if (!Dudley_checkPtr(mpiRankOfDOF))
-    {
-
-	Dudley_NodeFile_assignMPIRankToDOFs(self->Nodes, mpiRankOfDOF, dof_distribution);
-
-	/* first the elements are redistributed according to mpiRankOfDOF */
-	/* at the input the Node tables refering to a the local labeling of the nodes */
-	/* while at the output they refer to the global labeling which is rectified in the next step */
-	if (Dudley_noError())
-	    Dudley_ElementFile_distributeByRankOfDOF(self->Elements, mpiRankOfDOF, self->Nodes->Id);
-	if (Dudley_noError())
-	    Dudley_ElementFile_distributeByRankOfDOF(self->FaceElements, mpiRankOfDOF, self->Nodes->Id);
-	if (Dudley_noError())
-	    Dudley_ElementFile_distributeByRankOfDOF(self->Points, mpiRankOfDOF, self->Nodes->Id);
-
-	/* resolve the node ids */
-	if (Dudley_noError())
-	    Dudley_Mesh_resolveNodeIds(self);
-
-	/* create a local labeling of the DOFs */
-	Dudley_NodeFile_setDOFRange(&min_dof_id, &max_dof_id, self->Nodes);
-	len = max_dof_id - min_dof_id + 1;
-	tmp_node_localDOF_mask = new  index_t[len];	/* local mask for used nodes */
-	tmp_node_localDOF_map = new  index_t[self->Nodes->numNodes];
-	if (!((Dudley_checkPtr(tmp_node_localDOF_mask) && Dudley_checkPtr(tmp_node_localDOF_map))))
-	{
-
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < len; n++)
-		tmp_node_localDOF_mask[n] = -1;
-
-#pragma omp parallel for private (n) schedule(static)
-	    for (n = 0; n < self->Nodes->numNodes; n++)
-		tmp_node_localDOF_map[n] = -1;
-
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < self->Nodes->numNodes; n++)
-	    {
+    int* mpiRankOfDOF = new int[m_nodes->getNumNodes()];
+    m_nodes->assignMPIRankToDOFs(mpiRankOfDOF, dofDistribution);
+
+    // first, the elements are redistributed according to mpiRankOfDOF
+    // at the input the Node tables refer to the local labeling of the nodes
+    // while at the output they refer to the global labeling which is rectified
+    // in the next step
+    m_elements->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+    m_faceElements->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+    m_points->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+
+    // this will replace the node file!
+    resolveNodeIds();
+
+    // create a local labeling of the DOFs
+    const std::pair<index_t,index_t> dofRange(m_nodes->getDOFRange());
+    const dim_t len = dofRange.second - dofRange.first + 1;
+    // local mask for used nodes
+    index_t* localDOF_mask = new index_t[len];
+    index_t* localDOF_map = new index_t[m_nodes->getNumNodes()];
+
+#pragma omp parallel for
+    for (index_t n = 0; n < len; n++)
+        localDOF_mask[n] = -1;
+
+#pragma omp parallel for
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++)
+        localDOF_map[n] = -1;
+
+#pragma omp parallel for
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
 #ifdef BOUNDS_CHECK
-		if ((self->Nodes->globalDegreesOfFreedom[n] - min_dof_id) >= len
-		    || (self->Nodes->globalDegreesOfFreedom[n] - min_dof_id) < 0)
-		{
-		    printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
-		    exit(1);
-		}
+        ESYS_ASSERT(m_nodes->globalDegreesOfFreedom[n] - dofRange.first < len, "BOUNDS_CHECK");
+        ESYS_ASSERT(m_nodes->globalDegreesOfFreedom[n] - dofRange.first >= 0, "BOUNDS_CHECK");
 #endif
-		tmp_node_localDOF_mask[self->Nodes->globalDegreesOfFreedom[n] - min_dof_id] = n;
-	    }
-
-	    numDOFs = 0;
-	    for (n = 0; n < len; n++)
-	    {
-		k = tmp_node_localDOF_mask[n];
-		if (k >= 0)
-		{
-		    tmp_node_localDOF_mask[n] = numDOFs;
-		    numDOFs++;
-		}
-	    }
-#pragma omp parallel for private (n,k)
-	    for (n = 0; n < self->Nodes->numNodes; n++)
-	    {
-		k = tmp_node_localDOF_mask[self->Nodes->globalDegreesOfFreedom[n] - min_dof_id];
-		tmp_node_localDOF_map[n] = k;
-	    }
-	    /* create element coloring */
-	    if (Dudley_noError())
-		Dudley_Mesh_createColoring(self, tmp_node_localDOF_map);
+        localDOF_mask[m_nodes->globalDegreesOfFreedom[n] - dofRange.first] = n;
+    }
 
-	}
-	delete[] tmp_node_localDOF_mask;
-	delete[] tmp_node_localDOF_map;
+    dim_t numDOFs = 0;
+    for (index_t n = 0; n < len; n++) {
+        const index_t k = localDOF_mask[n];
+        if (k >= 0) {
+            localDOF_mask[n] = numDOFs;
+            numDOFs++;
+        }
     }
+#pragma omp parallel for
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+        localDOF_map[n] = localDOF_mask[
+                            m_nodes->globalDegreesOfFreedom[n] - dofRange.first];
+    }
+    // create element coloring
+    createColoring(localDOF_map);
+
+    delete[] localDOF_mask;
+    delete[] localDOF_map;
     delete[] mpiRankOfDOF;
-    return;
 }
+
+} // namespace dudley
+
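
The new distributeByRankOfDOF compacts the global DOF numbers into a contiguous local numbering in three passes: mark the global DOFs that occur on this rank, number the marked entries consecutively, then translate every node's global DOF into that local number. The same three passes as a standalone sketch (hypothetical helper, plain vectors instead of the NodeFile members):

    #include <cstddef>
    #include <vector>

    std::vector<long> buildLocalDOFMap(const std::vector<long>& globalDOF, // one entry per node
                                       long firstDOF, long lastDOF)        // inclusive DOF range
    {
        const long len = lastDOF - firstDOF + 1;
        std::vector<long> mask(len, -1);

        // pass 1: mark which global DOFs actually occur
        for (std::size_t n = 0; n < globalDOF.size(); n++)
            mask[globalDOF[n] - firstDOF] = 0;

        // pass 2: assign consecutive local numbers to the marked DOFs
        long numDOFs = 0;
        for (long k = 0; k < len; k++)
            if (mask[k] >= 0)
                mask[k] = numDOFs++;

        // pass 3: translate each node's global DOF to its local number
        std::vector<long> localDOF(globalDOF.size());
        for (std::size_t n = 0; n < globalDOF.size(); n++)
            localDOF[n] = mask[globalDOF[n] - firstDOF];
        return localDOF;
    }
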
diff --git a/dudley/src/Mesh_findMatchingFaces.cpp b/dudley/src/Mesh_findMatchingFaces.cpp
deleted file mode 100644
index ae160b3..0000000
--- a/dudley/src/Mesh_findMatchingFaces.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh */
-
-/* searches for faces in the mesh which are matching */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Util.h"
-#include "Mesh.h"
-
-#include "ShapeTable.h"
-
-/************************************************************************************/
-
-static double Dudley_Mesh_lockingGridSize = 0;
-
-int Dudley_Mesh_findMatchingFaces_compar(const void *arg1, const void *arg2)
-{
-    Dudley_Mesh_findMatchingFaces_center *e1, *e2;
-    bool l, g;
-    dim_t i;
-    e1 = (Dudley_Mesh_findMatchingFaces_center *) arg1;
-    e2 = (Dudley_Mesh_findMatchingFaces_center *) arg2;
-    for (i = 0; i < MAX_numDim; i++)
-    {
-	l = (e1->x[i] < e2->x[i] + Dudley_Mesh_lockingGridSize) ? TRUE : FALSE;
-	g = (e2->x[i] < e1->x[i] + Dudley_Mesh_lockingGridSize) ? TRUE : FALSE;
-	if (!(l && g))
-	{
-	    if (l)
-		return -1;
-	    if (g)
-		return 1;
-	}
-    }
-    if (e1->refId < e2->refId)
-    {
-	return -1;
-    }
-    else if (e1->refId > e2->refId)
-    {
-	return 1;
-    }
-    else
-    {
-	return 0;
-    }
-}
-
-void Dudley_Mesh_findMatchingFaces(Dudley_NodeFile * nodes, Dudley_ElementFile * faces, double safety_factor,
-				   double tolerance, dim_t * numPairs, index_t * elem0, index_t * elem1,
-				   index_t * matching_nodes_in_elem1)
-{
-#define getDist(_dist_,_e0_,_i0_,_e1_,_i1_) \
-      {dim_t i;   \
-      _dist_=0; \
-      for (i=0;i<numDim;i++) _dist_=MAX(_dist_,ABS(X[INDEX3(i,_i0_,_e0_,numDim,NN)]-X[INDEX3(i,_i1_,_e1_,numDim,NN)])); \
-      }
-    char error_msg[LenErrorMsg_MAX];
-    double h = DBLE(HUGE_VAL), h_local, dist, *X = NULL;
-    Dudley_Mesh_findMatchingFaces_center *center;
-    index_t e_0, e_1, *a1 = NULL, *a2 = NULL, *perm = NULL, *perm_tmp = NULL, *itmp_ptr = NULL;
-    const index_t *shiftNodes = NULL, *reverseNodes = NULL;
-    dim_t e, i, i0, i1, n, NN, numNodesOnFace;
-
-    dim_t numDim = nodes->numDim;
-
-    NN = faces->numNodes;
-
-    numNodesOnFace = numNodesOnFaceMap[faces->etype];
-    shiftNodes = shiftNodesMap[faces->etype];
-    reverseNodes = reverseNodesMap[faces->etype];
-
-    if (numNodesOnFace <= 0)
-    {
-	sprintf(error_msg,
-		"Dudley_Mesh_findMatchingFaces: matching faces cannot be applied to face elements of type %s",
-		getElementName(faces->etype));
-	Dudley_setError(TYPE_ERROR, error_msg);
-	return;
-    }
-    X = new  double[NN * numDim * faces->numElements];
-    center = new  Dudley_Mesh_findMatchingFaces_center[faces->numElements];
-    a1 = new  int[NN];
-    a2 = new  int[NN];
-    if (!(Dudley_checkPtr(X) || Dudley_checkPtr(center) || Dudley_checkPtr(a1) || Dudley_checkPtr(a2)))
-    {
-	/* OMP */
-	for (e = 0; e < faces->numElements; e++)
-	{
-	    /* get the coordinates of the nodes */
-	    Dudley_Util_Gather_double(NN, &(faces->Nodes[INDEX2(0, e, NN)]), numDim, nodes->Coordinates,
-				      &(X[INDEX3(0, 0, e, numDim, NN)]));
-	    /* get the element center */
-	    center[e].refId = e;
-	    for (i = 0; i < MAX_numDim; i++)
-		center[e].x[i] = 0;
-	    for (i0 = 0; i0 < numNodesOnFace; i0++)
-	    {
-		for (i = 0; i < numDim; i++)
-		    center[e].x[i] += X[INDEX3(i, i0, e, numDim, NN)];
-	    }
-	    for (i = 0; i < numDim; i++)
-		center[e].x[i] /= numNodesOnFace;
-	    /* get the minimum distance between nodes in the element */
-	    for (i0 = 0; i0 < numNodesOnFace; i0++)
-	    {
-		for (i1 = i0 + 1; i1 < numNodesOnFace; i1++)
-		{
-		    getDist(h_local, e, i0, e, i1);
-		    h = MIN(h, h_local);
-		}
-	    }
-	}
-	/* set the */
-	Dudley_Mesh_lockingGridSize = h * MAX(safety_factor, 0);
-#ifdef Dudley_TRACE
-	printf("locking grid size is %e\n", Dudley_Mesh_lockingGridSize);
-	printf("absolute tolerance is %e.\n", h * tolerance);
-#endif
-	/* sort the elements by center coordinates (lexicographical) */
-	qsort(center, faces->numElements, sizeof(Dudley_Mesh_findMatchingFaces_center),
-	      Dudley_Mesh_findMatchingFaces_compar);
-	/* find elements with matching center */
-	*numPairs = 0;
-	/* OMP */
-	for (e = 0; e < faces->numElements - 1 && Dudley_noError(); e++)
-	{
-	    dist = 0;
-	    for (i = 0; i < numDim; i++)
-		dist = MAX(dist, ABS(center[e].x[i] - center[e + 1].x[i]));
-	    if (dist < h * tolerance)
-	    {
-		e_0 = center[e].refId;
-		e_1 = center[e + 1].refId;
-		elem0[*numPairs] = e_0;
-		elem1[*numPairs] = e_1;
-		/* now the element e_1 is rotated such that the first node in element e_0 and e_1 have the same coordinates */
-		perm = a1;
-		perm_tmp = a2;
-		for (i = 0; i < NN; i++)
-		    perm[i] = i;
-		while (Dudley_noError())
-		{
-		    /* if node 0 and perm[0] are the same we are ready */
-		    getDist(dist, e_0, 0, e_1, perm[0]);
-		    if (dist <= h * tolerance)
-			break;
-		    if (shiftNodes[0] >= 0)
-		    {
-			/* rotate the nodes */
-			itmp_ptr = perm;
-			perm = perm_tmp;
-			perm_tmp = itmp_ptr;
-#pragma ivdep
-			for (i = 0; i < NN; i++)
-			    perm[i] = perm_tmp[shiftNodes[i]];
-		    }
-		    /* if the permutation is back at the identity, ie. perm[0]=0, the faces don't match: */
-		    if (perm[0] == 0)
-		    {
-			sprintf(error_msg,
-				"Mesh_findMatchingFaces:couldn't match first node of element %d to touching element %d",
-				e_0, e_1);
-			Dudley_setError(VALUE_ERROR, error_msg);
-		    }
-		}
-		/* now we check if the second nodes match */
-		if (Dudley_noError())
-		{
-		    if (numNodesOnFace > 1)
-		    {
-			getDist(dist, e_0, 1, e_1, perm[1]);
-			/* if the second node does not match we reverse the direction of the nodes */
-			if (dist > h * tolerance)
-			{
-			    /* rotate the nodes */
-			    if (reverseNodes[0] < 0)
-			    {
-				sprintf(error_msg,
-					"Mesh_findMatchingFaces:couldn't match the second node of element %d to touching element %d",
-					e_0, e_1);
-				Dudley_setError(VALUE_ERROR, error_msg);
-			    }
-			    else
-			    {
-				itmp_ptr = perm;
-				perm = perm_tmp;
-				perm_tmp = itmp_ptr;
-#pragma ivdep
-				for (i = 0; i < NN; i++)
-				    perm[i] = perm_tmp[reverseNodes[i]];
-				getDist(dist, e_0, 1, e_1, perm[1]);
-				if (dist > h * tolerance)
-				{
-				    sprintf(error_msg,
-					    "Mesh_findMatchingFaces:couldn't match the second node of element %d to touching element %d",
-					    e_0, e_1);
-				    Dudley_setError(VALUE_ERROR, error_msg);
-				}
-			    }
-			}
-		    }
-		}
-		/* we check if the rest of the face nodes match: */
-		if (Dudley_noError())
-		{
-		    for (i = 2; i < numNodesOnFace; i++)
-		    {
-			n = i;
-			getDist(dist, e_0, n, e_1, perm[n]);
-			if (dist > h * tolerance)
-			{
-			    sprintf(error_msg,
-				    "Mesh_findMatchingFaces:couldn't match the %d-th node of element %d to touching element %d",
-				    i, e_0, e_1);
-			    Dudley_setError(VALUE_ERROR, error_msg);
-			    break;
-			}
-		    }
-		}
-		/* copy over the permuted nodes of e_1 into matching_nodes_in_elem1 */
-		if (Dudley_noError())
-		{
-		    for (i = 0; i < NN; i++)
-			matching_nodes_in_elem1[INDEX2(i, *numPairs, NN)] = faces->Nodes[INDEX2(perm[i], e_1, NN)];
-		}
-		(*numPairs)++;
-	    }
-	}
-#ifdef Dudley_TRACE
-	printf("number of pairs of matching faces %d\n", *numPairs);
-#endif
-    }
-    /* clean up */
-    delete[] X;
-    delete[] center;
-    delete[] a1;
-    delete[] a2;
-
-#undef getDist
-}
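
The deleted Dudley_Mesh_findMatchingFaces pairs boundary faces by computing each face's centre, sorting the centres lexicographically and then comparing neighbouring entries against the tolerance h*tolerance. A minimal sketch of that sort-and-pair idea follows; the Center struct, the lexLess comparator and the toy data are invented for illustration and are not part of the Dudley API.

    // sort_and_pair_sketch.cpp -- illustrative only, not the Dudley API.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct Center { int refId; double x[3]; };

    // lexicographic order on the centre coordinates (refId breaks ties)
    static bool lexLess(const Center& a, const Center& b)
    {
        for (int i = 0; i < 3; ++i) {
            if (a.x[i] < b.x[i]) return true;
            if (a.x[i] > b.x[i]) return false;
        }
        return a.refId < b.refId;
    }

    int main()
    {
        const double tol = 1e-8;
        std::vector<Center> centers = {
            {0, {0.0, 0.5, 0.0}}, {1, {1.0, 0.5, 0.0}},
            {2, {1.0, 0.5, 0.0}}, {3, {0.0, 1.5, 0.0}}
        };
        std::sort(centers.begin(), centers.end(), lexLess);

        // after sorting, faces with (nearly) identical centres are adjacent,
        // so one linear sweep finds all matching pairs
        for (size_t e = 0; e + 1 < centers.size(); ++e) {
            double dist = 0.0;
            for (int i = 0; i < 3; ++i)
                dist = std::max(dist, std::fabs(centers[e].x[i] - centers[e+1].x[i]));
            if (dist < tol)
                std::printf("faces %d and %d match\n",
                            centers[e].refId, centers[e+1].refId);
        }
        return 0;
    }
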
diff --git a/dudley/src/Mesh_getPattern.cpp b/dudley/src/Mesh_getPattern.cpp
index 49531fe..afa4d35 100644
--- a/dudley/src/Mesh_getPattern.cpp
+++ b/dudley/src/Mesh_getPattern.cpp
@@ -14,156 +14,55 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/* Dudley: Mesh */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
+#include "DudleyDomain.h"
 #include "IndexList.h"
-#include <boost/scoped_array.hpp>
 
-/************************************************************************************/
+#include <boost/scoped_array.hpp>
 
-/* returns a reference to the matrix pattern                  */
+namespace dudley {
 
-paso::SystemMatrixPattern_ptr Dudley_getPattern(Dudley_Mesh* mesh, bool reduce_row_order, bool reduce_col_order)
+#ifdef ESYS_HAVE_PASO
+paso::SystemMatrixPattern_ptr DudleyDomain::getPasoPattern() const
 {
-    paso::SystemMatrixPattern_ptr out;
-    Dudley_resetError();
-    /* make sure that the requested pattern is available */
-    if (reduce_row_order)
-    {
-        if (reduce_col_order)
-        {
-            if (mesh->ReducedReducedPattern == NULL)
-                mesh->ReducedReducedPattern = Dudley_makePattern(mesh, reduce_row_order, reduce_col_order);
-        }
-        else
-        {
-            if (mesh->ReducedFullPattern == NULL)
-                mesh->ReducedFullPattern = Dudley_makePattern(mesh, reduce_row_order, reduce_col_order);
-        }
-    }
-    else
-    {
-        if (reduce_col_order)
-        {
-            if (mesh->FullReducedPattern == NULL)
-                mesh->FullReducedPattern = Dudley_makePattern(mesh, reduce_row_order, reduce_col_order);
-        }
-        else
-        {
-            if (mesh->FullFullPattern == NULL)
-                mesh->FullFullPattern = Dudley_makePattern(mesh, reduce_row_order, reduce_col_order);
-        }
-    }
-    if (Dudley_noError())
-    {
-        if (reduce_row_order)
-        {
-            if (reduce_col_order)
-            {
-                out = mesh->ReducedReducedPattern;
-            }
-            else
-            {
-                out = mesh->ReducedFullPattern;
-            }
-        }
-        else
-        {
-            if (reduce_col_order)
-            {
-                out = mesh->FullReducedPattern;
-            }
-            else
-            {
-                out = mesh->FullFullPattern;
-            }
-        }
-    }
-    return out;
+    // make sure that the pattern is available
+    if (!pasoPattern)
+        pasoPattern = makePasoPattern();
+
+    return pasoPattern;
 }
 
-paso::SystemMatrixPattern_ptr Dudley_makePattern(Dudley_Mesh* mesh, bool reduce_row_order, bool reduce_col_order)
+paso::SystemMatrixPattern_ptr DudleyDomain::makePasoPattern() const
 {
-    paso::SystemMatrixPattern_ptr out;
-    paso::Pattern_ptr main_pattern, col_couple_pattern, row_couple_pattern;
-    paso::Connector_ptr col_connector, row_connector;
-    Dudley_NodeMapping *colMap = NULL, *rowMap = NULL;
-    paso::Distribution_ptr colDistribution, rowDistribution;
-
-    Dudley_resetError();
-
-    if (reduce_col_order)
-    {
-        colMap = mesh->Nodes->reducedDegreesOfFreedomMapping;
-        colDistribution = mesh->Nodes->reducedDegreesOfFreedomDistribution;
-        col_connector = mesh->Nodes->reducedDegreesOfFreedomConnector;
-
-    }
-    else
-    {
-        colMap = mesh->Nodes->degreesOfFreedomMapping;
-        colDistribution = mesh->Nodes->degreesOfFreedomDistribution;
-        col_connector = mesh->Nodes->degreesOfFreedomConnector;
-    }
+    const dim_t myNumTargets = m_nodes->getNumDegreesOfFreedom();
+    const dim_t numTargets = m_nodes->getNumDegreesOfFreedomTargets();
+    const index_t* target = m_nodes->borrowTargetDegreesOfFreedom();
+    boost::scoped_array<IndexList> index_list(new IndexList[numTargets]);
 
-    if (reduce_row_order)
-    {
-        rowMap = mesh->Nodes->reducedDegreesOfFreedomMapping;
-        rowDistribution = mesh->Nodes->reducedDegreesOfFreedomDistribution;
-        row_connector = mesh->Nodes->reducedDegreesOfFreedomConnector;
-    }
-    else
+#pragma omp parallel
     {
-        rowMap = mesh->Nodes->degreesOfFreedomMapping;
-        rowDistribution = mesh->Nodes->degreesOfFreedomDistribution;
-        row_connector = mesh->Nodes->degreesOfFreedomConnector;
+        // insert contributions from element matrices into columns in indexlist
+        IndexList_insertElements(index_list.get(), m_elements, target);
+        IndexList_insertElements(index_list.get(), m_faceElements, target);
+        IndexList_insertElements(index_list.get(), m_points, target);
     }
 
-    boost::scoped_array<IndexList> index_list(new IndexList[rowMap->numTargets]);
-    {
-
-#pragma omp parallel
-        {
-            /*  insert contributions from element matrices into columns index index_list: */
-            Dudley_IndexList_insertElements(index_list.get(), mesh->Elements,
-                                            reduce_row_order, rowMap->target, reduce_col_order, colMap->target);
-            Dudley_IndexList_insertElements(index_list.get(), mesh->FaceElements,
-                                            reduce_row_order, rowMap->target, reduce_col_order, colMap->target);
-            Dudley_IndexList_insertElements(index_list.get(), mesh->Points,
-                                            reduce_row_order, rowMap->target, reduce_col_order, colMap->target);
-
-        }
-
-        /* create pattern */
-        main_pattern = paso::Pattern::fromIndexListArray(0,
-            rowDistribution->getMyNumComponents(), index_list.get(),
-            0, colDistribution->getMyNumComponents(), 0);
-        col_couple_pattern = paso::Pattern::fromIndexListArray(0,
-            rowDistribution->getMyNumComponents(), index_list.get(),
-            colDistribution->getMyNumComponents(), colMap->numTargets,
-            -colDistribution->getMyNumComponents());
-        row_couple_pattern = paso::Pattern::fromIndexListArray(
-            rowDistribution->getMyNumComponents(), rowMap->numTargets,
-            index_list.get(), 0, colDistribution->getMyNumComponents(), 0);
-
-        /* if everything is in order we can create the return value */
-        if (Dudley_noError())
-        {
-            out.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-                    rowDistribution, colDistribution, main_pattern,
-                    col_couple_pattern, row_couple_pattern, col_connector,
-                    row_connector));
-        }
-    }
-    esysUtils::Esys_MPIInfo_noError(mesh->MPIInfo);
+    // create pattern
+    paso::Pattern_ptr mainPattern(paso::Pattern::fromIndexListArray(0,
+              myNumTargets, index_list.get(), 0, myNumTargets, 0));
+    paso::Pattern_ptr colCouplePattern(paso::Pattern::fromIndexListArray(0,
+              myNumTargets, index_list.get(), myNumTargets, numTargets,
+              -myNumTargets));
+    paso::Pattern_ptr rowCouplePattern(paso::Pattern::fromIndexListArray(
+              myNumTargets, numTargets, index_list.get(), 0, myNumTargets, 0));
+
+    paso::Connector_ptr connector(m_nodes->degreesOfFreedomConnector);
+    paso::SystemMatrixPattern_ptr out(new paso::SystemMatrixPattern(
+                MATRIX_FORMAT_DEFAULT, m_nodes->dofDistribution,
+                m_nodes->dofDistribution, mainPattern, colCouplePattern,
+                rowCouplePattern, connector, connector));
     return out;
 }
+#endif // ESYS_HAVE_PASO
+
+} // namespace dudley
 
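
The new getPasoPattern() replaces the four reduced/full pattern caches with a single lazily built pattern. A small sketch of that lazy-initialisation idiom for a const accessor; Pattern and PatternCache are placeholder types for this example, not the escript/paso classes.

    // lazy_pattern_sketch.cpp -- placeholder types, not the escript API.
    #include <cstdio>
    #include <memory>

    struct Pattern { int rows; };
    using Pattern_ptr = std::shared_ptr<Pattern>;

    class PatternCache {
    public:
        // const accessor: builds the pattern on first use, then reuses it,
        // mirroring the shape of DudleyDomain::getPasoPattern()
        Pattern_ptr getPattern() const
        {
            if (!pattern)
                pattern = makePattern();
            return pattern;
        }

    private:
        Pattern_ptr makePattern() const
        {
            std::puts("building pattern (expensive step runs once)");
            return std::make_shared<Pattern>(Pattern{42});
        }

        // mutable so the cache can be filled from a const method
        mutable Pattern_ptr pattern;
    };

    int main()
    {
        PatternCache cache;
        cache.getPattern();                                   // builds
        std::printf("rows=%d\n", cache.getPattern()->rows);   // cached
        return 0;
    }
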
diff --git a/dudley/src/Mesh_markNodes.cpp b/dudley/src/Mesh_markNodes.cpp
deleted file mode 100644
index 2a8cea2..0000000
--- a/dudley/src/Mesh_markNodes.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh */
-
-/*   mark the used nodes with offset: */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_markNodes(index_t * mask, index_t offset, Dudley_Mesh * in, bool useLinear)
-{
-    Dudley_ElementFile_markNodes(mask, offset, in->Nodes->numNodes, in->Elements, useLinear);
-    Dudley_ElementFile_markNodes(mask, offset, in->Nodes->numNodes, in->FaceElements, useLinear);
-    Dudley_ElementFile_markNodes(mask, offset, in->Nodes->numNodes, in->Points, useLinear);
-}
-
-void Dudley_Mesh_markDOFsConnectedToRange(index_t * mask, index_t offset, index_t marker,
-					  index_t firstDOF, index_t lastDOF, Dudley_Mesh * in, bool useLinear)
-{
-    index_t *dofIndex;
-    if (useLinear)
-    {
-	dofIndex = in->Nodes->globalReducedDOFIndex;
-    }
-    else
-    {
-	dofIndex = in->Nodes->globalDegreesOfFreedom;
-    }
-    Dudley_ElementFile_markDOFsConnectedToRange(mask, offset, marker, firstDOF, lastDOF, dofIndex, in->Elements,
-						useLinear);
-    Dudley_ElementFile_markDOFsConnectedToRange(mask, offset, marker, firstDOF, lastDOF, dofIndex, in->FaceElements,
-						useLinear);
-    Dudley_ElementFile_markDOFsConnectedToRange(mask, offset, marker, firstDOF, lastDOF, dofIndex, in->Points,
-						useLinear);
-}
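
The removed markNodes helpers fill a mask array which other code (for example the old Dudley_Mesh_prepare further below, via Dudley_Util_packMask) then compacts into a dense index list. A self-contained sketch of that mark-and-pack pattern on plain vectors, with made-up connectivity data:

    // mark_and_pack_sketch.cpp -- illustrative, not the Dudley API.
    #include <cstdio>
    #include <vector>

    int main()
    {
        const int numNodes = 10;
        // element connectivity of a toy mesh: only these nodes are "used"
        const std::vector<int> elementNodes = {0, 2, 3, 7, 7, 9};

        // 1) mark: initialise the mask to -1, then flag every used node
        std::vector<int> mask(numNodes, -1);
        for (int n : elementNodes)
            mask[n] = 1;

        // 2) pack: compact the flagged nodes into a dense index list and
        //    store each node's new (reduced) number back into the mask
        std::vector<int> index;
        for (int i = 0; i < numNodes; ++i) {
            if (mask[i] >= 0) {
                mask[i] = static_cast<int>(index.size());
                index.push_back(i);
            }
        }

        for (size_t k = 0; k < index.size(); ++k)
            std::printf("reduced node %zu -> original node %d\n", k, index[k]);
        return 0;
    }
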
diff --git a/dudley/src/Mesh_optimizeDOFDistribution.cpp b/dudley/src/Mesh_optimizeDOFDistribution.cpp
index 7ac2f83..50b641c 100644
--- a/dudley/src/Mesh_optimizeDOFDistribution.cpp
+++ b/dudley/src/Mesh_optimizeDOFDistribution.cpp
@@ -14,24 +14,13 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Dudley: Mesh: optimizes the distribution of DOFs across processors */
-/*   using ParMETIS. On return a new distribution is given and the globalDOF are relabelled */
-/*   accordingly but the mesh has not been redistributed yet                             */
-
-/************************************************************************************/
+#include "DudleyDomain.h"
+#include "IndexList.h"
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <escript/index.h>
 
-#include "Mesh.h"
-#include "IndexList.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-#ifdef USE_PARMETIS
-#include "parmetis.h"
+#ifdef ESYS_HAVE_PARMETIS
+#include <parmetis.h>
 #ifndef REALTYPEWIDTH
 typedef float real_t;
 #endif
@@ -39,245 +28,217 @@ typedef float real_t;
 
 #include <boost/scoped_array.hpp>
 
-/************************************************************************************
-   Check whether there is any node which has no vertex. In case 
-   such node exists, we don't use parmetis since parmetis requires
-   that every node has at least 1 vertex (at line 129 of file
-   "xyzpart.c" in parmetis 3.1.1, variable "nvtxs" would be 0 if 
-   any node has no vertex).
- ************************************************************************************/
-#ifdef USE_PARMETIS
-int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
+namespace dudley {
+
+#ifdef ESYS_HAVE_PARMETIS
+// Checks whether there is any rank which has no vertex. In case
+// such a rank exists, we don't use parmetis since parmetis requires
+// that every rank has at least 1 vertex (at line 129 of file
+// "xyzpart.c" in parmetis 3.1.1, variable "nvtxs" would be 0 if
+// any rank has no vertex).
+static bool allRanksHaveNodes(escript::JMPI mpiInfo,
+                              const IndexVector& distribution)
 {
-    dim_t i, len;
-    int ret_val = 1;
+    int ret = 1;
 
-    if (rank == 0)
-    {
-        for (i = 0; i < mpiSize; i++)
-        {
-            len = distribution[i + 1] - distribution[i];
-            if (len == 0)
-            {
-                ret_val = 0;
+    if (mpiInfo->rank == 0) {
+        for (int i = 0; i < mpiInfo->size; i++) {
+            if (distribution[i + 1] == distribution[i]) {
+                ret = 0;
                 break;
             }
         }
+        if (ret == 0)
+            std::cerr << "INFO: ParMetis is not used since at least one rank "
+                         "has no vertex." << std::endl;
     }
-    MPI_Bcast(&ret_val, 1, MPI_INTEGER, 0, *comm);
-    if (ret_val == 0)
-        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
-    return ret_val;
+    MPI_Bcast(&ret, 1, MPI_INTEGER, 0, mpiInfo->comm);
+    return ret==1;
 }
 #endif
 
-/*****************************************************************************/
-
-void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh* in, dim_t* distribution)
+/// optimizes the distribution of DOFs across processors using ParMETIS.
+/// On return a new distribution is given and the globalDOF are relabeled
+/// accordingly but the mesh has not been redistributed yet
+void DudleyDomain::optimizeDOFDistribution(std::vector<index_t>& distribution)
 {
-    if (in == NULL || in->Nodes == NULL)
-        return;
-
-    dim_t i, k;
-    Esys_MPI_rank rank;
-    int c;
-
-    const Esys_MPI_rank myRank = in->MPIInfo->rank;
-    dim_t mpiSize = in->MPIInfo->size;
+    int mpiSize = m_mpiInfo->size;
+    const int myRank = m_mpiInfo->rank;
+    const index_t myFirstVertex = distribution[myRank];
+    const index_t myLastVertex = distribution[myRank + 1];
+    const dim_t myNumVertices = myLastVertex - myFirstVertex;
+    const dim_t numNodes = m_nodes->getNumNodes();
 
     // first step is to distribute the elements according to a global X of DOF
-
-    index_t myFirstVertex = distribution[myRank];
-    index_t myLastVertex = distribution[myRank + 1];
-    dim_t myNumVertices = myLastVertex - myFirstVertex;
-    dim_t globalNumVertices = distribution[mpiSize];
     dim_t len = 0;
-    for (dim_t p = 0; p < mpiSize; ++p)
-        len = MAX(len, distribution[p + 1] - distribution[p]);
+    for (int p = 0; p < mpiSize; ++p)
+        len = std::max(len, distribution[p + 1] - distribution[p]);
 
     index_t* partition = new index_t[len];
-    dim_t* partition_count = new dim_t[mpiSize + 1];
-    dim_t* new_distribution = new dim_t[mpiSize + 1];
-    index_t* newGlobalDOFID = new index_t[len];
-    bool* setNewDOFId = new bool[in->Nodes->numNodes];
-    dim_t* recvbuf = new dim_t[mpiSize * mpiSize];
 
-#ifdef USE_PARMETIS
-    dim_t dim = in->Nodes->numDim;
-    real_t* xyz = new real_t[myNumVertices * dim];
+#ifdef ESYS_HAVE_PARMETIS
+    if (mpiSize > 1 && allRanksHaveNodes(m_mpiInfo, distribution)) {
+        boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
+        int dim = m_nodes->numDim;
 
-    /* set the coordinates: */
-    /* it is assumed that at least one node on this processor provides a coordinate */
-#pragma omp parallel for private(i,k)
-    for (i = 0; i < in->Nodes->numNodes; ++i)
-    {
-        k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
-        if ((k >= 0) && (k < myNumVertices))
+        // create the adjacency structure xadj and adjncy
+#pragma omp parallel
         {
-            for (dim_t j = 0; j < dim; ++j)
-                xyz[k * dim + j] = (real_t)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
+            // insert contributions from element matrices into columns index
+            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                    myFirstVertex, myLastVertex, m_elements,
+                    m_nodes->globalDegreesOfFreedom);
+            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                    myFirstVertex, myLastVertex, m_faceElements,
+                    m_nodes->globalDegreesOfFreedom);
+            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                    myFirstVertex, myLastVertex, m_points,
+                    m_nodes->globalDegreesOfFreedom);
         }
-    }
-#endif // USE_PARMETIS
 
-    boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
-    /* ksteube CSR of DOF IDs */
-    /* create the adjacency structure xadj and adjncy */
-    {
-#pragma omp parallel
-        {
-            /* ksteube build CSR format */
-            /*  insert contributions from element matrices into columns index index_list: */
-            Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-            myFirstVertex, myLastVertex, in->Elements,
-            in->Nodes->globalDegreesOfFreedom,
-            in->Nodes->globalDegreesOfFreedom);
-            Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-            myFirstVertex, myLastVertex, in->FaceElements,
-            in->Nodes->globalDegreesOfFreedom,
-            in->Nodes->globalDegreesOfFreedom);
-            Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-            myFirstVertex, myLastVertex, in->Points,
-            in->Nodes->globalDegreesOfFreedom,
-            in->Nodes->globalDegreesOfFreedom);
+        // set the coordinates
+        real_t* xyz = new real_t[myNumVertices * dim];
+#pragma omp parallel for
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i] - myFirstVertex;
+            if (k >= 0 && k < myNumVertices) {
+                for (int j = 0; j < dim; ++j)
+                    xyz[k * dim + j] = (real_t)(m_nodes->Coordinates[INDEX2(j, i, dim)]);
+            }
         }
 
-        /* create the local matrix pattern */
-        paso::Pattern_ptr pattern = paso::Pattern::fromIndexListArray(0,
-                myNumVertices, index_list.get(), 0, globalNumVertices, 0);
+        // create the local CSR matrix pattern
+        const dim_t globalNumVertices = distribution[mpiSize];
+        index_t* ptr = new index_t[myNumVertices + 1];
+#pragma omp parallel for
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            ptr[i] = index_list[i].count(0, globalNumVertices);
+        }
+        // accumulate ptr
+        dim_t s = 0;
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            const index_t itmp = ptr[i];
+            ptr[i] = s;
+            s += itmp;
+        }
+        ptr[myNumVertices] = s;
 
-        if (Dudley_noError())
-        {
-#ifdef USE_PARMETIS
+        // create index
+        index_t* index = new index_t[s];
+#pragma omp parallel for
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            index_list[i].toArray(&index[ptr[i]], 0, globalNumVertices, 0);
+        }
 
-            if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
-            {
-                int i;
-                int wgtflag = 0;
-                int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
-                int ncon = 1;
-                int edgecut;
-                int options[3];
-                real_t *tpwgts = new real_t[ncon * mpiSize];
-                real_t *ubvec = new real_t[ncon];
-                for (i = 0; i < ncon * mpiSize; i++)
-                    tpwgts[i] = 1.0 / (real_t)mpiSize;
-                for (i = 0; i < ncon; i++)
-                    ubvec[i] = 1.05;
-                options[0] = 1;
-                options[1] = 15;
-                options[2] = 0;
-                ParMETIS_V3_PartGeomKway(distribution, pattern->ptr,
-                        pattern->index, NULL, NULL, &wgtflag, &numflag, &dim,
-                        xyz, &ncon, &mpiSize, tpwgts, ubvec, options, &edgecut,
-                        partition, /* new CPU ownership of elements */
-                        &in->MPIInfo->comm);
-                //printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1));
-                delete[] xyz;
-                delete[] ubvec;
-                delete[] tpwgts;
-            }
-            else
-            {
-                for (i = 0; i < myNumVertices; ++i)
-                    partition[i] = 0;       /* CPU 0 owns it */
-            }
+        index_t wgtflag = 0;
+        index_t numflag = 0;
+        index_t ncon = 1;
+        index_t edgecut;
+        index_t impiSize = mpiSize;
+        index_t options[3] = { 1, 0, 0 };
+        std::vector<real_t> tpwgts(ncon * mpiSize, 1.f / mpiSize);
+        std::vector<real_t> ubvec(ncon, 1.05f);
+        ParMETIS_V3_PartGeomKway(&distribution[0], ptr, index, NULL, NULL,
+                                 &wgtflag, &numflag, &dim, xyz, &ncon,
+                                 &impiSize, &tpwgts[0], &ubvec[0], options,
+                                 &edgecut, partition, &m_mpiInfo->comm);
+        delete[] xyz;
+        delete[] index;
+        delete[] ptr;
+    } else {
+        for (index_t i = 0; i < myNumVertices; ++i)
+            partition[i] = 0; // CPU 0 owns all
+    }
 #else
-            for (i = 0; i < myNumVertices; ++i)
-                partition[i] = myRank;      /* CPU 0 owns it */
-#endif // USE_PARMETIS
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumVertices; ++i)
+        partition[i] = myRank;
+#endif // ESYS_HAVE_PARMETIS
 
-        }
-
-        // create a new distribution and labelling of the DOF
-        const size_t mpiSize_size = mpiSize * sizeof(dim_t);
-        memset(new_distribution, 0, mpiSize_size);
+    // create a new distribution and labeling of the DOF
+    IndexVector new_distribution(mpiSize + 1);
 #pragma omp parallel
-        {
-            dim_t* loc_partition_count = new dim_t[mpiSize];
-            memset(loc_partition_count, 0, mpiSize_size);
-#pragma omp for private(i)
-            for (i = 0; i < myNumVertices; ++i)
-                loc_partition_count[partition[i]]++;
+    {
+        IndexVector loc_partition_count(mpiSize);
+#pragma omp for
+        for (index_t i = 0; i < myNumVertices; ++i)
+            loc_partition_count[partition[i]]++;
 #pragma omp critical
-            {
-                for (i = 0; i < mpiSize; ++i)
-                    new_distribution[i] += loc_partition_count[i];
-            }
-            delete[] loc_partition_count;
+        {
+            for (int i = 0; i < mpiSize; ++i)
+                new_distribution[i] += loc_partition_count[i];
         }
+    }
+
+    IndexVector recvbuf(mpiSize * mpiSize);
 #ifdef ESYS_MPI
-        // recvbuf will be the concatenation of each CPU's contribution to
-        // new_distribution
-        MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
+    // recvbuf will be the concatenation of each CPU's contribution to
+    // new_distribution
+    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, &recvbuf[0],
+                  mpiSize, MPI_DIM_T, m_mpiInfo->comm);
 #else
-        for (i = 0; i < mpiSize; ++i)
-            recvbuf[i] = new_distribution[i];
+    for (int i = 0; i < mpiSize; ++i)
+        recvbuf[i] = new_distribution[i];
 #endif
-        new_distribution[0] = 0;
-        for (rank = 0; rank < mpiSize; rank++)
-        {
-            c = 0;
-            for (i = 0; i < myRank; ++i)
-                c += recvbuf[rank + mpiSize * i];
-            for (i = 0; i < myNumVertices; ++i)
-            {
-                if (rank == partition[i])
-                {
-                    newGlobalDOFID[i] = new_distribution[rank] + c;
-                    c++;
-                }
+    new_distribution[0] = 0;
+    index_t* newGlobalDOFID = new index_t[len];
+    for (int rank = 0; rank < mpiSize; rank++) {
+        index_t c = 0;
+        for (int i = 0; i < myRank; ++i)
+            c += recvbuf[rank + mpiSize * i];
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            if (rank == partition[i]) {
+                newGlobalDOFID[i] = new_distribution[rank] + c;
+                c++;
             }
-            for (i = myRank + 1; i < mpiSize; ++i)
-                c += recvbuf[rank + mpiSize * i];
-            new_distribution[rank + 1] = new_distribution[rank] + c;
         }
-        delete[] recvbuf;
+        for (int i = myRank + 1; i < mpiSize; ++i)
+            c += recvbuf[rank + mpiSize * i];
+        new_distribution[rank + 1] = new_distribution[rank] + c;
+    }
 
-        // now the overlap needs to be created by sending the partition around
+    // now the overlap needs to be created by sending the partition around
 #ifdef ESYS_MPI
-        Esys_MPI_rank dest = esysUtils::mod_rank(mpiSize, myRank + 1);
-        Esys_MPI_rank source = esysUtils::mod_rank(mpiSize, myRank - 1);
+    int dest = m_mpiInfo->mod_rank(myRank + 1);
+    int source = m_mpiInfo->mod_rank(myRank - 1);
 #endif
-        Esys_MPI_rank current_rank = myRank;
-#pragma omp parallel for private(i)
-        for (i = 0; i < in->Nodes->numNodes; ++i)
-            setNewDOFId[i] = true;
-
-        for (dim_t p = 0; p < mpiSize; ++p)
-        {
-            index_t firstVertex = distribution[current_rank];
-            index_t lastVertex = distribution[current_rank + 1];
-#pragma omp parallel for private(i,k)
-            for (i = 0; i < in->Nodes->numNodes; ++i)
-            {
-                k = in->Nodes->globalDegreesOfFreedom[i];
-                if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
-                {
-                    in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
-                    setNewDOFId[i] = false;
-                }
+    int current_rank = myRank;
+    bool* setNewDOFId = new bool[numNodes];
+#pragma omp parallel for
+    for (index_t i = 0; i < numNodes; ++i)
+        setNewDOFId[i] = true;
+
+    for (int p = 0; p < mpiSize; ++p) {
+        const index_t firstVertex = distribution[current_rank];
+        const index_t lastVertex = distribution[current_rank + 1];
+#pragma omp parallel for
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i];
+            if (setNewDOFId[i] && firstVertex <= k && k < lastVertex) {
+                m_nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
+                setNewDOFId[i] = false;
             }
+        }
 
-            if (p < mpiSize - 1)
-            {               /* the final send can be skipped */
+        if (p < mpiSize - 1) { // the final send can be skipped
 #ifdef ESYS_MPI
-                MPI_Status status;
-                MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
-                                     dest, in->MPIInfo->msg_tag_counter,
-                                     source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
+            MPI_Status status;
+            MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_DIM_T,
+                                 dest, m_mpiInfo->counter(),
+                                 source, m_mpiInfo->counter(),
+                                 m_mpiInfo->comm, &status);
+            m_mpiInfo->incCounter();
 #endif
-                in->MPIInfo->msg_tag_counter++;
-                current_rank = esysUtils::mod_rank(mpiSize, current_rank - 1);
-            }
+            current_rank = m_mpiInfo->mod_rank(current_rank - 1);
         }
-        for (i = 0; i < mpiSize + 1; ++i)
-            distribution[i] = new_distribution[i];
     }
+    for (int i = 0; i < mpiSize + 1; ++i)
+        distribution[i] = new_distribution[i];
+
     delete[] newGlobalDOFID;
     delete[] setNewDOFId;
-    delete[] new_distribution;
-    delete[] partition_count;
     delete[] partition;
 }
 
+} // namespace dudley
+
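
The rewritten optimizeDOFDistribution assembles the ParMETIS adjacency in CSR form: ptr[i] first receives the neighbour count of row i and is then turned into row offsets by an in-place exclusive prefix sum before index[] is filled. A toy sketch of that counts-to-offsets step, with hard-coded neighbour lists instead of IndexList:

    // csr_build_sketch.cpp -- toy data; the real code takes counts from IndexList.
    #include <cstdio>
    #include <vector>

    int main()
    {
        // neighbour lists of 4 rows (already deduplicated, diagonal removed)
        const std::vector<std::vector<int>> nbrs = {
            {1, 2}, {0, 2, 3}, {0, 1}, {1}
        };
        const int n = static_cast<int>(nbrs.size());

        // 1) counts
        std::vector<int> ptr(n + 1);
        for (int i = 0; i < n; ++i)
            ptr[i] = static_cast<int>(nbrs[i].size());

        // 2) in-place exclusive prefix sum: counts become row offsets
        int s = 0;
        for (int i = 0; i < n; ++i) {
            const int tmp = ptr[i];
            ptr[i] = s;
            s += tmp;
        }
        ptr[n] = s;

        // 3) scatter the neighbour lists into the flat index array
        std::vector<int> index(s);
        for (int i = 0; i < n; ++i)
            for (size_t j = 0; j < nbrs[i].size(); ++j)
                index[ptr[i] + j] = nbrs[i][j];

        for (int i = 0; i < n; ++i) {
            std::printf("row %d:", i);
            for (int k = ptr[i]; k < ptr[i + 1]; ++k)
                std::printf(" %d", index[k]);
            std::printf("\n");
        }
        return 0;
    }
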
diff --git a/dudley/src/Mesh_optimizeDOFLabeling.cpp b/dudley/src/Mesh_optimizeDOFLabeling.cpp
index 863b0c9..b3909e9 100644
--- a/dudley/src/Mesh_optimizeDOFLabeling.cpp
+++ b/dudley/src/Mesh_optimizeDOFLabeling.cpp
@@ -14,121 +14,93 @@
 *
 *****************************************************************************/
 
-/**********************************************************************************************/
-
-/*   Dudley: Mesh: optimizes the labeling of the DOFs on each processor */
-
-/**********************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
+#include "DudleyDomain.h"
 #include "IndexList.h"
 
 #include <boost/scoped_array.hpp>
 
-/************************************************************************************/
+namespace dudley {
 
-void Dudley_Mesh_optimizeDOFLabeling(Dudley_Mesh * in, dim_t * distribution)
+/// optimizes the labeling of the DOFs on each processor
+void DudleyDomain::optimizeDOFLabeling(const IndexVector& distribution)
 {
-
-    index_t myFirstVertex, myLastVertex, *newGlobalDOFID = NULL, firstVertex, lastVertex;
-    register index_t k;
-    dim_t mpiSize, myNumVertices, len, p, i;
-    paso::Pattern_ptr pattern;
-    Esys_MPI_rank myRank, current_rank;
-#ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
-#endif
-
-    if (in == NULL)
-	return;
-    if (in->Nodes == NULL)
-	return;
-
-    myRank = in->MPIInfo->rank;
-    mpiSize = in->MPIInfo->size;
-    myFirstVertex = distribution[myRank];
-    myLastVertex = distribution[myRank + 1];
-    myNumVertices = myLastVertex - myFirstVertex;
-    len = 0;
-    for (p = 0; p < mpiSize; ++p)
-	len = MAX(len, distribution[p + 1] - distribution[p]);
+    // this method relies on Pattern::reduceBandwidth so requires PASO
+    // at the moment
+#ifdef ESYS_HAVE_PASO
+    const int myRank = m_mpiInfo->rank;
+    const int mpiSize = m_mpiInfo->size;
+    const index_t myFirstVertex = distribution[myRank];
+    const index_t myLastVertex = distribution[myRank+1];
+    const dim_t myNumVertices = myLastVertex-myFirstVertex;
+    dim_t len = 0;
+    for (int p=0; p<mpiSize; ++p)
+        len=std::max(len, distribution[p+1]-distribution[p]);
 
     boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
-    newGlobalDOFID = new  index_t[len];
-    /* create the adjacency structure xadj and adjncy */
+    boost::scoped_array<index_t> newGlobalDOFID(new index_t[len]);
+
+    // create the adjacency structure xadj and adjncy
+#pragma omp parallel
     {
-#pragma omp parallel private(i)
-	{
-	    /*  insert contributions from element matrices into columns index index_list: */
-	    Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, in->Elements,
-                in->Nodes->globalDegreesOfFreedom, in->Nodes->globalDegreesOfFreedom);
-	    Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, in->FaceElements,
-                in->Nodes->globalDegreesOfFreedom,
-                in->Nodes->globalDegreesOfFreedom);
-	    Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, in->Points,
-                in->Nodes->globalDegreesOfFreedom,
-                in->Nodes->globalDegreesOfFreedom);
-	}
-	/* create the local matrix pattern */
-	pattern = paso::Pattern::fromIndexListArray(0, myNumVertices, index_list.get(),
-            myFirstVertex, myLastVertex, -myFirstVertex);
+        // insert contributions from element matrices into columns index
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+            myFirstVertex, myLastVertex, m_elements,
+            m_nodes->globalDegreesOfFreedom);
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+            myFirstVertex, myLastVertex, m_faceElements,
+            m_nodes->globalDegreesOfFreedom);
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+            myFirstVertex, myLastVertex, m_points,
+            m_nodes->globalDegreesOfFreedom);
+    }
+    // create the local matrix pattern
+    paso::Pattern_ptr pattern = paso::Pattern::fromIndexListArray(0,
+            myNumVertices, index_list.get(), myFirstVertex, myLastVertex,
+            -myFirstVertex);
 
-	if (Dudley_noError())
-	    pattern->reduceBandwidth(newGlobalDOFID);
+    pattern->reduceBandwidth(&newGlobalDOFID[0]);
 
-    }
-    esysUtils::Esys_MPIInfo_noError(in->MPIInfo);
-    if (Dudley_noError())
-    {
-	/* shift new labeling to create a global id */
-#pragma omp parallel for private(i)
-	for (i = 0; i < myNumVertices; ++i)
-	    newGlobalDOFID[i] += myFirstVertex;
+    // shift new labeling to create a global id
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumVertices; ++i)
+        newGlobalDOFID[i] += myFirstVertex;
 
-	/* distribute new labeling to other processors */
+    // distribute new labeling to other processors
 #ifdef ESYS_MPI
-	dest = esysUtils::mod_rank(mpiSize, myRank + 1);
-	source = esysUtils::mod_rank(mpiSize, myRank - 1);
+    const int dest = m_mpiInfo->mod_rank(myRank + 1);
+    const int source = m_mpiInfo->mod_rank(myRank - 1);
 #endif
-	current_rank = myRank;
-	for (p = 0; p < mpiSize; ++p)
-	{
-	    firstVertex = distribution[current_rank];
-	    lastVertex = distribution[current_rank + 1];
-#pragma omp parallel for private(i,k)
-	    for (i = 0; i < in->Nodes->numNodes; ++i)
-	    {
-		k = in->Nodes->globalDegreesOfFreedom[i];
-		if ((firstVertex <= k) && (k < lastVertex))
-		{
-		    in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
-		}
-	    }
-
-	    if (p < mpiSize - 1)
-	    {			/* the final send can be skipped */
+    int current_rank = myRank;
+    for (int p = 0; p < mpiSize; ++p) {
+        const index_t firstVertex = distribution[current_rank];
+        const index_t lastVertex = distribution[current_rank + 1];
+#pragma omp parallel for
+        for (index_t i = 0; i < m_nodes->getNumNodes(); ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i];
+            if (firstVertex <= k && k < lastVertex) {
+                m_nodes->globalDegreesOfFreedom[i] =
+                                            newGlobalDOFID[k-firstVertex];
+            }
+        }
+
+        if (p < mpiSize - 1) { // the final send can be skipped
 #ifdef ESYS_MPI
-		MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
-				     dest, in->MPIInfo->msg_tag_counter,
-				     source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
+            MPI_Status status;
+            MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T, dest,
+                                 m_mpiInfo->counter(), source,
+                                 m_mpiInfo->counter(), m_mpiInfo->comm, &status);
+            m_mpiInfo->incCounter();
 #endif
-		in->MPIInfo->msg_tag_counter++;
-		current_rank = esysUtils::mod_rank(mpiSize, current_rank - 1);
-	    }
-	}
+            current_rank = m_mpiInfo->mod_rank(current_rank - 1);
+        }
     }
-    delete[] newGlobalDOFID;
 #if 0
-    for (i = 0; i < in->Nodes->numNodes; ++i)
-	printf("%d ", in->Nodes->globalDegreesOfFreedom[i]);
-    printf("\n");
+    for (index_t i = 0; i < m_nodes->getNumNodes(); ++i)
+        std::cout << m_nodes->globalDegreesOfFreedom[i] << " ";
+    std::cout << std::endl;
 #endif
-    return;
+#endif // ESYS_HAVE_PASO
 }
+
+} // namespace dudley
+
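
Both optimizeDOFDistribution and optimizeDOFLabeling push the relabelled DOF blocks around all ranks in a ring: each rank applies the block it currently holds, then exchanges the buffer with its neighbours via MPI_Sendrecv_replace. A compact MPI sketch of that ring rotation; the payload is a single int standing in for the newGlobalDOFID chunk:

    // ring_rotate_sketch.cpp -- mpic++ ring_rotate_sketch.cpp && mpirun -np 4 ./a.out
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int dest = (rank + 1) % size;           // send to the next rank
        const int source = (rank - 1 + size) % size;  // receive from the previous one
        int payload = rank;          // stands in for the newGlobalDOFID chunk
        int currentOwner = rank;     // whose block the payload currently is

        for (int p = 0; p < size; ++p) {
            // "apply" the block we currently hold (the real code relabels DOFs here)
            std::printf("rank %d step %d: holding block of rank %d\n",
                        rank, p, currentOwner);

            if (p < size - 1) {       // the final send can be skipped
                MPI_Status status;
                MPI_Sendrecv_replace(&payload, 1, MPI_INT, dest, 0,
                                     source, 0, MPI_COMM_WORLD, &status);
                currentOwner = (currentOwner - 1 + size) % size;
            }
        }
        MPI_Finalize();
        return 0;
    }
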
diff --git a/dudley/src/Mesh_prepare.cpp b/dudley/src/Mesh_prepare.cpp
deleted file mode 100644
index 16785f5..0000000
--- a/dudley/src/Mesh_prepare.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: prepares the mesh for further calculations  */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_prepare(Dudley_Mesh * in, bool optimize)
-{
-    dim_t newGlobalNumDOFs = 0, numReducedNodes = 0, i;
-    index_t *distribution = NULL, *maskReducedNodes = NULL, *indexReducedNodes = NULL, *node_distribution = NULL;
-    if (in == NULL)
-	return;
-    if (in->Nodes == NULL)
-	return;
-
-    Dudley_Mesh_setOrders(in);
-
-    /* first step is to distribute the elements according to a global distribution of DOF */
-
-    distribution = new  index_t[in->MPIInfo->size + 1];
-    node_distribution = new  index_t[in->MPIInfo->size + 1];
-    if (!(Dudley_checkPtr(distribution) || Dudley_checkPtr(node_distribution)))
-    {
-	/* first we create dense labeling for the DOFs */
-
-	newGlobalNumDOFs = Dudley_NodeFile_createDenseDOFLabeling(in->Nodes);
-
-	/* create a distribution of the global DOFs and determine
-	   the MPI_rank controlling the DOFs on this processor      */
-	in->MPIInfo->setDistribution(0, newGlobalNumDOFs - 1, distribution);
-
-	/* now the mesh is re-distributed according to the mpiRankOfDOF vector */
-	/* this will redistribute the Nodes and Elements including overlap and will create an element coloring 
-	   but will not create any mappings (see later in this function)                                   */
-	if (Dudley_noError())
-	    Dudley_Mesh_distributeByRankOfDOF(in, distribution);
-    }
-
-    /* at this stage we are able to start an optimization of the DOF distribution using ParaMetis */
-    /* on return distribution is altered and new DOF ids have been assigned */
-    if (Dudley_noError() && optimize && in->MPIInfo->size > 1)
-    {
-	Dudley_Mesh_optimizeDOFDistribution(in, distribution);
-	if (Dudley_noError())
-	    Dudley_Mesh_distributeByRankOfDOF(in, distribution);
-    }
-    /* the local labelling of the degrees of free is optimized */
-    if (Dudley_noError() && optimize)
-    {
-	Dudley_Mesh_optimizeDOFLabeling(in, distribution);
-    }
-    /* rearrange elements with the attempt to bring elements closer to memory locations of the nodes (distributed shared memory!): */
-    if (Dudley_noError())
-	Dudley_Mesh_optimizeElementOrdering(in);
-
-    /* create the global indices */
-    if (Dudley_noError())
-    {
-
-	maskReducedNodes = new  index_t[in->Nodes->numNodes];
-	indexReducedNodes = new  index_t[in->Nodes->numNodes];
-	if (!(Dudley_checkPtr(maskReducedNodes) || Dudley_checkPtr(indexReducedNodes)))
-	{
-
-/* useful DEBUG:
-{index_t MIN_id,MAX_id;
-printf("Mesh_prepare: global DOF : %d\n",newGlobalNumDOFs);
-Dudley_NodeFile_setGlobalIdRange(&MIN_id,&MAX_id,in->Nodes);
-printf("Mesh_prepare: global node id range = %d :%d\n", MIN_id,MAX_id);
-Dudley_NodeFile_setIdRange(&MIN_id,&MAX_id,in->Nodes);
-printf("Mesh_prepare: local node id range = %d :%d\n", MIN_id,MAX_id);
-}
-*/
-#pragma omp parallel for private(i) schedule(static)
-	    for (i = 0; i < in->Nodes->numNodes; ++i)
-		maskReducedNodes[i] = -1;
-
-	    Dudley_Mesh_markNodes(maskReducedNodes, 0, in, TRUE);
-
-	    numReducedNodes = Dudley_Util_packMask(in->Nodes->numNodes, maskReducedNodes, indexReducedNodes);
-
-	    Dudley_NodeFile_createDenseNodeLabeling(in->Nodes, node_distribution, distribution);
-	    Dudley_NodeFile_createDenseReducedDOFLabeling(in->Nodes, maskReducedNodes);
-	    Dudley_NodeFile_createDenseReducedNodeLabeling(in->Nodes, maskReducedNodes);
-	    /* create the missing mappings */
-
-	    if (Dudley_noError())
-		Dudley_Mesh_createNodeFileMappings(in, numReducedNodes, indexReducedNodes, distribution,
-						   node_distribution);
-	}
-
-	delete[] maskReducedNodes;
-	delete[] indexReducedNodes;
-    }
-
-    delete[] distribution;
-    delete[] node_distribution;
-
-    Dudley_Mesh_setTagsInUse(in);
-    return;
-}
-
-/*                                                      */
-/*  tries to reduce the coloring for all element files: */
-/*                                                      */
-void Dudley_Mesh_createColoring(Dudley_Mesh * in, index_t * node_localDOF_map)
-{
-    if (Dudley_noError())
-	Dudley_ElementFile_createColoring(in->Elements, in->Nodes->numNodes, node_localDOF_map);
-    if (Dudley_noError())
-	Dudley_ElementFile_createColoring(in->FaceElements, in->Nodes->numNodes, node_localDOF_map);
-    if (Dudley_noError())
-	Dudley_ElementFile_createColoring(in->Points, in->Nodes->numNodes, node_localDOF_map);
-}
-
-/*                                                                    */
-/*  redistribute elements to minimize communication during assemblage */
-/*                                                                    */
-void Dudley_Mesh_optimizeElementOrdering(Dudley_Mesh * in)
-{
-    if (Dudley_noError())
-	Dudley_ElementFile_optimizeOrdering(&(in->Elements));
-    if (Dudley_noError())
-	Dudley_ElementFile_optimizeOrdering(&(in->FaceElements));
-    if (Dudley_noError())
-	Dudley_ElementFile_optimizeOrdering(&(in->Points));
-}
-
-/*                                                                    */
-/*  redistribute elements to minimize communication during assemblage */
-void Dudley_Mesh_setTagsInUse(Dudley_Mesh * in)
-{
-    if (Dudley_noError())
-	Dudley_NodeFile_setTagsInUse(in->Nodes);
-    if (Dudley_noError())
-	Dudley_ElementFile_setTagsInUse(in->Elements);
-    if (Dudley_noError())
-	Dudley_ElementFile_setTagsInUse(in->FaceElements);
-    if (Dudley_noError())
-	Dudley_ElementFile_setTagsInUse(in->Points);
-}
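
The old Dudley_Mesh_prepare starts from an (approximately) even block split of the freshly labelled DOFs over the ranks (MPIInfo->setDistribution). A generic sketch of such a block split; this is the textbook formula, not necessarily the exact one escript uses:

    // block_distribution_sketch.cpp -- generic even block split, illustrative only.
    #include <cstdio>
    #include <vector>

    // distribution[p]..distribution[p+1]-1 are the DOFs owned by rank p
    std::vector<long> blockDistribution(long numDOFs, int numRanks)
    {
        std::vector<long> distribution(numRanks + 1, 0);
        const long base = numDOFs / numRanks;
        const long rem = numDOFs % numRanks;
        for (int p = 0; p < numRanks; ++p)
            distribution[p + 1] = distribution[p] + base + (p < rem ? 1 : 0);
        return distribution;
    }

    int main()
    {
        const std::vector<long> d = blockDistribution(10, 3);
        for (size_t p = 0; p + 1 < d.size(); ++p)
            std::printf("rank %zu owns DOFs [%ld, %ld)\n", p, d[p], d[p + 1]);
        return 0;
    }
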
diff --git a/dudley/src/Mesh_print.cpp b/dudley/src/Mesh_print.cpp
deleted file mode 100644
index 6ea4fd4..0000000
--- a/dudley/src/Mesh_print.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: prints Mesh */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-/*  prints the mesh to the standard output: */
-
-void Dudley_Mesh_print(Dudley_Mesh * in)
-{
-    dim_t NN, i, j, numDim, NN2;
-
-    /* write header */
-
-    printf("Mesh name: %s\n", in->Name);
-
-    /*  write nodes: */
-
-    if (in->Nodes != NULL)
-    {
-	numDim = in->Nodes->numDim;
-	printf("=== %1dD-Nodes:\nnumber of nodes=%d\n", numDim, in->Nodes->numNodes);
-	printf("Id,Tag,globalDegreesOfFreedom,degreesOfFreedom,reducedDegreesOfFeedom,node,reducedNode,Coordinates\n");
-	for (i = 0; i < in->Nodes->numNodes; i++)
-	{
-	    printf("%d,%d,%d,%d,%d,%d,%d ",
-		   in->Nodes->Id[i], in->Nodes->Tag[i], in->Nodes->globalDegreesOfFreedom[i],
-		   in->Nodes->degreesOfFreedomMapping->target[i],
-		   in->Nodes->reducedDegreesOfFreedomMapping->target[i],
-		   in->Nodes->nodesMapping->target[i], in->Nodes->reducedNodesMapping->target[i]);
-	    for (j = 0; j < numDim; j++)
-		printf(" %20.15e", in->Nodes->Coordinates[INDEX2(j, i, numDim)]);
-	    printf("\n");
-	}
-    }
-
-    /*  write elements: */
-
-    if (in->Elements != NULL)
-    {
-	printf("=== %s:\nnumber of elements=%d\ncolor range=[%d,%d]\n",
-	       in->Elements->ename, in->Elements->numElements, in->Elements->minColor, in->Elements->maxColor);
-	NN = in->Elements->numNodes;
-	NN2 = in->Elements->numNodes;
-	if (in->Elements->numElements > 0)
-	{
-	    printf("Id,Tag,Owner,Color,Nodes\n");
-	    for (i = 0; i < in->Elements->numElements; i++)
-	    {
-		printf("%d,%d,%d,%d,", in->Elements->Id[i], in->Elements->Tag[i], in->Elements->Owner[i],
-		       in->Elements->Color[i]);
-		for (j = 0; j < NN; j++)
-		    printf(" %d", in->Nodes->Id[in->Elements->Nodes[INDEX2(j, i, NN2)]]);
-		printf("\n");
-	    }
-	}
-    }
-
-    /*  write face elements: */
-
-    if (in->FaceElements != NULL)
-    {
-	printf("=== %s:\nnumber of elements=%d\ncolor range=[%d,%d]\n",
-	       in->FaceElements->ename, in->FaceElements->numElements, in->FaceElements->minColor,
-	       in->FaceElements->maxColor);
-	NN = in->FaceElements->numNodes;
-	NN2 = in->FaceElements->numNodes;
-	if (in->FaceElements->numElements > 0)
-	{
-	    printf("Id,Tag,Owner,Color,Nodes\n");
-	    for (i = 0; i < in->FaceElements->numElements; i++)
-	    {
-		printf("%d,%d,%d,%d,", in->FaceElements->Id[i], in->FaceElements->Tag[i], in->Elements->Owner[i],
-		       in->FaceElements->Color[i]);
-		for (j = 0; j < NN; j++)
-		    printf(" %d", in->Nodes->Id[in->FaceElements->Nodes[INDEX2(j, i, NN2)]]);
-		printf("\n");
-	    }
-	}
-    }
-
-    /*  write points: */
-    if (in->Points != NULL)
-    {
-	printf("=== %s:\nnumber of elements=%d\ncolor range=[%d,%d]\n",
-	       in->Points->ename, in->Points->numElements, in->Points->minColor, in->Points->maxColor);
-	NN = in->Points->numNodes;
-	NN2 = in->Points->numNodes;
-	if (in->Points->numElements > 0)
-	{
-	    printf("Id,Tag,Owner,Color,Nodes\n");
-	    for (i = 0; i < in->Points->numElements; i++)
-	    {
-		printf("%d,%d,%d,%d,", in->Points->Id[i], in->Points->Tag[i], in->Elements->Owner[i],
-		       in->Points->Color[i]);
-		for (j = 0; j < NN; j++)
-		    printf(" %d", in->Nodes->Id[in->Points->Nodes[INDEX2(j, i, NN2)]]);
-		printf("\n");
-	    }
-	}
-    }
-}
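
The Mesh_read.cpp rewrite that follows keeps the old distribution strategy for file input: rank 0 reads roughly numEle/size + 1 records at a time, sends one chunk to each worker and keeps the last chunk itself, with the true chunk length appended as the final integer of the message. A stand-alone MPI sketch of that chunked hand-out, shipping plain ints instead of element records:

    // chunk_scatter_sketch.cpp -- mpic++ chunk_scatter_sketch.cpp && mpirun -np 3 ./a.out
    #include <mpi.h>
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int numRecords = 10;                    // pretend file length
        const int chunkSize = numRecords / size + 1;  // upper bound per rank
        // one extra slot at the end carries the actual number of records
        std::vector<int> buf(chunkSize + 1, -1);
        int chunkLen = 0;

        if (rank == 0) {                              // master
            int total = 0, nextCPU = 1;
            for (;;) {
                std::fill(buf.begin(), buf.end(), -1);
                chunkLen = 0;
                // "read" up to chunkSize records (here: the record index itself)
                while (chunkLen < chunkSize && total < numRecords)
                    buf[chunkLen++] = total++;
                if (nextCPU < size) {                 // ship this chunk to a worker
                    buf[chunkSize] = chunkLen;
                    MPI_Send(buf.data(), chunkSize + 1, MPI_INT, nextCPU, 0,
                             MPI_COMM_WORLD);
                }
                nextCPU++;
                if (nextCPU > size)                   // last chunk stays on rank 0
                    break;
            }
        } else {                                      // worker
            MPI_Status status;
            MPI_Recv(buf.data(), chunkSize + 1, MPI_INT, 0, 0, MPI_COMM_WORLD,
                     &status);
            chunkLen = buf[chunkSize];
        }
        std::printf("rank %d got %d records\n", rank, chunkLen);
        MPI_Finalize();
        return 0;
    }
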
diff --git a/dudley/src/Mesh_read.cpp b/dudley/src/Mesh_read.cpp
index 66c1b3f..2c5d04b 100644
--- a/dudley/src/Mesh_read.cpp
+++ b/dudley/src/Mesh_read.cpp
@@ -14,631 +14,354 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: read mesh */
+#include <escript/index.h>
 
-/************************************************************************************/
+using escript::IOError;
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+namespace {
 
-#include <ctype.h>
-#include "Mesh.h"
+using namespace dudley;
 
-#define FSCANF_CHECK(scan_ret, reason) { if (scan_ret == EOF) { perror(reason); Dudley_setError(IO_ERROR,"scan error while reading dudley file"); return NULL;} }
-
-Dudley_Mesh *Dudley_Mesh_read(char *fname, index_t order, index_t reduced_order, bool optimize)
+ElementFile* readElementFile(std::ifstream& fileHandle, escript::JMPI mpiInfo)
 {
-    dim_t numNodes, numDim=0, numEle, i0, i1;
-    Dudley_Mesh *mesh_p = NULL;
-    char name[LenString_MAX], element_type[LenString_MAX], frm[20];
-    char error_msg[LenErrorMsg_MAX];
-    FILE *fileHandle_p = NULL;
-    Dudley_ElementTypeId typeID = Dudley_NoRef;
-    int scan_ret;
-
-    Dudley_resetError();
-    /* No! Bad! take a parameter for this */
-    esysUtils::JMPI mpi_info = esysUtils::makeInfo(MPI_COMM_WORLD);
-
-    if (mpi_info->rank == 0)
-    {
-	/* get file handle */
-	fileHandle_p = fopen(fname, "r");
-	if (fileHandle_p == NULL)
-	{
-	    sprintf(error_msg, "Dudley_Mesh_read: Opening file %s for reading failed.", fname);
-	    Dudley_setError(IO_ERROR, error_msg);
-	    return NULL;
-	}
-
-	/* read header */
-	sprintf(frm, "%%%d[^\n]", LenString_MAX - 1);
-	scan_ret = fscanf(fileHandle_p, frm, name);
-	FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")
-	    /* get the number of nodes */
-	    scan_ret = fscanf(fileHandle_p, "%1d%*s %d\n", &numDim, &numNodes);
-    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
+    dim_t numEle = 0;
+    ElementTypeId typeID = Dudley_NoRef;
+    std::string elementType, line;
+
+    // Read the element typeID and number of elements
+    if (mpiInfo->rank == 0) {
+        std::getline(fileHandle, line);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file - expected <ElementType> <numEle>");
+        size_t pos = line.find(' ');
+        if (pos == std::string::npos)
+            throw IOError("Mesh::read: Scan error reading file - expected <ElementType> <numEle>");
+        elementType = line.substr(0, pos);
+        numEle = std::stol(line.substr(pos+1));
+        typeID = eltTypeFromString(elementType);
+    }
 #ifdef ESYS_MPI
-    /* MPI Broadcast numDim, numNodes, name if there are multiple MPI procs */
-    if (mpi_info->size > 1)
-    {
-	int temp1[3];
-	if (mpi_info->rank == 0)
-	{
-	    temp1[0] = numDim;
-	    temp1[1] = numNodes;
-	    temp1[2] = strlen(name) + 1;
-	}
-	else
-	{
-	    temp1[0] = 0;
-	    temp1[1] = 0;
-	    temp1[2] = 1;
-	}
-	MPI_Bcast(temp1, 3, MPI_INT, 0, mpi_info->comm);
-	numDim = temp1[0];
-	numNodes = temp1[1];
-	MPI_Bcast(name, temp1[2], MPI_CHAR, 0, mpi_info->comm);
+    if (mpiInfo->size > 1) {
+        dim_t temp1[2];
+        temp1[0] = (dim_t)typeID;
+        temp1[1] = numEle;
+        int mpiError = MPI_Bcast(temp1, 2, MPI_DIM_T, 0, mpiInfo->comm);
+        if (mpiError != MPI_SUCCESS) {
+            throw DudleyException("Mesh::read: broadcast of element typeID failed");
+        }
+        typeID = static_cast<ElementTypeId>(temp1[0]);
+        numEle = temp1[1];
     }
 #endif
+    if (typeID == Dudley_NoRef) {
+        std::stringstream ss;
+        ss << "Mesh::read: Unidentified element type " << elementType;
+        throw IOError(ss.str());
+    }
 
-    /* allocate mesh */
-    mesh_p = Dudley_Mesh_alloc(name, numDim, mpi_info);
-
-    if (Dudley_noError())
-    {
-	/* Each CPU will get at most chunkSize nodes so the message has to be sufficiently large */
-	int chunkSize = numNodes / mpi_info->size + 1, totalNodes = 0, chunkNodes = 0, nextCPU = 1;
-	int *tempInts = new  index_t[chunkSize * 3 + 1];	/* Stores the integer message data */
-	double *tempCoords = new  double[chunkSize * numDim];	/* Stores the double message data */
-
-	/*
-	   Read chunkSize nodes, send it in a chunk to worker CPU which copies chunk into its local mesh_p
-	   It doesn't matter that a CPU has the wrong nodes for its elements, this is sorted out later
-	   First chunk sent to CPU 1, second to CPU 2, ...
-	   Last chunk stays on CPU 0 (the master)
-	   The three columns of integers (Id, gDOF, Tag) are gathered into a single array tempInts and sent 
-	   together in a single MPI message
-	 */
-
-	if (mpi_info->rank == 0)	/* Master */
-	{
-	    for (;;)		/* Infinite loop */
-	    {
-#pragma omp parallel for private (i0) schedule(static)
-		for (i0 = 0; i0 < chunkSize * 3 + 1; i0++)
-		    tempInts[i0] = -1;
-
-#pragma omp parallel for private (i0) schedule(static)
-		for (i0 = 0; i0 < chunkSize * numDim; i0++)
-		    tempCoords[i0] = -1.0;
-
-		chunkNodes = 0;
-		for (i1 = 0; i1 < chunkSize; i1++)
-		{
-		    if (totalNodes >= numNodes)
-			break;	/* End of inner loop */
-		    if (1 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %d %d %le\n",
-					  &tempInts[0 + i1], &tempInts[chunkSize + i1], &tempInts[chunkSize * 2 + i1],
-					  &tempCoords[i1 * numDim + 0]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    if (2 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %d %d %le %le\n",
-					  &tempInts[0 + i1], &tempInts[chunkSize + i1], &tempInts[chunkSize * 2 + i1],
-					  &tempCoords[i1 * numDim + 0], &tempCoords[i1 * numDim + 1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    if (3 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %d %d %le %le %le\n",
-					  &tempInts[0 + i1], &tempInts[chunkSize + i1], &tempInts[chunkSize * 2 + i1],
-					  &tempCoords[i1 * numDim + 0], &tempCoords[i1 * numDim + 1],
-					  &tempCoords[i1 * numDim + 2]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    totalNodes++;	/* When do we quit the infinite loop? */
-		    chunkNodes++;	/* How many nodes do we actually have in this chunk? It may be smaller than chunkSize. */
-		}
-		if (chunkNodes > chunkSize)
-		{
-		    Dudley_setError(ESYS_MPI_ERROR,
-				    "Dudley_Mesh_read: error reading chunks of mesh, data too large for message size");
-		    return NULL;
-		}
+    // Allocate the ElementFile
+    ElementFile* out = new ElementFile(typeID, mpiInfo);
+    const int numNodes = out->numNodes;
+
+    /********************** Read the element data **************************/
+    dim_t chunkSize = numEle / mpiInfo->size + 1;
+    dim_t totalEle = 0;
+    dim_t chunkEle = 0;
+    int nextCPU = 1;
+    /// Store Id + Tag + node list (+ one int at end for chunkEle)
+    index_t* tempInts = new index_t[chunkSize * (2 + numNodes) + 1];
+    // Elements are specified as a list of integers...only need one message
+    // instead of two as with the nodes
+    if (mpiInfo->rank == 0) { // Master
+        for (;;) {            // Infinite loop
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * (2 + numNodes) + 1; i0++)
+                tempInts[i0] = -1;
+            chunkEle = 0;
+            for (index_t i0 = 0; i0 < chunkSize; i0++) {
+                if (totalEle >= numEle)
+                    break; // End inner loop
+                std::getline(fileHandle, line);
+                if (!fileHandle.good())
+                    throw IOError("Mesh::read: Scan error while reading element data");
+                std::stringstream ss;
+                ss << line;
+                ss >> tempInts[i0 * (2 + numNodes) + 0]
+                   >> tempInts[i0 * (2 + numNodes) + 1];
+                for (int i1 = 0; i1 < numNodes; i1++) {
+                    ss >> tempInts[i0 * (2 + numNodes) + 2 + i1];
+                }
+                totalEle++;
+                chunkEle++;
+            }
 #ifdef ESYS_MPI
-		/* Eventually we'll send chunkSize nodes to each CPU numbered 1 ... mpi_info->size-1, here goes one of them */
-		if (nextCPU < mpi_info->size)
-		{
-		    tempInts[chunkSize * 3] = chunkNodes;	/* The message has one more int to send chunkNodes */
-		    MPI_Send(tempInts, chunkSize * 3 + 1, MPI_INT, nextCPU, 81720, mpi_info->comm);
-		    MPI_Send(tempCoords, chunkSize * numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm);
-		}
+            // Eventually we'll send a chunk of elements to each CPU except
+            // 0 itself; here goes one of them
+            if (nextCPU < mpiInfo->size) {
+                tempInts[chunkSize * (2 + numNodes)] = chunkEle;
+                MPI_Send(tempInts, chunkSize * (2 + numNodes) + 1, MPI_DIM_T,
+                         nextCPU, 81722, mpiInfo->comm);
+            }
 #endif
-		nextCPU++;
-		/* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-		if (nextCPU > mpi_info->size)
-		    break;	/* End infinite loop */
-	    }			/* Infinite loop */
-	}			/* End master */
-	else			/* Worker */
-	{
+            nextCPU++;
+            // Infinite loop ends when I've read a chunk for each of the worker
+            // nodes plus one more chunk for the master
+            if (nextCPU > mpiInfo->size)
+                break; // End infinite loop
+        } // Infinite loop
+    } // end master
+    else { // Worker
 #ifdef ESYS_MPI
-	    /* Each worker receives two messages */
-	    MPI_Status status;
-	    MPI_Recv(tempInts, chunkSize * 3 + 1, MPI_INT, 0, 81720, mpi_info->comm, &status);
-	    MPI_Recv(tempCoords, chunkSize * numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status);
-	    chunkNodes = tempInts[chunkSize * 3];	/* How many nodes are in this workers chunk? */
+        // Each worker receives one message
+        MPI_Status status;
+        MPI_Recv(tempInts, chunkSize * (2 + numNodes) + 1, MPI_DIM_T, 0,
+                 81722, mpiInfo->comm, &status);
+        chunkEle = tempInts[chunkSize * (2 + numNodes)];
 #endif
-	}			/* Worker */
-
-	/* Copy node data from tempMem to mesh_p */
-	Dudley_NodeFile_allocTable(mesh_p->Nodes, chunkNodes);
-	if (Dudley_noError())
-	{
-#pragma omp parallel for private (i0, i1) schedule(static)
-	    for (i0 = 0; i0 < chunkNodes; i0++)
-	    {
-		mesh_p->Nodes->Id[i0] = tempInts[0 + i0];
-		mesh_p->Nodes->globalDegreesOfFreedom[i0] = tempInts[chunkSize + i0];
-		mesh_p->Nodes->Tag[i0] = tempInts[chunkSize * 2 + i0];
-		for (i1 = 0; i1 < numDim; i1++)
-		{
-		    mesh_p->Nodes->Coordinates[INDEX2(i1, i0, numDim)] = tempCoords[i0 * numDim + i1];
-		}
-	    }
-	}
-	delete[] tempInts;
-	delete[] tempCoords;
+    } // Worker
+
+    out->allocTable(chunkEle);
+
+    // Copy Element data from tempInts to element file
+    out->minColor = 0;
+    out->maxColor = chunkEle - 1;
+#pragma omp parallel for
+    for (index_t i0 = 0; i0 < chunkEle; i0++) {
+        out->Id[i0] = tempInts[i0 * (2 + numNodes) + 0];
+        out->Tag[i0] = tempInts[i0 * (2 + numNodes) + 1];
+        out->Owner[i0] = mpiInfo->rank;
+        out->Color[i0] = i0;
+        for (int i1 = 0; i1 < numNodes; i1++) {
+            out->Nodes[INDEX2(i1, i0, numNodes)] =
+                tempInts[i0 * (2 + numNodes) + 2 + i1];
+        }
     }
+    delete[] tempInts;
+    return out;
+}
 
-    /* ***********************************  read elements *************************************************************************************** */
-    if (Dudley_noError())
-    {
-	/* Read the element typeID */
-	if (mpi_info->rank == 0)
-	{
-	    scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-	    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") typeID = eltTypeFromString(element_type);
-	}
-#ifdef ESYS_MPI
-	if (mpi_info->size > 1)
-	{
-	    int temp1[2], mpi_error;
-	    temp1[0] = (int)typeID;
-	    temp1[1] = numEle;
-	    mpi_error = MPI_Bcast(temp1, 2, MPI_INT, 0, mpi_info->comm);
-	    if (mpi_error != MPI_SUCCESS)
-	    {
-		Dudley_setError(ESYS_MPI_ERROR, "Dudley_Mesh_read: broadcast of Element typeID failed");
-		return NULL;
-	    }
-	    typeID = (Dudley_ElementTypeId) temp1[0];
-	    numEle = temp1[1];
-	}
-#endif
-	if (typeID == Dudley_NoRef)
-	{
-	    sprintf(error_msg, "Dudley_Mesh_read: Unidentified element type %s", element_type);
-	    Dudley_setError(VALUE_ERROR, error_msg);
-	}
-    }
+} // anonymous
 
-    /* Allocate the ElementFile */
-    if (Dudley_noError())
-    {
-	mesh_p->Elements = Dudley_ElementFile_alloc(typeID, mpi_info);
-	numNodes = mesh_p->Elements->numNodes;	/* New meaning for numNodes: num nodes per element */
-    }
+namespace dudley {
 
-    /* *************************** Read the element data **************************************************************************************** */
-    if (Dudley_noError())
-    {
-	int chunkSize = numEle / mpi_info->size + 1, totalEle = 0, nextCPU = 1, chunkEle = 0;
-	int *tempInts = new  index_t[chunkSize * (2 + numNodes) + 1];	/* Store Id + Tag + node list (+ one int at end for chunkEle) */
-	/* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-	if (mpi_info->rank == 0)	/* Master */
-	{
-	    for (;;)		/* Infinite loop */
-	    {
-#pragma omp parallel for private (i0) schedule(static)
-		for (i0 = 0; i0 < chunkSize * (2 + numNodes) + 1; i0++)
-		    tempInts[i0] = -1;
-		chunkEle = 0;
-		for (i0 = 0; i0 < chunkSize; i0++)
-		{
-		    if (totalEle >= numEle)
-			break;	/* End inner loop */
-		    scan_ret =
-			fscanf(fileHandle_p, "%d %d", &tempInts[i0 * (2 + numNodes) + 0],
-			       &tempInts[i0 * (2 + numNodes) + 1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") for (i1 = 0; i1 < numNodes; i1++)
-		    {
-			scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0 * (2 + numNodes) + 2 + i1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    scan_ret = fscanf(fileHandle_p, "\n");
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") totalEle++;
-		    chunkEle++;
-		}
-#ifdef ESYS_MPI
-		/* Eventually we'll send chunk of elements to each CPU except 0 itself, here goes one of them */
-		if (nextCPU < mpi_info->size)
-		{
-		    tempInts[chunkSize * (2 + numNodes)] = chunkEle;
-		    MPI_Send(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, nextCPU, 81722, mpi_info->comm);
-		}
-#endif
-		nextCPU++;
-		/* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-		if (nextCPU > mpi_info->size)
-		    break;	/* End infinite loop */
-	    }			/* Infinite loop */
-	}			/* End master */
-	else
-	{			/* Worker */
-#ifdef ESYS_MPI
-	    /* Each worker receives one message */
-	    MPI_Status status;
-	    MPI_Recv(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, 0, 81722, mpi_info->comm, &status);
-	    chunkEle = tempInts[chunkSize * (2 + numNodes)];
-#endif
-	}			/* Worker */
-	Dudley_ElementFile_allocTable(mesh_p->Elements, chunkEle);
-
-	/* Copy Element data from tempInts to mesh_p */
-	if (Dudley_noError())
-	{
-	    mesh_p->Elements->minColor = 0;
-	    mesh_p->Elements->maxColor = chunkEle - 1;
-#pragma omp parallel for private (i0, i1) schedule(static)
-	    for (i0 = 0; i0 < chunkEle; i0++)
-	    {
-		mesh_p->Elements->Id[i0] = tempInts[i0 * (2 + numNodes) + 0];
-		mesh_p->Elements->Tag[i0] = tempInts[i0 * (2 + numNodes) + 1];
-		mesh_p->Elements->Owner[i0] = mpi_info->rank;
-		mesh_p->Elements->Color[i0] = i0;
-		for (i1 = 0; i1 < numNodes; i1++)
-		{
-		    mesh_p->Elements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0 * (2 + numNodes) + 2 + i1];
-		}
-	    }
-	}
-	delete[] tempInts;
+escript::Domain_ptr DudleyDomain::read(escript::JMPI mpiInfo,
+                                       const std::string& filename,
+                                       bool optimize)
+{
+    dim_t numNodes = 0;
+    int numDim = 0;
+    std::string name, line, token;
+    std::ifstream fileHandle;
+
+    if (mpiInfo->rank == 0) {
+        // open file
+        fileHandle.open(filename.c_str());
+        if (!fileHandle.good()) {
+            std::stringstream ss;
+            ss << "Mesh::read: Opening file " << filename
+               << " for reading failed.";
+            throw IOError(ss.str());
+        }
+
+        // read header
+        std::getline(fileHandle, name);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file header");
+
+        // get the number of dimensions and nodes
+        std::getline(fileHandle, line);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file - expected <?D-Nodes> <numNodes>");
+        numDim = std::stoi(line.substr(0, 1));
+        token = line.substr(line.find(' ')+1);
+        numNodes = std::stoi(token);
     }
-    /* ******************** end of Read the element data ***************************************************** */
-
-    /* ********************* read face elements ************************************************************************************ */
-    if (Dudley_noError())
-    {
-	/* Read the element typeID */
-	if (mpi_info->rank == 0)
-	{
-	    scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-	    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") typeID = eltTypeFromString(element_type);
-	}
+
 #ifdef ESYS_MPI
-	if (mpi_info->size > 1)
-	{
-	    int temp1[2];
-	    temp1[0] = (int)typeID;
-	    temp1[1] = numEle;
-	    MPI_Bcast(temp1, 2, MPI_INT, 0, mpi_info->comm);
-	    typeID = (Dudley_ElementTypeId) temp1[0];
-	    numEle = temp1[1];
-	}
-#endif
-	if (typeID == Dudley_NoRef)
-	{
-	    sprintf(error_msg, "Dudley_Mesh_read: Unidentified element type %s", element_type);
-	    Dudley_setError(VALUE_ERROR, error_msg);
-	}
-	if (Dudley_noError())
-	{
-	    /* Allocate the ElementFile */
-	    mesh_p->FaceElements = Dudley_ElementFile_alloc(typeID, mpi_info);
-	    numNodes = mesh_p->FaceElements->numNodes;	/* New meaning for numNodes: num nodes per element */
-	}
+    // MPI Broadcast numDim, numNodes, name if there are multiple MPI procs
+    if (mpiInfo->size > 1) {
+        dim_t temp1[3];
+        if (mpiInfo->rank == 0) {
+            temp1[0] = numDim;
+            temp1[1] = numNodes;
+            temp1[2] = name.length() + 1;
+        } else {
+            temp1[0] = 0;
+            temp1[1] = 0;
+            temp1[2] = 1;
+        }
+        MPI_Bcast(temp1, 3, MPI_DIM_T, 0, mpiInfo->comm);
+        numDim = temp1[0];
+        numNodes = temp1[1];
+        name.resize(temp1[2]);
+        MPI_Bcast(&name[0], temp1[2], MPI_CHAR, 0, mpiInfo->comm);
     }
-    /* ********************** Read the face element data ******************************************************************************* */
-
-    if (Dudley_noError())
-    {
-	int chunkSize = numEle / mpi_info->size + 1, totalEle = 0, nextCPU = 1, chunkEle = 0;
-	int *tempInts = new  index_t[chunkSize * (2 + numNodes) + 1];	/* Store Id + Tag + node list (+ one int at end for chunkEle) */
-	/* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-	if (mpi_info->rank == 0)	/* Master */
-	{
-	    for (;;)		/* Infinite loop */
-	    {
-#pragma omp parallel for private (i0) schedule(static)
-		for (i0 = 0; i0 < chunkSize * (2 + numNodes) + 1; i0++)
-		    tempInts[i0] = -1;
-		chunkEle = 0;
-		for (i0 = 0; i0 < chunkSize; i0++)
-		{
-		    if (totalEle >= numEle)
-			break;	/* End inner loop */
-		    scan_ret =
-			fscanf(fileHandle_p, "%d %d", &tempInts[i0 * (2 + numNodes) + 0],
-			       &tempInts[i0 * (2 + numNodes) + 1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") for (i1 = 0; i1 < numNodes; i1++)
-		    {
-			scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0 * (2 + numNodes) + 2 + i1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    scan_ret = fscanf(fileHandle_p, "\n");
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") totalEle++;
-		    chunkEle++;
-		}
-#ifdef ESYS_MPI
-		/* Eventually we'll send chunk of elements to each CPU except 0 itself, here goes one of them */
-		if (nextCPU < mpi_info->size)
-		{
-		    tempInts[chunkSize * (2 + numNodes)] = chunkEle;
-		    MPI_Send(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, nextCPU, 81723, mpi_info->comm);
-		}
 #endif
-		nextCPU++;
-		/* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-		if (nextCPU > mpi_info->size)
-		    break;	/* End infinite loop */
-	    }			/* Infinite loop */
-	}			/* End master */
-	else			/* Worker */
-	{
+
+    // allocate domain
+    DudleyDomain* domain = new DudleyDomain(name, numDim, mpiInfo);
+
+    // Each CPU will get at most chunkSize nodes so the message has to be
+    // sufficiently large
+    dim_t chunkSize = numNodes / mpiInfo->size + 1;
+    dim_t totalNodes = 0;
+    dim_t chunkNodes = 0;
+    int nextCPU = 1;
+    // Stores the integer message data
+    index_t* tempInts = new index_t[chunkSize * 3 + 1];
+    // Stores the double message data
+    double* tempCoords = new double[chunkSize * numDim];
+
+    // Read chunkSize nodes, send it in a chunk to worker CPU which copies
+    // chunk into its local domain.  It doesn't matter that a CPU has the wrong
+    // nodes for its elements, this is sorted out later. First chunk sent to
+    // CPU 1, second to CPU 2, ..., last chunk stays on CPU 0 (the master).
+    // The three columns of integers (Id, gDOF, Tag) are gathered into a single
+    // array tempInts and sent together in a single MPI message.
+    if (mpiInfo->rank == 0) { // Master
+        for (;;) {            // Infinite loop
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * 3 + 1; i0++)
+                tempInts[i0] = -1;
+
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * numDim; i0++)
+                tempCoords[i0] = -1.0;
+
+            chunkNodes = 0;
+            for (index_t i1 = 0; i1 < chunkSize; i1++) {
+                if (totalNodes >= numNodes)
+                    break;  // End of inner loop
+                std::getline(fileHandle, line);
+                if (!fileHandle.good())
+                    throw IOError("Mesh::read: Scan error while reading node data");
+                std::stringstream ss;
+                ss << line;
+                ss >> tempInts[0 + i1] >> tempInts[chunkSize + i1]
+                   >> tempInts[chunkSize * 2 + i1];
+                ss >> tempCoords[i1 * numDim];
+                if (numDim > 1)
+                    ss >> tempCoords[i1 * numDim + 1];
+                if (numDim > 2)
+                    ss >> tempCoords[i1 * numDim + 2];
+                totalNodes++; // When do we quit the infinite loop?
+                chunkNodes++; // How many nodes do we actually have in this chunk? It may be smaller than chunkSize.
+            }
+            if (chunkNodes > chunkSize) {
+                throw DudleyException("Mesh::read: error reading chunks of domain, data too large for message size");
+            }
 #ifdef ESYS_MPI
-	    /* Each worker receives one message */
-	    MPI_Status status;
-	    MPI_Recv(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, 0, 81723, mpi_info->comm, &status);
-	    chunkEle = tempInts[chunkSize * (2 + numNodes)];
+            // Eventually we'll send chunkSize nodes to each CPU numbered
+            // 1 ... mpiInfo->size-1; here goes one of them
+            if (nextCPU < mpiInfo->size) {
+                // The message has one more int to send chunkNodes
+                tempInts[chunkSize * 3] = chunkNodes;
+                MPI_Send(tempInts, chunkSize * 3 + 1, MPI_DIM_T, nextCPU, 81720, mpiInfo->comm);
+                MPI_Send(tempCoords, chunkSize * numDim, MPI_DOUBLE, nextCPU, 81721, mpiInfo->comm);
+            }
 #endif
-	}			/* Worker */
-	Dudley_ElementFile_allocTable(mesh_p->FaceElements, chunkEle);
-	if (Dudley_noError())
-	{
-	    /* Copy Element data from tempInts to mesh_p */
-	    mesh_p->FaceElements->minColor = 0;
-	    mesh_p->FaceElements->maxColor = chunkEle - 1;
-#pragma omp parallel for private (i0, i1)
-	    for (i0 = 0; i0 < chunkEle; i0++)
-	    {
-		mesh_p->FaceElements->Id[i0] = tempInts[i0 * (2 + numNodes) + 0];
-		mesh_p->FaceElements->Tag[i0] = tempInts[i0 * (2 + numNodes) + 1];
-		mesh_p->FaceElements->Owner[i0] = mpi_info->rank;
-		mesh_p->FaceElements->Color[i0] = i0;
-		for (i1 = 0; i1 < numNodes; i1++)
-		{
-		    mesh_p->FaceElements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0 * (2 + numNodes) + 2 + i1];
-		}
-	    }
-	}
-
-	delete[] tempInts;
-    }
-    /* ************************************* end of Read the face element data *************************************** */
-
-    /* ********************************* read nodal elements ****************************************************** */
-    /* *******************************  Read the element typeID */
-    if (Dudley_noError())
-    {
-	if (mpi_info->rank == 0)
-	{
-	    scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-	    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") typeID = eltTypeFromString(element_type);
-	}
+            nextCPU++;
+            // Infinite loop ends when I've read a chunk for each of the worker
+            // nodes plus one more chunk for the master
+            if (nextCPU > mpiInfo->size)
+                break; // End infinite loop
+        } // Infinite loop
+    } // End master
+    else { // Worker
 #ifdef ESYS_MPI
-	if (mpi_info->size > 1)
-	{
-	    int temp1[2];
-	    temp1[0] = (int)typeID;
-	    temp1[1] = numEle;
-	    MPI_Bcast(temp1, 2, MPI_INT, 0, mpi_info->comm);
-	    typeID = (Dudley_ElementTypeId) temp1[0];
-	    numEle = temp1[1];
-	}
+        // Each worker receives two messages
+        MPI_Status status;
+        MPI_Recv(tempInts, chunkSize * 3 + 1, MPI_DIM_T, 0, 81720, mpiInfo->comm, &status);
+        MPI_Recv(tempCoords, chunkSize * numDim, MPI_DOUBLE, 0, 81721, mpiInfo->comm, &status);
+        // How many nodes are in this worker's chunk?
+        chunkNodes = tempInts[chunkSize * 3];
 #endif
-	if (typeID == Dudley_NoRef)
-	{
-	    sprintf(error_msg, "Dudley_Mesh_read: Unidentified element type %s", element_type);
-	    Dudley_setError(VALUE_ERROR, error_msg);
-	}
+    } // Worker
+
+    // Copy node data from the temporary arrays to the domain
+    NodeFile* nodes = domain->getNodes();
+    nodes->allocTable(chunkNodes);
+
+#pragma omp parallel for
+    for (index_t i0 = 0; i0 < chunkNodes; i0++) {
+        nodes->Id[i0] = tempInts[0 + i0];
+        nodes->globalDegreesOfFreedom[i0] = tempInts[chunkSize + i0];
+        nodes->Tag[i0] = tempInts[chunkSize * 2 + i0];
+        for (int i1 = 0; i1 < numDim; i1++) {
+            nodes->Coordinates[INDEX2(i1, i0, numDim)] = tempCoords[i0 * numDim + i1];
+        }
     }
-    if (Dudley_noError())
-    {
-	/* Allocate the ElementFile */
-	mesh_p->Points = Dudley_ElementFile_alloc(typeID, mpi_info);
-	numNodes = mesh_p->Points->numNodes;	/* New meaning for numNodes: num nodes per element */
-    }
-    /**********************************  Read the nodal element data **************************************************/
-    if (Dudley_noError())
-    {
-	int chunkSize = numEle / mpi_info->size + 1, totalEle = 0, nextCPU = 1, chunkEle = 0;
-	int *tempInts = new  index_t[chunkSize * (2 + numNodes) + 1];	/* Store Id + Tag + node list (+ one int at end for chunkEle) */
-	/* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-	if (mpi_info->rank == 0)	/* Master */
-	{
-	    for (;;)		/* Infinite loop */
-	    {
-#pragma omp parallel for private (i0) schedule(static)
-		for (i0 = 0; i0 < chunkSize * (2 + numNodes) + 1; i0++)
-		    tempInts[i0] = -1;
-		chunkEle = 0;
-		for (i0 = 0; i0 < chunkSize; i0++)
-		{
-		    if (totalEle >= numEle)
-			break;	/* End inner loop */
-		    scan_ret =
-			fscanf(fileHandle_p, "%d %d", &tempInts[i0 * (2 + numNodes) + 0],
-			       &tempInts[i0 * (2 + numNodes) + 1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") for (i1 = 0; i1 < numNodes; i1++)
-		    {
-			scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0 * (2 + numNodes) + 2 + i1]);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-		    scan_ret = fscanf(fileHandle_p, "\n");
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") totalEle++;
-		    chunkEle++;
-		}
-#ifdef ESYS_MPI
-		/* Eventually we'll send chunk of elements to each CPU except 0 itself, here goes one of them */
-		if (nextCPU < mpi_info->size)
-		{
-		    tempInts[chunkSize * (2 + numNodes)] = chunkEle;
-		    MPI_Send(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, nextCPU, 81725, mpi_info->comm);
-		}
-#endif
-		nextCPU++;
-		/* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-		if (nextCPU > mpi_info->size)
-		    break;	/* End infinite loop */
-	    }			/* Infinite loop */
-	}			/* End master */
-	else			/* Worker */
-	{
+    delete[] tempInts;
+    delete[] tempCoords;
+
+    /*************************** read elements ******************************/
+    domain->setElements(readElementFile(fileHandle, mpiInfo));
+
+    /************************ read face elements ****************************/
+    domain->setFaceElements(readElementFile(fileHandle, mpiInfo));
+
+    /************************ read nodal elements ***************************/
+    domain->setPoints(readElementFile(fileHandle, mpiInfo));
+
+    /************************  get the name tags ****************************/
+    std::string remainder;
+    size_t len = 0;
+    int tag_key;
+    if (mpiInfo->rank == 0) { // Master
+        // Read the word 'Tags'
+        if (!fileHandle.eof()) {
+            std::getline(fileHandle, name);
+            if (!fileHandle.good())
+                throw IOError("Mesh::read: Scan error while reading tag header");
+        }
+        // Read rest of file in one chunk, after using seek to find length
+        std::ios::pos_type cur_pos = fileHandle.tellg();
+        fileHandle.seekg(0, std::ios::end);
+        std::ios::pos_type end_pos = fileHandle.tellg();
+        fileHandle.seekg(cur_pos);
+        remainder.resize(end_pos - cur_pos + 1);
+        if (!fileHandle.eof()) {
+            fileHandle.read(&remainder[0], end_pos-cur_pos);
+            if (fileHandle.bad())
+                throw IOError("Mesh::read: Error reading remainder");
+            remainder[end_pos - cur_pos] = 0;
+        }
+        len = remainder.find_last_not_of(' ');
+        remainder = remainder.substr(0, len+1);
+    } // Master
+
 #ifdef ESYS_MPI
-	    /* Each worker receives one message */
-	    MPI_Status status;
-	    MPI_Recv(tempInts, chunkSize * (2 + numNodes) + 1, MPI_INT, 0, 81725, mpi_info->comm, &status);
-	    chunkEle = tempInts[chunkSize * (2 + numNodes)];
-#endif
-	}			/* Worker */
-
-	/* Copy Element data from tempInts to mesh_p */
-	Dudley_ElementFile_allocTable(mesh_p->Points, chunkEle);
-	if (Dudley_noError())
-	{
-	    mesh_p->Points->minColor = 0;
-	    mesh_p->Points->maxColor = chunkEle - 1;
-#pragma omp parallel for private (i0, i1) schedule(static)
-	    for (i0 = 0; i0 < chunkEle; i0++)
-	    {
-		mesh_p->Points->Id[i0] = tempInts[i0 * (2 + numNodes) + 0];
-		mesh_p->Points->Tag[i0] = tempInts[i0 * (2 + numNodes) + 1];
-		mesh_p->Points->Owner[i0] = mpi_info->rank;
-		mesh_p->Points->Color[i0] = i0;
-		for (i1 = 0; i1 < numNodes; i1++)
-		{
-		    mesh_p->Points->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0 * (2 + numNodes) + 2 + i1];
-		}
-	    }
-	}
-
-	delete[] tempInts;
+    int len_i = static_cast<int>(len);
+    MPI_Bcast(&len_i, 1, MPI_INT, 0, mpiInfo->comm);
+    len = static_cast<size_t>(len_i);
+    if (mpiInfo->rank != 0) {
+        remainder.resize(len + 1);
     }
-    /* ******************************** end of Read the nodal element data ************************************ */
- /******************  get the name tags *****************************************/
-    if (Dudley_noError())
-    {
-	char *remainder = 0, *ptr;
-	size_t len = 0;
-#ifdef ESYS_MPI
-	int len_i;
-#endif
-	int tag_key;
-	if (mpi_info->rank == 0)	/* Master */
-	{
-	    /* Read the word 'Tag' */
-	    if (!feof(fileHandle_p))
-	    {
-		scan_ret = fscanf(fileHandle_p, "%s\n", name);
-	    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read")}
-#if defined(_WIN32)		/* windows ftell lies on unix formatted text files */
-	    remainder = NULL;
-	    len = 0;
-	    while (1)
-	    {
-		size_t malloc_chunk = 1024;
-		size_t buff_size = 0;
-		int ch;
-		ch = fgetc(fileHandle_p);
-		if (ch == '\r')
-		{
-		    continue;
-		}
-		if (len + 1 > buff_size)
-		{
-		    TMPMEMREALLOC(remainder, remainder, buff_size + malloc_chunk, char);
-		}
-		if (ch == EOF)
-		{
-		    /* hit EOF */
-		    remainder[len] = (char)0;
-		    break;
-		}
-		remainder[len] = (char)ch;
-		len++;
-	    }
-#else
-	    /* Read rest of file in one chunk, after using seek to find length */
-	    {
-		long cur_pos, end_pos;
-		cur_pos = ftell(fileHandle_p);
-		fseek(fileHandle_p, 0L, SEEK_END);
-		end_pos = ftell(fileHandle_p);
-		fseek(fileHandle_p, (long)cur_pos, SEEK_SET);
-		remainder = new  char[end_pos - cur_pos + 1];
-		if (!feof(fileHandle_p))
-		{
-		    scan_ret = fread(remainder, (size_t) end_pos - cur_pos, sizeof(char), fileHandle_p);
-		    FSCANF_CHECK(scan_ret, "Dudley_Mesh_read") remainder[end_pos - cur_pos] = 0;
-		}
-	    }
-#endif
-	    len = strlen(remainder);
-	    while ((len > 1) && isspace(remainder[--len]))
-	    {
-		remainder[len] = 0;
-	    }
-	    len = strlen(remainder);
-	    // shrink the allocation unit
-//	    TMPMEMREALLOC(remainder, remainder, len + 1, char);
-	}			/* Master */
-#ifdef ESYS_MPI
-
-	len_i = (int)len;
-	MPI_Bcast(&len_i, 1, MPI_INT, 0, mpi_info->comm);
-	len = (size_t) len_i;
-	if (mpi_info->rank != 0)
-	{
-	    remainder = new  char[len + 1];
-	    remainder[0] = 0;
-	}
-	if (MPI_Bcast(remainder, len + 1, MPI_CHAR, 0, mpi_info->comm) != MPI_SUCCESS)
-	    Dudley_setError(ESYS_MPI_ERROR, "Dudley_Mesh_read: broadcast of remainder failed");
+    if (MPI_Bcast(&remainder[0], len+1, MPI_CHAR, 0, mpiInfo->comm) != MPI_SUCCESS)
+        throw DudleyException("Mesh::read: broadcast of remainder failed");
 #endif
 
-	if (remainder[0])
-	{
-	    ptr = remainder;
-	    do
-	    {
-		sscanf(ptr, "%s %d\n", name, &tag_key);
-		if (*name)
-		    Dudley_Mesh_addTagMap(mesh_p, name, tag_key);
-		ptr++;
-	    }
-	    while (NULL != (ptr = strchr(ptr, '\n')) && *ptr);
-	}
-	if (remainder)
-	    delete[] remainder;
+    std::stringstream rem;
+    rem << remainder;
+    while (std::getline(rem, line)) {
+        size_t pos = line.find(' ');
+        if (pos != std::string::npos) {
+            name = line.substr(0, pos);
+            tag_key = std::stoi(line.substr(pos+1));
+            domain->setTagMap(name, tag_key);
+        }
     }
 
-    /* close file */
-    if (mpi_info->rank == 0)
-	fclose(fileHandle_p);
-
-    /*   resolve id's : */
-    /* rearrange elements: */
-    if (Dudley_noError())
-	Dudley_Mesh_resolveNodeIds(mesh_p);
-    if (Dudley_noError())
-	Dudley_Mesh_prepare(mesh_p, optimize);
-
-    /* that's it */
-    if (!Dudley_noError())
-    {
-	Dudley_Mesh_free(mesh_p);
-    }
-    /* free up memory */
-    return mesh_p;
+    // close file
+    if (mpiInfo->rank == 0)
+        fileHandle.close();
+
+    domain->resolveNodeIds();
+    domain->prepare(optimize);
+    return domain->getPtr();
 }
+
+} // namespace dudley
+
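The reader above replaces the old fscanf-based scanning with std::getline plus
std::stringstream extraction for both node and element records. A minimal
standalone sketch of that parsing pattern (the NodeRecord struct and
parseNodeLine helper are illustrative only, not part of this patch):

    #include <sstream>
    #include <stdexcept>
    #include <string>

    // Illustrative record mirroring the Id, global DOF, Tag and coordinate
    // columns that DudleyDomain::read() extracts for each node line.
    struct NodeRecord {
        long id;
        long globalDOF;
        int tag;
        double coords[3];
    };

    // Parse one whitespace-separated node line the way the new reader does:
    // take the whole line first, then pull fields out of a stringstream,
    // reading only numDim coordinate values.
    inline NodeRecord parseNodeLine(const std::string& line, int numDim)
    {
        NodeRecord n = {};
        std::stringstream ss(line);
        ss >> n.id >> n.globalDOF >> n.tag;
        for (int d = 0; d < numDim; ++d)
            ss >> n.coords[d];
        if (ss.fail())
            throw std::runtime_error("parseNodeLine: scan error while reading node data");
        return n;
    }

Reading the whole line before extracting fields means a short or malformed
record fails the stream instead of silently desynchronising the scan, which is
what the old FSCANF_CHECK macro had to guard against.
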
diff --git a/dudley/src/Mesh_readGmsh.cpp b/dudley/src/Mesh_readGmsh.cpp
index 08b45fc..d8ea9d3 100644
--- a/dudley/src/Mesh_readGmsh.cpp
+++ b/dudley/src/Mesh_readGmsh.cpp
@@ -14,417 +14,292 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: read mesh */
+#include <escript/index.h>
 
-/************************************************************************************/
+using escript::IOError;
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-#include <stdio.h>
-
-#define FSCANF_CHECK(scan_ret, reason) { if (scan_ret == EOF) { perror(reason); Dudley_setError(IO_ERROR,"scan error while reading dudley file"); return NULL;} }
-
-/************************************************************************************/
-
-/*  reads a mesh from a Dudley file of name fname */
+namespace dudley {
 
 #define MAX_numNodes_gmsh 20
 
-Dudley_Mesh *Dudley_Mesh_readGmsh(char *fname, index_t numDim, index_t order, index_t reduced_order, bool optimize,
-				  bool useMacroElements)
+/// reads a mesh from a gmsh file with the given filename
+escript::Domain_ptr DudleyDomain::readGmsh(escript::JMPI mpiInfo,
+                                           const std::string& filename,
+                                           int numDim, bool optimize)
 {
-
     double version = 1.0;
-    int format = 0, size = sizeof(double), scan_ret;
-    dim_t numNodes, totalNumElements = 0, numTags = 0, numNodesPerElement = 0, numNodesPerElement2, element_dim = 0;
-    index_t e, i0, j, gmsh_type, partition_id, itmp, elementary_id;
-    index_t numElements = 0, numFaceElements = 0, *id = NULL, *tag = NULL, *vertices = NULL;
-    Dudley_Mesh *mesh_p = NULL;
-    char line[LenString_MAX + 1];
-    char error_msg[LenErrorMsg_MAX];
-    double rtmp0, rtmp1;
-#ifdef Dudley_TRACE
-    double time0 = Dudley_timer();
-#endif
-    FILE *fileHandle_p = NULL;
-    Dudley_ElementTypeId *element_type = NULL;
-
-    /* No! Bad! take a parameter for this */
-    esysUtils::JMPI mpi_info = esysUtils::makeInfo(MPI_COMM_WORLD);
-    Dudley_resetError();
-    if (mpi_info->size > 1)
-    {
-	Dudley_setError(IO_ERROR, "reading GMSH with MPI is not supported yet.");
-	return NULL;
-    }
-    else
-    {
+    int format = 0, size = sizeof(double);
+    dim_t numNodes, totalNumElements = 0;
+    int numTags = 0;
+    int numNodesPerElement = 0, numNodesPerElement2, element_dim = 0;
+    int gmsh_type, partition_id, itmp, elementary_id;
+    index_t numElements = 0, numFaceElements = 0, *id = NULL, *vertices = NULL;
+    int* tag = NULL;
+    std::string line;
 
-	/* allocate mesh */
+    if (mpiInfo->size > 1)
+        throw DudleyException("reading gmsh with MPI is not supported yet.");
 
-	mesh_p = Dudley_Mesh_alloc(fname, numDim, mpi_info);
-	if (!Dudley_noError())
-	    return NULL;
+    // allocate domain
+    DudleyDomain* domain = new DudleyDomain(filename, numDim, mpiInfo);
 
-	/* get file handle */
-	fileHandle_p = fopen(fname, "r");
-	if (fileHandle_p == NULL)
-	{
-	    sprintf(error_msg, "Opening Gmsh file %s for reading failed.", fname);
-	    Dudley_setError(IO_ERROR, error_msg);
-	    return NULL;
-	}
-
-	/* start reading */
-	while (1)
-	{
-	    if (!Dudley_noError())
-		break;
-	    /* find line staring with $ */
-	    do
-	    {
-		if (!fgets(line, sizeof(line), fileHandle_p))
-		    break;
-		if (feof(fileHandle_p))
-		    break;
-	    }
-	    while (line[0] != '$');
+    // open file
+    std::ifstream fileHandle(filename);
+    if (!fileHandle.good()) {
+        std::stringstream ss;
+        ss << "Opening gmsh file " << filename << " for reading failed.";
+        throw IOError(ss.str());
+    }
 
-	    if (feof(fileHandle_p))
-		break;
+    // start reading
+    while (1) {
+        // find line starting with $
+        do {
+            std::getline(fileHandle, line);
+            if (!fileHandle.good())
+                break;
+        } while (line[0] != '$');
 
-	    /* format */
-	    if (!strncmp(&line[1], "MeshFormat", 10))
-	    {
-		scan_ret = fscanf(fileHandle_p, "%lf %d %d\n", &version, &format, &size);
-		FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-	    }
-	    /* nodes are read */
-	    if (!strncmp(&line[1], "NOD", 3) || !strncmp(&line[1], "NOE", 3) || !strncmp(&line[1], "Nodes", 5))
-	    {
+        if (fileHandle.eof())
+            break;
 
-		scan_ret = fscanf(fileHandle_p, "%d", &numNodes);
-		FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		if (!Dudley_noError())
-		    break;
-		Dudley_NodeFile_allocTable(mesh_p->Nodes, numNodes);
-		if (!Dudley_noError())
-		    break;
-		for (i0 = 0; i0 < numNodes; i0++)
-		{
-		    if (1 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %le %le %le\n", &mesh_p->Nodes->Id[i0],
-					  &mesh_p->Nodes->Coordinates[INDEX2(0, i0, numDim)], &rtmp0, &rtmp1);
-			FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		    }
-		    else if (2 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %le %le %le\n", &mesh_p->Nodes->Id[i0],
-					  &mesh_p->Nodes->Coordinates[INDEX2(0, i0, numDim)],
-					  &mesh_p->Nodes->Coordinates[INDEX2(1, i0, numDim)], &rtmp0);
-			FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		    }
-		    else if (3 == numDim)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %le %le %le\n", &mesh_p->Nodes->Id[i0],
-					  &mesh_p->Nodes->Coordinates[INDEX2(0, i0, numDim)],
-					  &mesh_p->Nodes->Coordinates[INDEX2(1, i0, numDim)],
-					  &mesh_p->Nodes->Coordinates[INDEX2(2, i0, numDim)]);
-			FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		    }
-		    mesh_p->Nodes->globalDegreesOfFreedom[i0] = mesh_p->Nodes->Id[i0];
-		    mesh_p->Nodes->Tag[i0] = 0;
-		}
-	    }
-	    /* elements */
-	    else if (!strncmp(&line[1], "ELM", 3) || !strncmp(&line[1], "Elements", 8))
-	    {
+        // format
+        if (line.substr(1,10) == "MeshFormat") {
+            std::getline(fileHandle, line);
+            if (fileHandle.eof())
+                throw IOError("readGmsh: early EOF while reading file");
+            std::stringstream ss(line);
+            ss >> version >> format >> size;
+        } else if (line.substr(1,3) == "NOD" || line.substr(1,3) == "NOE" ||
+                   line.substr(1,5) == "Nodes") {
+            // nodes
+            std::getline(fileHandle, line);
+            if (fileHandle.eof())
+                throw IOError("readGmsh: early EOF while reading file");
+            numNodes = std::stol(line);
+            NodeFile* nodes = domain->getNodes();
+            nodes->allocTable(numNodes);
+            for (index_t i0 = 0; i0 < numNodes; i0++) {
+                std::getline(fileHandle, line);
+                if (!fileHandle.good())
+                    throw IOError("readGmsh: early EOF while reading file");
+                std::stringstream ss(line);
+                ss >> nodes->Id[i0]
+                   >> nodes->Coordinates[INDEX2(0, i0, numDim)];
+                if (numDim > 1)
+                    ss >> nodes->Coordinates[INDEX2(1, i0, numDim)];
+                if (numDim > 2)
+                    ss >> nodes->Coordinates[INDEX2(2, i0, numDim)];
+                nodes->globalDegreesOfFreedom[i0] = nodes->Id[i0];
+                nodes->Tag[i0] = 0;
+            }
+        } else if (line.substr(1,3) == "ELM" || line.substr(1,8) == "Elements") {
+            // elements
+            ElementTypeId final_element_type = Dudley_NoRef;
+            ElementTypeId final_face_element_type = Dudley_NoRef;
+            numElements = 0;
+            numFaceElements = 0;
+            std::getline(fileHandle, line);
+            if (fileHandle.eof())
+                throw IOError("readGmsh: early EOF while reading file");
+            totalNumElements = std::stol(line);
 
-		Dudley_ElementTypeId final_element_type = Dudley_NoRef;
-		Dudley_ElementTypeId final_face_element_type = Dudley_NoRef;
-		numElements = 0;
-		numFaceElements = 0;
-		scan_ret = fscanf(fileHandle_p, "%d", &totalNumElements);
-		FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
+            id = new index_t[totalNumElements];
+            tag = new int[totalNumElements];
 
-		id = new  index_t[totalNumElements];
-		tag = new  index_t[totalNumElements];
+            ElementTypeId* element_type = new ElementTypeId[totalNumElements];
+            vertices = new index_t[totalNumElements * MAX_numNodes_gmsh];
+            // read all in
+            for (index_t e = 0; e < totalNumElements; e++) {
+                std::getline(fileHandle, line);
+                if (fileHandle.eof())
+                    throw IOError("readGmsh: early EOF while reading file");
+                std::stringstream ss(line);
+                ss >> id[e];
+                ss >> gmsh_type;
+                switch (gmsh_type) {
+                    case 1: // line order 1
+                        element_type[e] = Dudley_Line2;
+                        element_dim = 1;
+                        numNodesPerElement = 2;
+                        break;
+                    case 2: // triangle order 1
+                        element_type[e] = Dudley_Tri3;
+                        numNodesPerElement = 3;
+                        element_dim = 2;
+                        break;
+                    case 4: // tetrahedron order 1
+                        element_type[e] = Dudley_Tet4;
+                        numNodesPerElement = 4;
+                        element_dim = 3;
+                        break;
+                    case 15: // point
+                        element_type[e] = Dudley_Point1;
+                        numNodesPerElement = 1;
+                        element_dim = 0;
+                        break;
+                    default: {
+                        std::stringstream ss2;
+                        ss2 << "Unexpected gmsh element type " << gmsh_type
+                           << " in mesh file " << filename;
+                        throw IOError(ss2.str());
+                    }
+                }
+                if (element_dim == numDim) {
+                    if (final_element_type == Dudley_NoRef) {
+                        final_element_type = element_type[e];
+                    } else if (final_element_type != element_type[e]) {
+                        throw IOError("Dudley can handle a single type of "
+                                      "internal elements only.");
+                    }
+                    numElements++;
+                } else if (element_dim == numDim - 1) {
+                    if (final_face_element_type == Dudley_NoRef) {
+                        final_face_element_type = element_type[e];
+                    } else if (final_face_element_type != element_type[e]) {
+                        throw IOError("Dudley can handle a single type of "
+                                      "face elements only.");
+                    }
+                    numFaceElements++;
+                }
 
-		element_type = new  Dudley_ElementTypeId[totalNumElements];
-		vertices = new  index_t[totalNumElements * MAX_numNodes_gmsh];
-		if (!
-		    (Dudley_checkPtr(id) || Dudley_checkPtr(tag) || Dudley_checkPtr(element_type)
-		     || Dudley_checkPtr(vertices)))
-		{
-		    /* read all in */
-		    for (e = 0; e < totalNumElements; e++)
-		    {
-			scan_ret = fscanf(fileHandle_p, "%d %d", &id[e], &gmsh_type);
-			FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-			switch (gmsh_type)
-			{
-			case 1:	/* line order 1 */
-			    element_type[e] = Dudley_Line2;
-			    element_dim = 1;
-			    numNodesPerElement = 2;
-			    break;
-			case 2:	/* triangle order 1 */
-			    element_type[e] = Dudley_Tri3;
-			    numNodesPerElement = 3;
-			    element_dim = 2;
-			    break;
-			case 4:	/* tetrahedron order 1 */
-			    element_type[e] = Dudley_Tet4;
-			    numNodesPerElement = 4;
-			    element_dim = 3;
-			    break;
-			case 15:	/* point */
-			    element_type[e] = Dudley_Point1;
-			    numNodesPerElement = 1;
-			    element_dim = 0;
-			    break;
-			default:
-			    element_type[e] = Dudley_NoRef;
-			    sprintf(error_msg, "Unexpected gmsh element type %d in mesh file %s.", gmsh_type, fname);
-			    Dudley_setError(IO_ERROR, error_msg);
-			}
-			if (element_dim == numDim)
-			{
-			    if (final_element_type == Dudley_NoRef)
-			    {
-				final_element_type = element_type[e];
-			    }
-			    else if (final_element_type != element_type[e])
-			    {
-				sprintf(error_msg, "Dudley can handle a single type of internal elements only.");
-				Dudley_setError(IO_ERROR, error_msg);
-				break;
-			    }
-			    numElements++;
-			}
-			else if (element_dim == numDim - 1)
-			{
-			    if (final_face_element_type == Dudley_NoRef)
-			    {
-				final_face_element_type = element_type[e];
-			    }
-			    else if (final_face_element_type != element_type[e])
-			    {
-				sprintf(error_msg, "Dudley can handle a single type of face elements only.");
-				Dudley_setError(IO_ERROR, error_msg);
-				break;
-			    }
-			    numFaceElements++;
-			}
+                if (version <= 1.0) {
+                    ss >> tag[e] >> elementary_id >> numNodesPerElement2;
+                    partition_id = 1;
+                    if (numNodesPerElement2 != numNodesPerElement) {
+                        std::stringstream ss;
+                        ss << "Illegal number of nodes for element " << id[e]
+                            << " in mesh file " << filename;
+                        throw IOError(ss.str());
+                    }
+                } else {
+                    ss >> numTags;
+                    elementary_id = tag[e] = partition_id = 1;
+                    numNodesPerElement2 = -1;
+                    for (int j = 0; j < numTags; j++) {
+                        ss >> itmp;
+                        if (j == 0) {
+                            tag[e] = itmp;
+                        } else if (j == 1) {
+                            elementary_id = itmp;
+                        } else if (j == 2) {
+                            partition_id = itmp;
+                        }
+                        // ignore any other tags
+                    }
+                }
+                for (int j = 0; j < numNodesPerElement; j++) {
+                    ss >> vertices[INDEX2(j, e, MAX_numNodes_gmsh)];
+                }
+            }
+            // all elements have been read, now we have to identify the
+            // dudley elements to define Elements and FaceElements
+            if (final_element_type == Dudley_NoRef) {
+                if (numDim == 1) {
+                    final_element_type = Dudley_Line2;
+                } else if (numDim == 2) {
+                    final_element_type = Dudley_Tri3;
+                } else if (numDim == 3) {
+                    final_element_type = Dudley_Tet4;
+                }
+            }
+            if (final_face_element_type == Dudley_NoRef) {
+                if (numDim == 1) {
+                    final_face_element_type = Dudley_Point1;
+                } else if (numDim == 2) {
+                    final_face_element_type = Dudley_Line2;
+                } else if (numDim == 3) {
+                    final_face_element_type = Dudley_Tri3;
+                }
+            }
+            ElementFile* elements = new ElementFile(final_element_type, mpiInfo);
+            domain->setElements(elements);
+            ElementFile* faces = new ElementFile(final_face_element_type, mpiInfo);
+            domain->setFaceElements(faces);
+            ElementFile* points = new ElementFile(Dudley_Point1, mpiInfo);
+            domain->setPoints(points);
+            elements->allocTable(numElements);
+            faces->allocTable(numFaceElements);
+            points->allocTable(0);
+            elements->minColor = 0;
+            elements->maxColor = numElements - 1;
+            faces->minColor = 0;
+            faces->maxColor = numFaceElements - 1;
+            points->minColor = 0;
+            points->maxColor = 0;
+            numElements = 0;
+            numFaceElements = 0;
+            for (index_t e = 0; e < totalNumElements; e++) {
+                if (element_type[e] == final_element_type) {
+                    elements->Id[numElements] = id[e];
+                    elements->Tag[numElements] = tag[e];
+                    elements->Color[numElements] = numElements;
+                    elements->Owner[numElements] = 0;
+                    for (int j = 0; j < elements->numNodes; ++j) {
+                        elements->Nodes[INDEX2(j, numElements,
+                                                 elements->numNodes)] =
+                            vertices[INDEX2(j, e, MAX_numNodes_gmsh)];
+                    }
+                    numElements++;
+                } else if (element_type[e] == final_face_element_type) {
+                    faces->Id[numFaceElements] = id[e];
+                    faces->Tag[numFaceElements] = tag[e];
+                    faces->Color[numFaceElements] = numFaceElements;
+                    faces->Owner[numFaceElements] = 0;
+                    for (int j = 0; j < faces->numNodes; ++j) {
+                        faces->Nodes[INDEX2(j, numFaceElements,
+                                              faces->numNodes)] =
+                            vertices[INDEX2(j, e, MAX_numNodes_gmsh)];
+                    }
+                    numFaceElements++;
+                }
+            }
+            // and clean up
+            delete[] id;
+            delete[] tag;
+            delete[] element_type;
+            delete[] vertices;
+        } else if (line.substr(1,13) == "PhysicalNames") {
+            // name tags (thanks to Antoine Lefebvre,
+            // antoine.lefebvre2 at mail.mcgill.ca)
+            std::getline(fileHandle, line);
+            if (fileHandle.eof())
+                throw IOError("readGmsh: early EOF while reading file");
+            numTags = std::stoi(line);
+            for (int i0 = 0; i0 < numTags; i0++) {
+                std::getline(fileHandle, line);
+                if (fileHandle.eof())
+                    throw IOError("readGmsh: early EOF while reading file");
+                std::stringstream ss(line);
+                int tag_key;
+                ss >> itmp >> tag_key;
+                std::string name = line.substr((int)ss.tellg()+1);
+                if (itmp != 2)
+                    throw IOError("readGmsh: expecting two entries per physical name.");
+                if (name.length() < 3)
+                    throw IOError("readGmsh: illegal tagname (\" missing?)");
+                name = name.substr(1, name.length()-2);
+                domain->setTagMap(name, tag_key);
+            }
+        }
+        // search for end of data block
+        do {
+            std::getline(fileHandle, line);
+            if (fileHandle.eof()) {
+                std::stringstream ss;
+                ss << "Unexpected end of file in " << filename;
+                throw IOError(ss.str());
+            }
+        } while (line[0] != '$');
+    }
 
-			if (version <= 1.0)
-			{
-			    scan_ret = fscanf(fileHandle_p, "%d %d %d", &tag[e], &elementary_id, &numNodesPerElement2);
-			    FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-			    partition_id = 1;
-			    if (numNodesPerElement2 != numNodesPerElement)
-			    {
-				sprintf(error_msg, "Illegal number of nodes for element %d in mesh file %s.", id[e],
-					fname);
-				Dudley_setError(IO_ERROR, error_msg);
-			    }
-			}
-			else
-			{
-			    scan_ret = fscanf(fileHandle_p, "%d", &numTags);
-			    FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-			    elementary_id = tag[e] = partition_id = 1;
-			    numNodesPerElement2 = -1;
-			    for (j = 0; j < numTags; j++)
-			    {
-				scan_ret = fscanf(fileHandle_p, "%d", &itmp);
-				FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-				if (j == 0)
-				{
-				    tag[e] = itmp;
-				}
-				else if (j == 1)
-				{
-				    elementary_id = itmp;
-				}
-				else if (j == 2)
-				{
-				    partition_id = itmp;
-				}
-				/* ignore any other tags */
-			    }
-			}
-			if (!Dudley_noError())
-			    break;
-			for (j = 0; j < numNodesPerElement; j++)
-			{
-			    scan_ret = fscanf(fileHandle_p, "%d", &vertices[INDEX2(j, e, MAX_numNodes_gmsh)]);
-			    FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-			}
-		    }
-		    /* all elements have been read, now we have to identify the elements for dudley */
+    fileHandle.close();
+    domain->resolveNodeIds();
+    domain->prepare(optimize);
+    return domain->getPtr();
+}
 
-		    if (Dudley_noError())
-		    {
-			/* first we have to identify the elements to define Elements and FaceElements */
-			if (final_element_type == Dudley_NoRef)
-			{
-			    if (numDim == 1)
-			    {
-				final_element_type = Dudley_Line2;
-			    }
-			    else if (numDim == 2)
-			    {
-				final_element_type = Dudley_Tri3;
-			    }
-			    else if (numDim == 3)
-			    {
-				final_element_type = Dudley_Tet4;
-			    }
-			}
-			if (final_face_element_type == Dudley_NoRef)
-			{
-			    if (numDim == 1)
-			    {
-				final_face_element_type = Dudley_Point1;
-			    }
-			    else if (numDim == 2)
-			    {
-				final_face_element_type = Dudley_Line2;
-			    }
-			    else if (numDim == 3)
-			    {
-				final_face_element_type = Dudley_Tri3;
-			    }
-			}
-			mesh_p->Elements = Dudley_ElementFile_alloc(final_element_type, mpi_info);
-			mesh_p->FaceElements = Dudley_ElementFile_alloc(final_face_element_type, mpi_info);
-			mesh_p->Points = Dudley_ElementFile_alloc(Dudley_Point1, mpi_info);
-			if (Dudley_noError())
-			{
-			    Dudley_ElementFile_allocTable(mesh_p->Elements, numElements);
-			    Dudley_ElementFile_allocTable(mesh_p->FaceElements, numFaceElements);
-			    Dudley_ElementFile_allocTable(mesh_p->Points, 0);
-			    if (Dudley_noError())
-			    {
-				mesh_p->Elements->minColor = 0;
-				mesh_p->Elements->maxColor = numElements - 1;
-				mesh_p->FaceElements->minColor = 0;
-				mesh_p->FaceElements->maxColor = numFaceElements - 1;
-				mesh_p->Points->minColor = 0;
-				mesh_p->Points->maxColor = 0;
-				numElements = 0;
-				numFaceElements = 0;
-				for (e = 0; e < totalNumElements; e++)
-				{
-				    if (element_type[e] == final_element_type)
-				    {
-					mesh_p->Elements->Id[numElements] = id[e];
-					mesh_p->Elements->Tag[numElements] = tag[e];
-					mesh_p->Elements->Color[numElements] = numElements;
-					mesh_p->Elements->Owner[numElements] = 0;
-					for (j = 0; j < mesh_p->Elements-> /*referenceElementSet-> */ numNodes; ++j)
-					{
-					    mesh_p->Elements->Nodes[INDEX2
-								    (j, numElements,
-								     mesh_p->
-								     Elements-> /*referenceElementSet-> */ numNodes)] =
-						vertices[INDEX2(j, e, MAX_numNodes_gmsh)];
-					}
-					numElements++;
-				    }
-				    else if (element_type[e] == final_face_element_type)
-				    {
-					mesh_p->FaceElements->Id[numFaceElements] = id[e];
-					mesh_p->FaceElements->Tag[numFaceElements] = tag[e];
-					mesh_p->FaceElements->Color[numFaceElements] = numFaceElements;
-					mesh_p->FaceElements->Owner[numFaceElements] = 0;
-					for (j = 0; j < mesh_p->FaceElements-> /*referenceElementSet-> */ numNodes; ++j)
-					{
-					    mesh_p->FaceElements->Nodes[INDEX2
-									(j, numFaceElements,
-									 mesh_p->
-									 FaceElements-> /*referenceElementSet-> */
-									 numNodes)] =
-						vertices[INDEX2(j, e, MAX_numNodes_gmsh)];
-					}
-					numFaceElements++;
-				    }
-				}
-			    }
-			}
-		    }
-		}
-		/* and clean up */
-		delete[] id;
-		delete[] tag;
-		delete[] element_type;
-		delete[] vertices;
-	    }      
-	    /* name tags (thanks to Antoine Lefebvre, antoine.lefebvre2 at mail.mcgill.ca ) */
-	    else if (!strncmp(&line[1], "PhysicalNames", 13)) {
-	        char name[LenString_MAX+1];
-	        index_t tag_key;
-		scan_ret = fscanf(fileHandle_p, "%d", &numTags);
-		FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		if (! Dudley_noError()) break;
-		for (i0 = 0; i0 < numTags; i0++) {
-		    scan_ret = fscanf(fileHandle_p, "%d %d %s\n", &itmp, &tag_key, name);
-		    FSCANF_CHECK(scan_ret, "fscanf: Dudley_Mesh_readGmsh");
-		    if (! (itmp == 2)) Dudley_setError(IO_ERROR,"Dudley_Mesh_readGmsh: expecting two entries per physical name.");
-		    if ( strlen(name) < 3 ) Dudley_setError(IO_ERROR,"Dudley_Mesh_readGmsh: illegal tagname (\" missing?)");
-		    if (! Dudley_noError()) break;
-		    name[strlen(name)-1]='\0';
-		    Dudley_Mesh_addTagMap(mesh_p,&name[1],tag_key);
-		}
-	      }
-	    /* search for end of data block */
-	    do
-	    {
-		if (!fgets(line, sizeof(line), fileHandle_p))
-		{
-		    sprintf(error_msg, "Unexpected end of file in %s", fname);
-		    Dudley_setError(IO_ERROR, error_msg);
-		}
-		if (feof(fileHandle_p))
-		{
-		    sprintf(error_msg, "Unexpected end of file in %s", fname);
-		    Dudley_setError(IO_ERROR, error_msg);
-		}
-		if (!Dudley_noError())
-		    break;
-	    }
-	    while (line[0] != '$');
-	}
+} // namespace dudley
 
-	/* close file */
-	fclose(fileHandle_p);
-	/* clean up */
-	if (!Dudley_noError())
-	{
-	    Dudley_Mesh_free(mesh_p);
-	    return NULL;
-	}
-	/*   resolve id's : */
-	if (Dudley_noError())
-	    Dudley_Mesh_resolveNodeIds(mesh_p);
-	/* rearrange elements: */
-	if (Dudley_noError())
-	    Dudley_Mesh_prepare(mesh_p, optimize);
-	/* free up memory */
-	if (!Dudley_noError())
-	{
-	    Dudley_Mesh_free(mesh_p);
-	    return NULL;
-	}
-	else
-	{
-	    return mesh_p;
-	}
-    }
-}
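readGmsh() above maps raw gmsh element type codes (1, 2, 4 and 15) to Dudley
element types inside its read loop. A minimal sketch of that mapping as a
standalone helper, assuming the ElementTypeId enum and escript::IOError
already used in this patch; the helper name is illustrative:

    // Map a gmsh element type code to the Dudley element type used by
    // DudleyDomain::readGmsh(); throws for any code Dudley does not support.
    ElementTypeId gmshToDudleyType(int gmshType)
    {
        switch (gmshType) {
            case 1:  return Dudley_Line2;  // 2-node line, order 1
            case 2:  return Dudley_Tri3;   // 3-node triangle, order 1
            case 4:  return Dudley_Tet4;   // 4-node tetrahedron, order 1
            case 15: return Dudley_Point1; // point
            default:
                throw escript::IOError("unsupported gmsh element type");
        }
    }
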
diff --git a/dudley/src/Mesh_relableElementNodes.cpp b/dudley/src/Mesh_relableElementNodes.cpp
deleted file mode 100644
index e3ccfca..0000000
--- a/dudley/src/Mesh_relableElementNodes.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh */
-
-/*                                                                      */
-/*   assigns new node reference numbers to elements in element file in. */
-/*   if k is the old node, the new node is newNode[k-offset].           */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_relableElementNodes(index_t * newNode, index_t offset, Dudley_Mesh * in)
-{
-    Dudley_ElementFile_relableNodes(newNode, offset, in->Elements);
-    Dudley_ElementFile_relableNodes(newNode, offset, in->FaceElements);
-    Dudley_ElementFile_relableNodes(newNode, offset, in->Points);
-}
diff --git a/dudley/src/Mesh_resolveNodeIds.cpp b/dudley/src/Mesh_resolveNodeIds.cpp
index 0bb347e..4f2f1fe 100644
--- a/dudley/src/Mesh_resolveNodeIds.cpp
+++ b/dudley/src/Mesh_resolveNodeIds.cpp
@@ -14,139 +14,89 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Dudley: Mesh */
-
-/*   at input the element nodes refers to the numbering defined the global Id assigned to the nodes in the */
-/*   NodeFile. It is also not ensured that all nodes referred to by an element are actually available */
-/*   on the process.  At the output, a local node labelling is used and all nodes are available */
-/*   In particular the numbering of the element nodes is between 0 and in->NodeFile->numNodes */
-/*   The function does not create a distribution of the degrees of freedom. */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
+#include "DudleyDomain.h"
 #include "Util.h"
 
-/************************************************************************************/
+namespace dudley {
 
-void Dudley_Mesh_resolveNodeIds(Dudley_Mesh * in)
+void DudleyDomain::resolveNodeIds()
 {
-
-    index_t min_id, max_id, min_id2, max_id2, global_min_id, global_max_id,
-	*globalToNewLocalNodeLabels = NULL, *newLocalToGlobalNodeLabels = NULL;
-    dim_t len, n, newNumNodes, numDim;
-    Dudley_NodeFile *newNodeFile = NULL;
+    // find the minimum and maximum id used by elements
+    index_t min_id = escript::DataTypes::index_t_max();
+    index_t max_id = -escript::DataTypes::index_t_max();
+    std::pair<index_t,index_t> range(m_elements->getNodeRange());
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+    range = m_faceElements->getNodeRange();
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+    range = m_points->getNodeRange();
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+#ifdef Dudley_TRACE
+    index_t global_min_id, global_max_id;
 #ifdef ESYS_MPI
     index_t id_range[2], global_id_range[2];
-#endif
-    numDim = Dudley_Mesh_getDim(in);
-    /*  find the minimum and maximum id used by elements: */
-    min_id = INDEX_T_MAX;
-    max_id = -INDEX_T_MAX;
-    Dudley_ElementFile_setNodeRange(&min_id2, &max_id2, in->Elements);
-    max_id = MAX(max_id, max_id2);
-    min_id = MIN(min_id, min_id2);
-    Dudley_ElementFile_setNodeRange(&min_id2, &max_id2, in->FaceElements);
-    max_id = MAX(max_id, max_id2);
-    min_id = MIN(min_id, min_id2);
-    Dudley_ElementFile_setNodeRange(&min_id2, &max_id2, in->Points);
-    max_id = MAX(max_id, max_id2);
-    min_id = MIN(min_id, min_id2);
-#ifdef ESYS_MPI
     id_range[0] = -min_id;
     id_range[1] = max_id;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
+    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, m_mpiInfo->comm);
     global_min_id = -global_id_range[0];
     global_max_id = global_id_range[1];
 #else
     global_min_id = min_id;
     global_max_id = max_id;
 #endif
-#ifdef Dudley_TRACE
     printf("Node id range used by elements is %d:%d\n", global_min_id, global_max_id);
-#else
-    /* avoid unused var warning if Dudley_TRACE is not defined */
-    (void)global_min_id;
-    (void)global_max_id;
 #endif
-    if (min_id > max_id)
-    {
-	max_id = -1;
-	min_id = 0;
+    if (min_id > max_id) {
+        max_id = -1;
+        min_id = 0;
     }
 
-    /* allocate mappings for new local node labelling to global node labelling (newLocalToGlobalNodeLabels)
-       and global node labelling to the new local node labelling (globalToNewLocalNodeLabels[i-min_id] is the 
-       new local id of global node i) */
-    len = (max_id >= min_id) ? max_id - min_id + 1 : 0;
-    globalToNewLocalNodeLabels = new  index_t[len];	/* local mask for used nodes */
-    newLocalToGlobalNodeLabels = new  index_t[len];
-    if (!((Dudley_checkPtr(globalToNewLocalNodeLabels) && Dudley_checkPtr(newLocalToGlobalNodeLabels))))
-    {
+    // allocate mappings for new local node labeling to global node labeling
+    // (newLocalToGlobalNodeLabels) and global node labeling to the new local
+    // node labeling (globalToNewLocalNodeLabels[i-min_id] is the new local id
+    // of global node i)
+    index_t len = (max_id >= min_id) ? max_id - min_id + 1 : 0;
 
-#pragma omp parallel
-	{
-#pragma omp for private(n) schedule(static)
-	    for (n = 0; n < len; n++)
-		newLocalToGlobalNodeLabels[n] = -1;
-#pragma omp for private(n) schedule(static)
-	    for (n = 0; n < len; n++)
-		globalToNewLocalNodeLabels[n] = -1;
-	}
+    // mark the nodes referred by elements in usedMask
+    std::vector<short> usedMask(len, -1);
+    markNodes(usedMask, min_id);
 
-	/*  mark the nodes referred by elements in globalToNewLocalNodeLabels which is currently used as a mask: */
-	Dudley_Mesh_markNodes(globalToNewLocalNodeLabels, min_id, in, FALSE);
+    // create a local labeling newLocalToGlobalNodeLabels of the local nodes
+    // by packing the mask usedMask
+    std::vector<index_t> newLocalToGlobalNodeLabels = util::packMask(usedMask);
+    const dim_t newNumNodes = newLocalToGlobalNodeLabels.size();
 
-	/* create a local labelling newLocalToGlobalNodeLabels of the local nodes by packing the mask globalToNewLocalNodeLabels */
+    usedMask.clear();
 
-	newNumNodes = Dudley_Util_packMask(len, globalToNewLocalNodeLabels, newLocalToGlobalNodeLabels);
+    // invert the new labeling and shift the index newLocalToGlobalNodeLabels
+    // to global node IDs
+    index_t* globalToNewLocalNodeLabels = new index_t[len];
 
-	/* invert the new labelling and shift the index newLocalToGlobalNodeLabels to global node ids */
-#pragma omp parallel for private(n) schedule(static)
-	for (n = 0; n < newNumNodes; n++)
-	{
+#pragma omp parallel for
+    for (index_t n = 0; n < newNumNodes; n++) {
 #ifdef BOUNDS_CHECK
-	    if (n >= len || n < 0)
-	    {
-		printf("BOUNDS_CHECK %s %d n=%d\n", __FILE__, __LINE__, n);
-		exit(1);
-	    }
-	    if (newLocalToGlobalNodeLabels[n] >= len || newLocalToGlobalNodeLabels[n] < 0)
-	    {
-		printf("BOUNDS_CHECK %s %d n=%d\n", __FILE__, __LINE__, n);
-		exit(1);
-	    }
+        ESYS_ASSERT(newLocalToGlobalNodeLabels[n] < len, "BOUNDS_CHECK");
+        ESYS_ASSERT(newLocalToGlobalNodeLabels[n] >= 0, "BOUNDS_CHECK");
 #endif
-	    globalToNewLocalNodeLabels[newLocalToGlobalNodeLabels[n]] = n;
-	    newLocalToGlobalNodeLabels[n] += min_id;
-	}
-	/* create a new table */
-	newNodeFile = Dudley_NodeFile_alloc(numDim, in->MPIInfo);
-	if (Dudley_noError())
-	{
-	    Dudley_NodeFile_allocTable(newNodeFile, newNumNodes);
-	}
-	if (Dudley_noError())
-	{
-	    Dudley_NodeFile_gather_global(newLocalToGlobalNodeLabels, in->Nodes, newNodeFile);
-	}
-	if (Dudley_noError())
-	{
-	    Dudley_NodeFile_free(in->Nodes);
-	    in->Nodes = newNodeFile;
-	    /*  relabel nodes of the elements: */
-	    Dudley_Mesh_relableElementNodes(globalToNewLocalNodeLabels, min_id, in);
-	}
+        globalToNewLocalNodeLabels[newLocalToGlobalNodeLabels[n]] = n;
+        newLocalToGlobalNodeLabels[n] += min_id;
     }
+    // create a new node file
+    NodeFile* newNodeFile = new NodeFile(getDim(), m_mpiInfo);
+    newNodeFile->allocTable(newNumNodes);
+    if (len)
+        newNodeFile->gather_global(&newLocalToGlobalNodeLabels[0], m_nodes);
+    else
+        newNodeFile->gather_global(NULL, m_nodes);
+
+    delete m_nodes;
+    m_nodes = newNodeFile;
+    // relabel nodes of the elements
+    relabelElementNodes(globalToNewLocalNodeLabels, min_id);
     delete[] globalToNewLocalNodeLabels;
-    delete[] newLocalToGlobalNodeLabels;
-    if (!Dudley_noError())
-    {
-	Dudley_NodeFile_free(newNodeFile);
-    }
 }
+
+} // namespace dudley
+
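
The rewritten resolveNodeIds marks every node ID referenced by elements in usedMask, packs the mask into a dense local numbering (newLocalToGlobalNodeLabels), and inverts it so that, before the min_id shift, globalToNewLocalNodeLabels[newLocalToGlobalNodeLabels[n]] == n. A small self-contained sketch of that pack-and-invert step, assuming (as the loop above does) that packing preserves the order of the marked entries:

    #include <vector>

    // Pack the marked entries of 'used' into a dense local numbering and
    // build the inverse map in one pass. Adding min_id to a packed index
    // recovers the corresponding global node ID, as in the code above.
    void packAndInvert(const std::vector<short>& used,
                       std::vector<long>& localToGlobal,
                       std::vector<long>& globalToLocal)
    {
        globalToLocal.assign(used.size(), -1);
        localToGlobal.clear();
        for (long i = 0; i < (long)used.size(); ++i) {
            if (used[i] >= 0) {
                globalToLocal[i] = (long)localToGlobal.size();
                localToGlobal.push_back(i);
            }
        }
    }
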
diff --git a/dudley/src/Mesh_setCoordinates.cpp b/dudley/src/Mesh_setCoordinates.cpp
deleted file mode 100644
index d2dd03e..0000000
--- a/dudley/src/Mesh_setCoordinates.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: sets new coordinates for nodes */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_setCoordinates(Dudley_Mesh * self, const escript::Data* newX)
-{
-    Dudley_NodeFile_setCoordinates(self->Nodes, newX);
-}
-
diff --git a/dudley/src/Mesh_tagmaps.cpp b/dudley/src/Mesh_tagmaps.cpp
deleted file mode 100644
index 230fa3e..0000000
--- a/dudley/src/Mesh_tagmaps.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh tagmaps: provides access to the mesh tagmap */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
-
-/************************************************************************************/
-
-void Dudley_Mesh_addTagMap(Dudley_Mesh * mesh_p, const char *name, index_t tag_key)
-{
-    Dudley_TagMap_insert(&(mesh_p->TagMap), name, tag_key);
-}
-
-index_t Dudley_Mesh_getTag(Dudley_Mesh * mesh_p, const char *name)
-{
-    return Dudley_TagMap_getTag(mesh_p->TagMap, name);
-}
-
-bool Dudley_Mesh_isValidTagName(Dudley_Mesh * mesh_p, const char *name)
-{
-    return Dudley_TagMap_isValidTagName(mesh_p->TagMap, name);
-}
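
The three deleted wrappers forwarded tag-name lookups to Dudley_TagMap_insert/getTag/isValidTagName; the refactored mesh generators below call out->setTagMap(name, tag) on the domain directly. A minimal sketch of the name-to-tag-key mapping these functions provide, using std::map (illustrative only, not the escript implementation):

    #include <map>
    #include <string>

    // A name -> integer tag key map, the role the removed wrappers delegated.
    class TagMapSketch {
        std::map<std::string, int> m_map;
    public:
        void setTagMap(const std::string& name, int key) { m_map[name] = key; }
        int getTag(const std::string& name) const { return m_map.at(name); }
        bool isValidTagName(const std::string& name) const {
            return m_map.count(name) > 0;
        }
    };
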
diff --git a/dudley/src/Mesh_tet4.cpp b/dudley/src/Mesh_tet4.cpp
index 360fee8..260db6d 100644
--- a/dudley/src/Mesh_tet4.cpp
+++ b/dudley/src/Mesh_tet4.cpp
@@ -14,637 +14,547 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: generates rectangular meshes */
+#include <escript/index.h>
 
-/*   Generates a numElements[0] x numElements[1] x numElements[2] mesh with first order elements (Hex8) in the brick */
-/*   [0,Length[0]] x [0,Length[1]] x [0,Length[2]]. order is the desired accuracy of the */
-/*   integration scheme. */
+using escript::DataTypes::real_t;
 
-/************************************************************************************/
+#define MAX3(_arg1_,_arg2_,_arg3_) std::max(_arg1_,std::max(_arg2_,_arg3_))
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+namespace dudley {
 
-#include "TriangularMesh.h"
-
-/* Be careful reading this function. The X? and NStride? are 1,2,3 but the loop vars are 0,1,2 */
-Dudley_Mesh *Dudley_TriangularMesh_Tet4(dim_t * numElements,
-					double *Length, index_t order, index_t reduced_order, bool optimize, esysUtils::JMPI& mpi_info)
+escript::Domain_ptr DudleyDomain::create3D(dim_t NE0, dim_t NE1, dim_t NE2,
+                                           real_t l0, real_t l1, real_t l2,
+                                           bool optimize,
+                                           escript::JMPI mpiInfo)
 {
-#define N_PER_E 1
-#define DIM 3
-    dim_t N0, N1, N2, NE0, NE1, NE2, i0, i1, i2, k, Nstride0 = 0, Nstride1 = 0, Nstride2 =
-	0, local_NE0, local_NE1, local_NE2, local_N0 = 0, local_N1 = 0, local_N2 = 0;
-    dim_t totalNECount, faceNECount, NDOF0 = 0, NDOF1 = 0, NDOF2 = 0, NFaceElements = 0, NN;
-    index_t node0, myRank, e_offset2, e_offset1, e_offset0 = 0, offset1 = 0, offset2 = 0, offset0 =
-	0, global_i0, global_i1, global_i2;
-    Dudley_Mesh *out;
-    char name[50];
+    const int DIM = 3;
 #ifdef Dudley_TRACE
     double time0 = Dudley_timer();
 #endif
 
-    const int LEFTTAG = 1;	/* boundary x1=0 */
-    const int RIGHTTAG = 2;	/* boundary x1=1 */
-    const int BOTTOMTAG = 100;	/* boundary x3=1 */
-    const int TOPTAG = 200;	/* boundary x3=0 */
-    const int FRONTTAG = 10;	/* boundary x2=0 */
-    const int BACKTAG = 20;	/* boundary x2=1 */
-
-    /* get MPI information */
-    myRank = mpi_info->rank;
-
-    /* set up the global dimensions of the mesh */
-
-    NE0 = MAX(1, numElements[0]);
-    NE1 = MAX(1, numElements[1]);
-    NE2 = MAX(1, numElements[2]);
-    N0 = N_PER_E * NE0 + 1;
-    N1 = N_PER_E * NE1 + 1;
-    N2 = N_PER_E * NE2 + 1;
-
-    /*  allocate mesh: */
-    sprintf(name, "Triangular %d x %d x %d (x 5) mesh", N0, N1, N2);
-    out = Dudley_Mesh_alloc(name, DIM, mpi_info);
-    if (!Dudley_noError())
-    {
-	return NULL;
+    const int LEFTTAG = 1;      /* boundary x1=0 */
+    const int RIGHTTAG = 2;     /* boundary x1=1 */
+    const int BOTTOMTAG = 100;  /* boundary x3=1 */
+    const int TOPTAG = 200;     /* boundary x3=0 */
+    const int FRONTTAG = 10;    /* boundary x2=0 */
+    const int BACKTAG = 20;     /* boundary x2=1 */
+
+    const int myRank = mpiInfo->rank;
+
+    // set up the global dimensions of the mesh
+    NE0 = std::max(dim_t(1), NE0);
+    NE1 = std::max(dim_t(1), NE1);
+    NE2 = std::max(dim_t(1), NE2);
+    const dim_t N0 = NE0 + 1;
+    const dim_t N1 = NE1 + 1;
+    const dim_t N2 = NE2 + 1;
+
+    // allocate mesh
+    std::stringstream name;
+    name << "Rectangular " << N0 << " x " << N1 << " x " << N2 << " (x 5) mesh";
+    DudleyDomain* out = new DudleyDomain(name.str(), DIM, mpiInfo);
+    NodeFile* nodes = out->getNodes();
+    ElementFile* elements = new ElementFile(Dudley_Tet4, mpiInfo);
+    out->setElements(elements);
+    ElementFile* faces = new ElementFile(Dudley_Tri3, mpiInfo);
+    out->setFaceElements(faces);
+    ElementFile* points = new ElementFile(Dudley_Point1, mpiInfo);
+    out->setPoints(points);
+
+    dim_t Nstride0, Nstride1, Nstride2;
+    dim_t local_NE0, local_NE1, local_NE2;
+    index_t e_offset0, e_offset1, e_offset2;
+    // work out the largest dimension
+    if (N2 == MAX3(N0, N1, N2)) {
+        Nstride0 = 1;
+        Nstride1 = N0;
+        Nstride2 = N0 * N1;
+        local_NE0 = NE0;
+        e_offset0 = 0;
+        local_NE1 = NE1;
+        e_offset1 = 0;
+        mpiInfo->split(NE2, &local_NE2, &e_offset2);
+    } else if (N1 == MAX3(N0, N1, N2)) {
+        Nstride0 = N2;
+        Nstride1 = N0 * N2;
+        Nstride2 = 1;
+        local_NE0 = NE0;
+        e_offset0 = 0;
+        mpiInfo->split(NE1, &local_NE1, &e_offset1);
+        local_NE2 = NE2;
+        e_offset2 = 0;
+    } else {
+        Nstride0 = N1 * N2;
+        Nstride1 = 1;
+        Nstride2 = N1;
+        mpiInfo->split(NE0, &local_NE0, &e_offset0);
+        local_NE1 = NE1;
+        e_offset1 = 0;
+        local_NE2 = NE2;
+        e_offset2 = 0;
     }
-    if (Dudley_noError())
-    {
-
-	Dudley_Mesh_setPoints(out, Dudley_ElementFile_alloc(Dudley_Point1, mpi_info));
-	Dudley_Mesh_setFaceElements(out, Dudley_ElementFile_alloc(Dudley_Tri3, mpi_info));
-	Dudley_Mesh_setElements(out, Dudley_ElementFile_alloc(Dudley_Tet4, mpi_info));
-
-	/* work out the largest dimension */
-	if (N2 == MAX3(N0, N1, N2))
-	{
-	    Nstride0 = 1;
-	    Nstride1 = N0;
-	    Nstride2 = N0 * N1;
-	    local_NE0 = NE0;
-	    e_offset0 = 0;
-	    local_NE1 = NE1;
-	    e_offset1 = 0;
-	    mpi_info->split(NE2, &local_NE2, &e_offset2);
-	}
-	else if (N1 == MAX3(N0, N1, N2))
-	{
-	    Nstride0 = N2;
-	    Nstride1 = N0 * N2;
-	    Nstride2 = 1;
-	    local_NE0 = NE0;
-	    e_offset0 = 0;
-	    mpi_info->split(NE1, &local_NE1, &e_offset1);
-	    local_NE2 = NE2;
-	    e_offset2 = 0;
-	}
-	else
-	{
-	    Nstride0 = N1 * N2;
-	    Nstride1 = 1;
-	    Nstride2 = N1;
-	    mpi_info->split(NE0, &local_NE0, &e_offset0);
-	    local_NE1 = NE1;
-	    e_offset1 = 0;
-	    local_NE2 = NE2;
-	    e_offset2 = 0;
-	}
-	offset0 = e_offset0 * N_PER_E;
-	offset1 = e_offset1 * N_PER_E;
-	offset2 = e_offset2 * N_PER_E;
-	local_N0 = local_NE0 > 0 ? local_NE0 * N_PER_E + 1 : 0;
-	local_N1 = local_NE1 > 0 ? local_NE1 * N_PER_E + 1 : 0;
-	local_N2 = local_NE2 > 0 ? local_NE2 * N_PER_E + 1 : 0;
-
-	/* get the number of surface elements */
-
-	NFaceElements = 0;
-	if (local_NE2 > 0)
-	{
-	    NDOF2 = N2;
-	    if (offset2 == 0)
-		NFaceElements += 2 * local_NE1 * local_NE0;	/* each face is split */
-	    if (local_NE2 + e_offset2 == NE2)
-		NFaceElements += 2 * local_NE1 * local_NE0;
-	}
-	else
-	{
-	    NDOF2 = N2 - 1;
-	}
-
-	if (local_NE0 > 0)
-	{
-	    NDOF0 = N0;
-	    if (e_offset0 == 0)
-		NFaceElements += 2 * local_NE1 * local_NE2;
-	    if (local_NE0 + e_offset0 == NE0)
-		NFaceElements += 2 * local_NE1 * local_NE2;
-	}
-	else
-	{
-	    NDOF0 = N0 - 1;
-	}
-
-	if (local_NE1 > 0)
-	{
-	    NDOF1 = N1;
-	    if (e_offset1 == 0)
-		NFaceElements += 2 * local_NE0 * local_NE2;
-	    if (local_NE1 + e_offset1 == NE1)
-		NFaceElements += 2 * local_NE0 * local_NE2;
-	}
-	else
-	{
-	    NDOF1 = N1 - 1;
-	}
+    const index_t offset0 = e_offset0;
+    const index_t offset1 = e_offset1;
+    const index_t offset2 = e_offset2;
+    const dim_t local_N0 = local_NE0 > 0 ? local_NE0 + 1 : 0;
+    const dim_t local_N1 = local_NE1 > 0 ? local_NE1 + 1 : 0;
+    const dim_t local_N2 = local_NE2 > 0 ? local_NE2 + 1 : 0;
+
+    // get the number of surface elements
+    dim_t NFaceElements = 0;
+    dim_t NDOF0, NDOF1, NDOF2;
+    if (local_NE2 > 0) {
+        NDOF2 = N2;
+        if (offset2 == 0)
+            NFaceElements += 2 * local_NE1 * local_NE0; // each face is split
+        if (local_NE2 + e_offset2 == NE2)
+            NFaceElements += 2 * local_NE1 * local_NE0;
+    } else {
+        NDOF2 = N2 - 1;
     }
 
-    /*  allocate tables: */
-    if (Dudley_noError())
-    {
+    if (local_NE0 > 0) {
+        NDOF0 = N0;
+        if (e_offset0 == 0)
+            NFaceElements += 2 * local_NE1 * local_NE2;
+        if (local_NE0 + e_offset0 == NE0)
+            NFaceElements += 2 * local_NE1 * local_NE2;
+    } else {
+        NDOF0 = N0 - 1;
+    }
 
-	Dudley_NodeFile_allocTable(out->Nodes, local_N0 * local_N1 * local_N2);
-	/* we split the rectangular prism this code used to produce into 5 tetrahedrons */
-	Dudley_ElementFile_allocTable(out->Elements, local_NE0 * local_NE1 * local_NE2 * 5);
-	/* each border face will be split in half */
-	Dudley_ElementFile_allocTable(out->FaceElements, NFaceElements);
+    if (local_NE1 > 0) {
+        NDOF1 = N1;
+        if (e_offset1 == 0)
+            NFaceElements += 2 * local_NE0 * local_NE2;
+        if (local_NE1 + e_offset1 == NE1)
+            NFaceElements += 2 * local_NE0 * local_NE2;
+    } else {
+        NDOF1 = N1 - 1;
     }
 
-    if (Dudley_noError())
-    {
-	int global_adjustment;
-
-	/* create nodes */
-
-#pragma omp parallel for private(i0,i1,i2,k,global_i0,global_i1,global_i2)
-	for (i2 = 0; i2 < local_N2; i2++)
-	{
-	    for (i1 = 0; i1 < local_N1; i1++)
-	    {
-		for (i0 = 0; i0 < local_N0; i0++)
-		{
-		    k = i0 + local_N0 * i1 + local_N0 * local_N1 * i2;
-		    global_i0 = i0 + offset0;
-		    global_i1 = i1 + offset1;
-		    global_i2 = i2 + offset2;
-		    out->Nodes->Coordinates[INDEX2(0, k, DIM)] = DBLE(global_i0) / DBLE(N0 - 1) * Length[0];
-		    out->Nodes->Coordinates[INDEX2(1, k, DIM)] = DBLE(global_i1) / DBLE(N1 - 1) * Length[1];
-		    out->Nodes->Coordinates[INDEX2(2, k, DIM)] = DBLE(global_i2) / DBLE(N2 - 1) * Length[2];
-		    out->Nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1 + Nstride2 * global_i2;
-		    out->Nodes->Tag[k] = 0;
-		    out->Nodes->globalDegreesOfFreedom[k] = Nstride0 * (global_i0 % NDOF0)
-			+ Nstride1 * (global_i1 % NDOF1) + Nstride2 * (global_i2 % NDOF2);
-		}
-	    }
-	}
-	/*   set the elements: */
-
-	global_adjustment = (offset0 + offset1 + offset2) % 2;	/* If we are not the only rank we may need to shift our pattern to match neighbours */
-
-	NN = out->Elements->numNodes;
-#pragma omp parallel for private(i0,i1,i2,k,node0)
-	for (i2 = 0; i2 < local_NE2; i2++)
-	{
-	    for (i1 = 0; i1 < local_NE1; i1++)
-	    {
-		for (i0 = 0; i0 < local_NE0; i0++)
-		{
-		    index_t res;
-		    index_t v[8];
-		    int j;
-		    k = 5 * (i0 + local_NE0 * i1 + local_NE0 * local_NE1 * i2);
-		    node0 =
-			Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (i1 + e_offset1) +
-			Nstride2 * N_PER_E * (i2 + e_offset2);
-
-		    res = 5 * ((i0 + e_offset0) + NE0 * (i1 + e_offset1) + NE0 * NE1 * (i2 + e_offset2));
-		    for (j = 0; j < 5; ++j)
-		    {
-			out->Elements->Id[k + j] = res + j;
-			out->Elements->Tag[k + j] = 0;
-			out->Elements->Owner[k + j] = myRank;
-		    }
-
-/*	   in non-rotated orientation the points are numbered as follows:
-	   The bottom face (anticlockwise= 0,1,3,2), top face (anticlockwise 4,5,7,6)*/
-		    if ((global_adjustment + i0 + i1 + i2) % 2 == 0)
-		    {
-			v[0] = node0;
-			v[1] = node0 + Nstride0;
-			v[2] = node0 + Nstride1;
-			v[3] = node0 + Nstride1 + Nstride0;
-			v[4] = node0 + Nstride2;
-			v[5] = node0 + Nstride0 + Nstride2;
-			v[6] = node0 + Nstride1 + Nstride2;
-			v[7] = node0 + Nstride2 + Nstride1 + Nstride0;
-		    }
-		    else
-		    {
-			/* this form is rotated around the 0,2,4,6 face clockwise 90 degrees */
-
-			v[0] = node0 + Nstride1;	/* node 0 ends up in position 2 */
-			v[2] = node0 + Nstride1 + Nstride2;	/* node 2 ends up in position 6 */
-			v[6] = node0 + Nstride2;	/* node 6 ends up in position 4 */
-			v[4] = node0;	/* node 4 ends up in position 0 */
-
-			v[1] = node0 + Nstride0 + Nstride1;	/* node 1 -> pos 3 */
-			v[3] = node0 + Nstride2 + Nstride1 + Nstride0;	/* node 3-> pos 7 */
-			v[7] = node0 + Nstride0 + Nstride2;	/* node 7 -> pos 5 */
-			v[5] = node0 + Nstride0;	/* node 5 -> pos 1 */
-		    }
-
-		    /* elements nodes are numbered: centre, x, y, z */
-
-		    out->Elements->Nodes[INDEX2(0, k, NN)] = v[4];
-		    out->Elements->Nodes[INDEX2(1, k, NN)] = v[5];
-		    out->Elements->Nodes[INDEX2(2, k, NN)] = v[6];
-		    out->Elements->Nodes[INDEX2(3, k, NN)] = v[0];
-
-		    out->Elements->Nodes[INDEX2(0, k + 1, NN)] = v[7];
-		    out->Elements->Nodes[INDEX2(1, k + 1, NN)] = v[6];
-		    out->Elements->Nodes[INDEX2(2, k + 1, NN)] = v[5];
-		    out->Elements->Nodes[INDEX2(3, k + 1, NN)] = v[3];
-
-		    out->Elements->Nodes[INDEX2(0, k + 2, NN)] = v[2];
-		    out->Elements->Nodes[INDEX2(1, k + 2, NN)] = v[3];
-		    out->Elements->Nodes[INDEX2(2, k + 2, NN)] = v[0];
-		    out->Elements->Nodes[INDEX2(3, k + 2, NN)] = v[6];
-
-		    out->Elements->Nodes[INDEX2(0, k + 3, NN)] = v[1];
-		    out->Elements->Nodes[INDEX2(1, k + 3, NN)] = v[0];
-		    out->Elements->Nodes[INDEX2(2, k + 3, NN)] = v[3];
-		    out->Elements->Nodes[INDEX2(3, k + 3, NN)] = v[5];
-
-		    /* I can't work out where the center is for this one */
-		    out->Elements->Nodes[INDEX2(0, k + 4, NN)] = v[5];
-		    out->Elements->Nodes[INDEX2(1, k + 4, NN)] = v[0];
-		    out->Elements->Nodes[INDEX2(2, k + 4, NN)] = v[6];
-		    out->Elements->Nodes[INDEX2(3, k + 4, NN)] = v[3];
-		}
-
-	    }
-	}			/* end for */
-	/* face elements */
-	NN = out->FaceElements->numNodes;
-	totalNECount = 5 * NE0 * NE1 * NE2;
-	faceNECount = 0;
-	/*   these are the quadrilateral elements on boundary 1 (x3=0): */
-	if (local_NE2 > 0)
-	{
-	    /* **  elements on boundary 100 (x3=0): */
-	    if (e_offset2 == 0)
-	    {
-#pragma omp parallel for private(i0,i1,k,node0)
-		for (i1 = 0; i1 < local_NE1; i1++)
-		{
-		    for (i0 = 0; i0 < local_NE0; i0++)
-		    {
-			index_t res, n0, n1, n2, n3;
-			k = 2 * (i0 + local_NE0 * i1) + faceNECount;
-			node0 = Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (i1 + e_offset1);
-			res = 2 * ((i0 + e_offset0) + NE0 * (i1 + e_offset1)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = BOTTOMTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = BOTTOMTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n0 = node0;
-			n1 = node0 + Nstride0;
-			n2 = node0 + Nstride1;
-			n3 = node0 + Nstride0 + Nstride1;
-
-			if ((global_adjustment + i0 + i1) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n3;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n1;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n3;
-
-			}
-			else
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n1;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n3;
-
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE1 * local_NE0;
-	    }
-	    totalNECount += 2 * NE1 * NE0;
-	    /* **  elements on boundary 200 (x3=1) - Top */
-	    if (local_NE2 + e_offset2 == NE2)
-	    {
-#pragma omp parallel for private(i0,i1,k,node0)
-		for (i1 = 0; i1 < local_NE1; i1++)
-		{
-		    for (i0 = 0; i0 < local_NE0; i0++)
-		    {
-
-			index_t res, n4, n5, n6, n7;
-			k = 2 * (i0 + local_NE0 * i1) + faceNECount;
-			node0 =
-			    Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (i1 + e_offset1) +
-			    Nstride2 * N_PER_E * (NE2 - 1);
-
-			res = 2 * ((i0 + e_offset0) + NE0 * (i1 + e_offset1)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = TOPTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = TOPTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n4 = node0 + Nstride2;
-			n5 = node0 + Nstride0 + Nstride2;
-			n6 = node0 + Nstride1 + Nstride2;
-			n7 = node0 + Nstride1 + Nstride0 + Nstride2;
-
-			if ((global_adjustment + i0 + i1 + local_NE2 - 1) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n5;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n6;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n5;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n6;
-			}
-			else
-			{
-
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n5;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n7;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n6;
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE1 * local_NE0;
-	    }
-	    totalNECount += 2 * NE1 * NE0;
-	}
-	if (local_NE0 > 0)
-	{
-	    /* **  elements on boundary 001 (x1=0): - Left */
-
-	    if (e_offset0 == 0)
-	    {
-#pragma omp parallel for private(i1,i2,k,node0)
-		for (i2 = 0; i2 < local_NE2; i2++)
-		{
-		    for (i1 = 0; i1 < local_NE1; i1++)
-		    {
-
-			index_t res, n0, n2, n4, n6;
-			k = 2 * (i1 + local_NE1 * i2) + faceNECount;
-			node0 = Nstride1 * N_PER_E * (i1 + e_offset1) + Nstride2 * N_PER_E * (i2 + e_offset2);
-			res = 2 * ((i1 + e_offset1) + NE1 * (i2 + e_offset2)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = LEFTTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = LEFTTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n0 = node0;
-			n2 = node0 + Nstride1;
-			n4 = node0 + Nstride2;
-			n6 = node0 + Nstride1 + Nstride2;
-
-			if ((global_adjustment + 0 + i1 + i2) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n6;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n6;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n2;
-			}
-			else
-			{
-			    /* this form is rotated around the 0,2,4,6 face clockwise 90 degrees */
-
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n2;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n4;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n6;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n2;
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE1 * local_NE2;
-	    }
-	    totalNECount += 2 * NE1 * NE2;
-	    /* **  elements on boundary 002 (x1=1): - Right */
-	    if (local_NE0 + e_offset0 == NE0)
-	    {
-#pragma omp parallel for private(i1,i2,k,node0)
-		for (i2 = 0; i2 < local_NE2; i2++)
-		{
-		    for (i1 = 0; i1 < local_NE1; i1++)
-		    {
-			index_t res, n1, n3, n5, n7;
-			k = 2 * (i1 + local_NE1 * i2) + faceNECount;
-
-			node0 =
-			    Nstride0 * N_PER_E * (NE0 - 1) + Nstride1 * N_PER_E * (i1 + e_offset1) +
-			    Nstride2 * N_PER_E * (i2 + e_offset2);
-			res = 2 * ((i1 + e_offset1) + NE1 * (i2 + e_offset2)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = RIGHTTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = RIGHTTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n1 = node0 + Nstride0;
-			n3 = node0 + Nstride0 + Nstride1;
-			n5 = node0 + Nstride0 + Nstride2;
-			n7 = node0 + Nstride0 + Nstride1 + Nstride2;
-			if ((global_adjustment + local_NE0 - 1 + i1 + i2) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n3;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n5;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n3;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n5;
-			}
-			else
-			{
-			    /* this form is rotated around the 0,2,4,6 face clockwise 90 degrees */
-
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n5;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n3;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n7;
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE1 * local_NE2;
-	    }
-	    totalNECount += 2 * NE1 * NE2;
-	}
-	if (local_NE1 > 0)
-	{
-	    /* **  elements on boundary 010 (x2=0): -Front */
-	    if (e_offset1 == 0)
-	    {
-#pragma omp parallel for private(i0,i2,k,node0)
-		for (i2 = 0; i2 < local_NE2; i2++)
-		{
-		    for (i0 = 0; i0 < local_NE0; i0++)
-		    {
-			index_t res, n0, n1, n4, n5;
-			k = 2 * (i0 + local_NE0 * i2) + faceNECount;
-			node0 = Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride2 * N_PER_E * (i2 + e_offset2);
-			res = 2 * ((i2 + e_offset2) + NE2 * (e_offset0 + i0)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = FRONTTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = FRONTTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n0 = node0;
-			n1 = node0 + Nstride0;
-			n4 = node0 + Nstride2;
-			n5 = node0 + Nstride0 + Nstride2;
-
-			if ((global_adjustment + i0 + 0 + i2) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n5;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n5;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n4;
-
-			}
-			else
-			{
-			    /* this form is rotated around the 0,2,4,6 face clockwise 90 degrees */
-
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n0;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n4;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n1;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n5;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n4;
-
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE0 * local_NE2;
-	    }
-	    totalNECount += 2 * NE0 * NE2;
-	    /* **  elements on boundary 020 (x2=1): - Back */
-	    if (local_NE1 + e_offset1 == NE1)
-	    {
-#pragma omp parallel for private(i0,i2,k,node0)
-		for (i2 = 0; i2 < local_NE2; i2++)
-		{
-		    for (i0 = 0; i0 < local_NE0; i0++)
-		    {
-			index_t res, n2, n6, n7, n3;
-			k = 2 * (i0 + local_NE0 * i2) + faceNECount;
-			node0 =
-			    Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (NE1 - 1) +
-			    Nstride2 * N_PER_E * (i2 + e_offset2);
-			res = 2 * ((i2 + e_offset2) + NE2 * (i0 + e_offset0)) + totalNECount;
-			out->FaceElements->Id[k] = res;
-			out->FaceElements->Tag[k] = BACKTAG;
-			out->FaceElements->Owner[k] = myRank;
-			out->FaceElements->Id[k + 1] = res + 1;
-			out->FaceElements->Tag[k + 1] = BACKTAG;
-			out->FaceElements->Owner[k + 1] = myRank;
-
-			n2 = node0 + Nstride1;
-			n6 = node0 + Nstride1 + Nstride2;
-			n7 = node0 + Nstride0 + Nstride1 + Nstride2;
-			n3 = node0 + Nstride0 + Nstride1;
-
-			if ((global_adjustment + i0 + local_NE1 - 1 + i2) % 2 == 0)
-			{
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n6;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n3;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n6;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n3;
-
-			}
-			else
-			{
-			    /* this form is rotated around the 0,2,4,6 face clockwise 90 degrees */
-			    out->FaceElements->Nodes[INDEX2(0, k, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(1, k, NN)] = n6;
-			    out->FaceElements->Nodes[INDEX2(2, k, NN)] = n7;
-
-			    out->FaceElements->Nodes[INDEX2(0, k + 1, NN)] = n2;
-			    out->FaceElements->Nodes[INDEX2(1, k + 1, NN)] = n7;
-			    out->FaceElements->Nodes[INDEX2(2, k + 1, NN)] = n3;
-
-			}
-		    }
-		}
-		faceNECount += 2 * local_NE0 * local_NE2;
-	    }
-	    totalNECount += 2 * NE0 * NE2;
-	}
+    // allocate tables
+    nodes->allocTable(local_N0 * local_N1 * local_N2);
+    // we split the rectangular prism this code used to produce into 5
+    // tetrahedra
+    elements->allocTable(local_NE0 * local_NE1 * local_NE2 * 5);
+    // each border face will be split in half
+    faces->allocTable(NFaceElements);
+
+    // create nodes
+#pragma omp parallel for
+    for (index_t i2 = 0; i2 < local_N2; i2++) {
+        for (index_t i1 = 0; i1 < local_N1; i1++) {
+            for (index_t i0 = 0; i0 < local_N0; i0++) {
+                const index_t k = i0 + local_N0 * i1 + local_N0 * local_N1 * i2;
+                const index_t global_i0 = i0 + offset0;
+                const index_t global_i1 = i1 + offset1;
+                const index_t global_i2 = i2 + offset2;
+                nodes->Coordinates[INDEX2(0, k, DIM)] = (real_t)global_i0 / (real_t)(N0 - 1) * l0;
+                nodes->Coordinates[INDEX2(1, k, DIM)] = (real_t)global_i1 / (real_t)(N1 - 1) * l1;
+                nodes->Coordinates[INDEX2(2, k, DIM)] = (real_t)global_i2 / (real_t)(N2 - 1) * l2;
+                nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1 + Nstride2 * global_i2;
+                nodes->Tag[k] = 0;
+                nodes->globalDegreesOfFreedom[k] =
+                                    Nstride0 * (global_i0 % NDOF0)
+                                    + Nstride1 * (global_i1 % NDOF1)
+                                    + Nstride2 * (global_i2 % NDOF2);
+            }
+        }
     }
-    if (Dudley_noError())
-    {
-	/* add tag names */
-	Dudley_Mesh_addTagMap(out, "top", TOPTAG);
-	Dudley_Mesh_addTagMap(out, "bottom", BOTTOMTAG);
-	Dudley_Mesh_addTagMap(out, "left", LEFTTAG);
-	Dudley_Mesh_addTagMap(out, "right", RIGHTTAG);
-	Dudley_Mesh_addTagMap(out, "front", FRONTTAG);
-	Dudley_Mesh_addTagMap(out, "back", BACKTAG);
+
+    // set the elements
+    // If we are not the only rank we may need to shift our pattern to match
+    // neighbours
+    int global_adjustment = (offset0 + offset1 + offset2) % 2;
+
+    int NN = elements->numNodes;
+#pragma omp parallel for
+    for (index_t i2 = 0; i2 < local_NE2; i2++) {
+        for (index_t i1 = 0; i1 < local_NE1; i1++) {
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const index_t k = 5 * (i0 + local_NE0 * i1 + local_NE0 * local_NE1 * i2);
+                const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                    + Nstride1 * (i1 + e_offset1)
+                                    + Nstride2 * (i2 + e_offset2);
+
+                const index_t res = 5 * ((i0 + e_offset0)
+                                  + NE0 * (i1 + e_offset1)
+                                  + NE0 * NE1 * (i2 + e_offset2));
+                for (int j = 0; j < 5; ++j) {
+                    elements->Id[k + j] = res + j;
+                    elements->Tag[k + j] = 0;
+                    elements->Owner[k + j] = myRank;
+                }
+
+                // in non-rotated orientation the points are numbered as
+                // follows:
+                // The bottom face (anticlockwise = 0,1,3,2),
+                // top face (anticlockwise 4,5,7,6)
+                index_t v[8];
+                if ((global_adjustment + i0 + i1 + i2) % 2 == 0) {
+                    v[0] = node0;
+                    v[1] = node0 + Nstride0;
+                    v[2] = node0 + Nstride1;
+                    v[3] = node0 + Nstride1 + Nstride0;
+                    v[4] = node0 + Nstride2;
+                    v[5] = node0 + Nstride0 + Nstride2;
+                    v[6] = node0 + Nstride1 + Nstride2;
+                    v[7] = node0 + Nstride2 + Nstride1 + Nstride0;
+                } else {
+                    // this form is rotated around the 0,2,4,6 face clockwise
+                    // 90 degrees
+                    v[0] = node0 + Nstride1; // node 0 ends up in position 2
+                    v[2] = node0 + Nstride1 + Nstride2; // node 2 ends up in position 6
+                    v[6] = node0 + Nstride2; // node 6 ends up in position 4
+                    v[4] = node0; // node 4 ends up in position 0
+                    v[1] = node0 + Nstride0 + Nstride1; // node 1 -> pos 3
+                    v[3] = node0 + Nstride2 + Nstride1 + Nstride0; // node 3 -> pos 7
+                    v[7] = node0 + Nstride0 + Nstride2; // node 7 -> pos 5
+                    v[5] = node0 + Nstride0; // node 5 -> pos 1
+                }
+
+                // elements nodes are numbered: centre, x, y, z
+                elements->Nodes[INDEX2(0, k, NN)] = v[4];
+                elements->Nodes[INDEX2(1, k, NN)] = v[5];
+                elements->Nodes[INDEX2(2, k, NN)] = v[6];
+                elements->Nodes[INDEX2(3, k, NN)] = v[0];
+
+                elements->Nodes[INDEX2(0, k + 1, NN)] = v[7];
+                elements->Nodes[INDEX2(1, k + 1, NN)] = v[6];
+                elements->Nodes[INDEX2(2, k + 1, NN)] = v[5];
+                elements->Nodes[INDEX2(3, k + 1, NN)] = v[3];
+
+                elements->Nodes[INDEX2(0, k + 2, NN)] = v[2];
+                elements->Nodes[INDEX2(1, k + 2, NN)] = v[3];
+                elements->Nodes[INDEX2(2, k + 2, NN)] = v[0];
+                elements->Nodes[INDEX2(3, k + 2, NN)] = v[6];
+
+                elements->Nodes[INDEX2(0, k + 3, NN)] = v[1];
+                elements->Nodes[INDEX2(1, k + 3, NN)] = v[0];
+                elements->Nodes[INDEX2(2, k + 3, NN)] = v[3];
+                elements->Nodes[INDEX2(3, k + 3, NN)] = v[5];
+
+                // I can't work out where the center is for this one
+                elements->Nodes[INDEX2(0, k + 4, NN)] = v[5];
+                elements->Nodes[INDEX2(1, k + 4, NN)] = v[0];
+                elements->Nodes[INDEX2(2, k + 4, NN)] = v[6];
+                elements->Nodes[INDEX2(3, k + 4, NN)] = v[3];
+            }
+        }
+    } // for all elements
+
+    // face elements
+    NN = faces->numNodes;
+    dim_t totalNECount = 5 * NE0 * NE1 * NE2;
+    dim_t faceNECount = 0;
+
+    // these are the quadrilateral elements on boundary 1 (x3=0)
+    if (local_NE2 > 0) {
+        // ** elements on boundary 100 (x3=0)
+        if (e_offset2 == 0) {
+#pragma omp parallel for
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const index_t k = 2 * (i0 + local_NE0 * i1) + faceNECount;
+                    const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                        + Nstride1 * (i1 + e_offset1);
+                    const index_t res = 2 * (i0 + e_offset0)
+                                     + NE0 * (i1 + e_offset1) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = BOTTOMTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = BOTTOMTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n0 = node0;
+                    const index_t n1 = node0 + Nstride0;
+                    const index_t n2 = node0 + Nstride1;
+                    const index_t n3 = node0 + Nstride0 + Nstride1;
+
+                    if ((global_adjustment + i0 + i1) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n3;
+                        faces->Nodes[INDEX2(2, k, NN)] = n1;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n2;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n3;
+
+                    } else {
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n2;
+                        faces->Nodes[INDEX2(2, k, NN)] = n1;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n1;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n2;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n3;
+
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE1 * local_NE0;
+        }
+        totalNECount += 2 * NE1 * NE0;
+        // ** elements on boundary 200 (x3=1) - Top
+        if (local_NE2 + e_offset2 == NE2) {
+#pragma omp parallel for
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const index_t k = 2 * (i0 + local_NE0 * i1) + faceNECount;
+                    const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                        + Nstride1 * (i1 + e_offset1)
+                                        + Nstride2 * (NE2 - 1);
+
+                    const index_t res = 2 * (i0 + e_offset0)
+                                      + NE0 * (i1 + e_offset1) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = TOPTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = TOPTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n4 = node0 + Nstride2;
+                    const index_t n5 = node0 + Nstride0 + Nstride2;
+                    const index_t n6 = node0 + Nstride1 + Nstride2;
+                    const index_t n7 = node0 + Nstride1 + Nstride0 + Nstride2;
+
+                    if ((global_adjustment + i0 + i1 + local_NE2 - 1) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n4;
+                        faces->Nodes[INDEX2(1, k, NN)] = n5;
+                        faces->Nodes[INDEX2(2, k, NN)] = n6;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n5;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n6;
+                    } else {
+
+                        faces->Nodes[INDEX2(0, k, NN)] = n4;
+                        faces->Nodes[INDEX2(1, k, NN)] = n5;
+                        faces->Nodes[INDEX2(2, k, NN)] = n7;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n4;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n6;
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE1 * local_NE0;
+        }
+        totalNECount += 2 * NE1 * NE0;
     }
-    /* prepare mesh for further calculations: */
-    if (Dudley_noError())
-    {
-	Dudley_Mesh_resolveNodeIds(out);
+
+    if (local_NE0 > 0) {
+        // ** elements on boundary 001 (x1=0) - Left
+        if (e_offset0 == 0) {
+#pragma omp parallel for
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                    const index_t k = 2 * (i1 + local_NE1 * i2) + faceNECount;
+                    const index_t node0 = Nstride1 * (i1 + e_offset1)
+                                        + Nstride2 * (i2 + e_offset2);
+                    const index_t res = 2 * (i1 + e_offset1)
+                                      + NE1 * (i2 + e_offset2) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = LEFTTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = LEFTTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n0 = node0;
+                    const index_t n2 = node0 + Nstride1;
+                    const index_t n4 = node0 + Nstride2;
+                    const index_t n6 = node0 + Nstride1 + Nstride2;
+
+                    if ((global_adjustment + 0 + i1 + i2) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n4;
+                        faces->Nodes[INDEX2(2, k, NN)] = n6;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n6;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n2;
+                    } else {
+                        // this form is rotated around the 0,2,4,6 face
+                        // clockwise 90 degrees
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n4;
+                        faces->Nodes[INDEX2(2, k, NN)] = n2;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n4;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n6;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n2;
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE1 * local_NE2;
+        }
+        totalNECount += 2 * NE1 * NE2;
+        // ** elements on boundary 002 (x1=1) - Right
+        if (local_NE0 + e_offset0 == NE0) {
+#pragma omp parallel for
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                    const index_t k = 2 * (i1 + local_NE1 * i2) + faceNECount;
+                    const index_t node0 = Nstride0 * (NE0 - 1)
+                                        + Nstride1 * (i1 + e_offset1)
+                                        + Nstride2 * (i2 + e_offset2);
+                    const index_t res = 2 * (i1 + e_offset1)
+                                      + NE1 * (i2 + e_offset2) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = RIGHTTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = RIGHTTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n1 = node0 + Nstride0;
+                    const index_t n3 = node0 + Nstride0 + Nstride1;
+                    const index_t n5 = node0 + Nstride0 + Nstride2;
+                    const index_t n7 = node0 + Nstride0 + Nstride1 + Nstride2;
+
+                    if ((global_adjustment + local_NE0 - 1 + i1 + i2) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n1;
+                        faces->Nodes[INDEX2(1, k, NN)] = n3;
+                        faces->Nodes[INDEX2(2, k, NN)] = n5;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n3;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n5;
+                    } else {
+                        // this form is rotated around the 0,2,4,6 face
+                        // clockwise 90 degrees
+                        faces->Nodes[INDEX2(0, k, NN)] = n1;
+                        faces->Nodes[INDEX2(1, k, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k, NN)] = n5;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n1;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n3;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n7;
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE1 * local_NE2;
+        }
+        totalNECount += 2 * NE1 * NE2;
     }
-    if (Dudley_noError())
-    {
-	Dudley_Mesh_prepare(out, optimize);
+    if (local_NE1 > 0) {
+        // ** elements on boundary 010 (x2=0) - Front
+        if (e_offset1 == 0) {
+#pragma omp parallel for
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const index_t k = 2 * (i0 + local_NE0 * i2) + faceNECount;
+                    const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                        + Nstride2 * (i2 + e_offset2);
+                    const index_t res = 2 * (i2 + e_offset2)
+                                      + NE2 * (e_offset0 + i0) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = FRONTTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = FRONTTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n0 = node0;
+                    const index_t n1 = node0 + Nstride0;
+                    const index_t n4 = node0 + Nstride2;
+                    const index_t n5 = node0 + Nstride0 + Nstride2;
+
+                    if ((global_adjustment + i0 + 0 + i2) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n1;
+                        faces->Nodes[INDEX2(2, k, NN)] = n5;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n5;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n4;
+
+                    } else {
+                        // this form is rotated around the 0,2,4,6 face
+                        // clockwise 90 degrees
+                        faces->Nodes[INDEX2(0, k, NN)] = n0;
+                        faces->Nodes[INDEX2(1, k, NN)] = n1;
+                        faces->Nodes[INDEX2(2, k, NN)] = n4;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n1;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n5;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n4;
+
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE0 * local_NE2;
+        }
+        totalNECount += 2 * NE0 * NE2;
+        // ** elements on boundary 020 (x2=1) - Back
+        if (local_NE1 + e_offset1 == NE1) {
+#pragma omp parallel for
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const index_t k = 2 * (i0 + local_NE0 * i2) + faceNECount;
+                    const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                        + Nstride1 * (NE1 - 1)
+                                        + Nstride2 * (i2 + e_offset2);
+                    const index_t res = 2 * (i2 + e_offset2)
+                                      + NE2 * (i0 + e_offset0) + totalNECount;
+                    faces->Id[k] = res;
+                    faces->Tag[k] = BACKTAG;
+                    faces->Owner[k] = myRank;
+                    faces->Id[k + 1] = res + 1;
+                    faces->Tag[k + 1] = BACKTAG;
+                    faces->Owner[k + 1] = myRank;
+
+                    const index_t n2 = node0 + Nstride1;
+                    const index_t n6 = node0 + Nstride1 + Nstride2;
+                    const index_t n7 = node0 + Nstride0 + Nstride1 + Nstride2;
+                    const index_t n3 = node0 + Nstride0 + Nstride1;
+
+                    if ((global_adjustment + i0 + local_NE1 - 1 + i2) % 2 == 0) {
+                        faces->Nodes[INDEX2(0, k, NN)] = n2;
+                        faces->Nodes[INDEX2(1, k, NN)] = n6;
+                        faces->Nodes[INDEX2(2, k, NN)] = n3;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n6;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n3;
+
+                    } else {
+                        // this form is rotated around the 0,2,4,6 face
+                        // clockwise 90 degrees
+                        faces->Nodes[INDEX2(0, k, NN)] = n2;
+                        faces->Nodes[INDEX2(1, k, NN)] = n6;
+                        faces->Nodes[INDEX2(2, k, NN)] = n7;
+
+                        faces->Nodes[INDEX2(0, k + 1, NN)] = n2;
+                        faces->Nodes[INDEX2(1, k + 1, NN)] = n7;
+                        faces->Nodes[INDEX2(2, k + 1, NN)] = n3;
+                    }
+                }
+            }
+            faceNECount += 2 * local_NE0 * local_NE2;
+        }
+        totalNECount += 2 * NE0 * NE2;
     }
 
-    if (!Dudley_noError())
-    {
-	Dudley_Mesh_free(out);
-    }
-    /* free up memory */
-    return out;
+    // add tag names
+    out->setTagMap("top", TOPTAG);
+    out->setTagMap("bottom", BOTTOMTAG);
+    out->setTagMap("left", LEFTTAG);
+    out->setTagMap("right", RIGHTTAG);
+    out->setTagMap("front", FRONTTAG);
+    out->setTagMap("back", BACKTAG);
+
+    // prepare mesh for further calculations
+    out->resolveNodeIds();
+    out->prepare(optimize);
+    return out->getPtr();
 }
+
+} // namespace dudley
+
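
create3D above lays out a structured N0 x N1 x N2 node grid, splits every hexahedral cell into 5 tetrahedra, and switches to the rotated split on cells of odd parity ((global_adjustment + i0 + i1 + i2) % 2) so that the face diagonals of neighbouring cells, and of cells on neighbouring MPI ranks, match up. A short stand-alone illustration of the parity rule and the resulting element count (plain C++, not escript code):

    #include <cstdio>

    int main()
    {
        const int NE0 = 2, NE1 = 3, NE2 = 2;  // elements per direction
        const int global_adjustment = 0;      // (offset0 + offset1 + offset2) % 2
        int rotated = 0;
        for (int i2 = 0; i2 < NE2; ++i2)
            for (int i1 = 0; i1 < NE1; ++i1)
                for (int i0 = 0; i0 < NE0; ++i0)
                    if ((global_adjustment + i0 + i1 + i2) % 2 != 0)
                        ++rotated;  // this cell uses the rotated tetrahedralisation
        // every cell contributes 5 tetrahedra regardless of orientation
        std::printf("%d cells -> %d tetrahedra, %d cells rotated\n",
                    NE0 * NE1 * NE2, 5 * NE0 * NE1 * NE2, rotated);
        return 0;
    }
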
diff --git a/dudley/src/Mesh_tri3.cpp b/dudley/src/Mesh_tri3.cpp
index 8285f04..a5b7056 100644
--- a/dudley/src/Mesh_tri3.cpp
+++ b/dudley/src/Mesh_tri3.cpp
@@ -14,296 +14,242 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: generates triangular meshes by splitting rectangles */
+#include <escript/index.h>
 
-/*   Generates a numElements[0] x numElements[1] x 2 mesh with first order elements (Tri3) in the rectangle */
-/*   [0,Length[0]] x [0,Length[1]]. order is the desired accuracy of the integration scheme. */
+using escript::DataTypes::real_t;
 
-/************************************************************************************/
+namespace dudley {
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "TriangularMesh.h"
-
-Dudley_Mesh *Dudley_TriangularMesh_Tri3(dim_t * numElements,
-					double *Length, index_t order, index_t reduced_order, bool optimize, esysUtils::JMPI& mpi_info)
+escript::Domain_ptr DudleyDomain::create2D(dim_t NE0, dim_t NE1,
+                                           real_t l0, real_t l1,
+                                           bool optimize,
+                                           escript::JMPI mpiInfo)
 {
-#define N_PER_E 1
-#define DIM 2
-    dim_t N0, N1, NE0, NE1, i0, i1, Nstride0 = 0, Nstride1 = 0, local_NE0, local_NE1, local_N0 = 0, local_N1 = 0;
-    index_t offset0 = 0, offset1 = 0, e_offset0 = 0, e_offset1 = 0;
-    dim_t totalNECount, faceNECount, NDOF0 = 0, NDOF1 = 0, NFaceElements;
-    index_t myRank;
-    Dudley_Mesh *out;
-    char name[50];
-    const int LEFTTAG = 1;	/* boundary x1=0 */
-    const int RIGHTTAG = 2;	/* boundary x1=1 */
-    const int BOTTOMTAG = 10;	/* boundary x2=0 */
-    const int TOPTAG = 20;	/* boundary x2=1 */
+    const int DIM = 2;
+    const int LEFTTAG = 1;    // boundary x1=0
+    const int RIGHTTAG = 2;   // boundary x1=1
+    const int BOTTOMTAG = 10; // boundary x2=0
+    const int TOPTAG = 20;    // boundary x2=1
 
 #ifdef Dudley_TRACE
     double time0 = Dudley_timer();
 #endif
 
-    /* get MPI information */
-    myRank = mpi_info->rank;
-
-    /* set up the global dimensions of the mesh */
-
-    NE0 = MAX(1, numElements[0]);
-    NE1 = MAX(1, numElements[1]);
-    N0 = N_PER_E * NE0 + 1;
-    N1 = N_PER_E * NE1 + 1;
-
-    /* This code was originally copied from Finley's Rec4 constructor.
-       NE? refers to the number of rectangular elements in each direction.
-       The number of nodes produced is the same but the number of non-face elements
-       will double.
-     */
-
-    /*  allocate mesh: */
-    sprintf(name, "Triangular %d x %d (x 2) mesh", N0, N1);
-    out = Dudley_Mesh_alloc(name, DIM, mpi_info);
-    if (!Dudley_noError())
-    {
-	return NULL;
+    const int myRank = mpiInfo->rank;
+
+    // set up the global dimensions of the mesh
+    NE0 = std::max((dim_t)1, NE0);
+    NE1 = std::max((dim_t)1, NE1);
+    const dim_t N0 = NE0 + 1;
+    const dim_t N1 = NE1 + 1;
+
+    // This code was originally copied from Finley's Rec4 constructor.
+    // NE? refers to the number of rectangular elements in each direction.
+    // The number of nodes produced is the same but the number of non-face
+    // elements will double since each "rectangle" is split into two triangles.
+
+    // allocate mesh
+    std::stringstream name;
+    name << "Triangular " << N0 << " x " << N1 << " (x 2) mesh";
+    DudleyDomain* out = new DudleyDomain(name.str(), DIM, mpiInfo);
+
+    ElementFile* elements = new ElementFile(Dudley_Tri3, mpiInfo);
+    out->setElements(elements);
+    ElementFile* faces = new ElementFile(Dudley_Line2, mpiInfo);
+    out->setFaceElements(faces);
+    ElementFile* points = new ElementFile(Dudley_Point1, mpiInfo);
+    out->setPoints(points);
+
+    const dim_t Nstride0 = 1;
+    const dim_t Nstride1 = N0;
+    dim_t local_NE0, local_NE1;
+    index_t e_offset0 = 0, e_offset1 = 0;
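+    // distribute the element grid across MPI ranks along the larger dimension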
+    if (N1 == std::max(N0, N1)) {
+        local_NE0 = NE0;
+        e_offset0 = 0;
+        mpiInfo->split(NE1, &local_NE1, &e_offset1);
+    } else {
+        mpiInfo->split(NE0, &local_NE0, &e_offset0);
+        local_NE1 = NE1;
+        e_offset1 = 0;
     }
-    if (Dudley_noError())
-    {
-
-	Dudley_Mesh_setPoints(out, Dudley_ElementFile_alloc(Dudley_Point1, mpi_info));
-	Dudley_Mesh_setFaceElements(out, Dudley_ElementFile_alloc(Dudley_Line2, mpi_info));
-	Dudley_Mesh_setElements(out, Dudley_ElementFile_alloc(Dudley_Tri3, mpi_info));
-	Nstride0 = 1;
-	Nstride1 = N0;
-	if (N1 == MAX(N0, N1))
-	{
-	    local_NE0 = NE0;
-	    e_offset0 = 0;
-	    mpi_info->split(NE1, &local_NE1, &e_offset1);
-	}
-	else
-	{
-	    mpi_info->split(NE0, &local_NE0, &e_offset0);
-	    local_NE1 = NE1;
-	    e_offset1 = 0;
-	}
-	offset0 = e_offset0 * N_PER_E;
-	offset1 = e_offset1 * N_PER_E;
-	local_N0 = local_NE0 > 0 ? local_NE0 * N_PER_E + 1 : 0;
-	local_N1 = local_NE1 > 0 ? local_NE1 * N_PER_E + 1 : 0;
-
-	/* get the number of surface elements */
-
-	NFaceElements = 0;
-	if (local_NE0 > 0)
-	{
-	    NDOF0 = N0;
-	    if (e_offset0 == 0)
-		NFaceElements += local_NE1;
-	    if (local_NE0 + e_offset0 == NE0)
-		NFaceElements += local_NE1;
-	}
-	else
-	{
-	    NDOF0 = N0 - 1;
-	}
-	if (local_NE1 > 0)
-	{
-	    NDOF1 = N1;
-	    if (e_offset1 == 0)
-		NFaceElements += local_NE0;
-	    if (local_NE1 + e_offset1 == NE1)
-		NFaceElements += local_NE0;
-	}
-	else
-	{
-	    NDOF1 = N1 - 1;
-	}
-
-	/*  allocate tables: */
-
-	Dudley_NodeFile_allocTable(out->Nodes, local_N0 * local_N1);
-
-	/* This code was originally copied from Finley's rec4 generator 
-	   We double these numbers because each "rectangle" will be split into
-	   two triangles. So the number of nodes is the same but the 
-	   number of elements will double */
-	Dudley_ElementFile_allocTable(out->Elements, local_NE0 * local_NE1 * 2);
-	Dudley_ElementFile_allocTable(out->FaceElements, NFaceElements);
-
+    const index_t offset0 = e_offset0;
+    const index_t offset1 = e_offset1;
+    const dim_t local_N0 = local_NE0 > 0 ? local_NE0 + 1 : 0;
+    const dim_t local_N1 = local_NE1 > 0 ? local_NE1 + 1 : 0;
+
+    // get the number of surface elements
+    dim_t NFaceElements = 0;
+    dim_t NDOF0, NDOF1;
+    if (local_NE0 > 0) {
+        NDOF0 = N0;
+        if (e_offset0 == 0)
+            NFaceElements += local_NE1;
+        if (local_NE0 + e_offset0 == NE0)
+            NFaceElements += local_NE1;
+    } else {
+        NDOF0 = N0 - 1;
+    }
+    if (local_NE1 > 0) {
+        NDOF1 = N1;
+        if (e_offset1 == 0)
+            NFaceElements += local_NE0;
+        if (local_NE1 + e_offset1 == NE1)
+            NFaceElements += local_NE0;
+    } else {
+        NDOF1 = N1 - 1;
     }
-    if (Dudley_noError())
-    {
-	dim_t NN;
-	index_t global_adjustment;
-	/* create nodes */
-#pragma omp parallel for private(i0,i1)
-	for (i1 = 0; i1 < local_N1; i1++)
-	{
-	    for (i0 = 0; i0 < local_N0; i0++)
-	    {
-		dim_t k = i0 + local_N0 * i1;
-		dim_t global_i0 = i0 + offset0;
-		dim_t global_i1 = i1 + offset1;
-		out->Nodes->Coordinates[INDEX2(0, k, DIM)] = DBLE(global_i0) / DBLE(N0 - 1) * Length[0];
-		out->Nodes->Coordinates[INDEX2(1, k, DIM)] = DBLE(global_i1) / DBLE(N1 - 1) * Length[1];
-		out->Nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1;
-		out->Nodes->Tag[k] = 0;
-		out->Nodes->globalDegreesOfFreedom[k] = Nstride0 * (global_i0 % NDOF0) + Nstride1 * (global_i1 % NDOF1);
-	    }
-	}
-	/*   set the elements: */
-	NN = out->Elements->numNodes;
-	global_adjustment = (offset0 + offset1) % 2;
-#pragma omp parallel for private(i0,i1)
-	for (i1 = 0; i1 < local_NE1; i1++)
-	{
-	    for (i0 = 0; i0 < local_NE0; i0++)
-	    {
-		index_t a, b, c, d;
-		/* we will split this "rectangle" into two triangles */
-		dim_t k = 2 * (i0 + local_NE0 * i1);
-		index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (i1 + e_offset1);
-
-		out->Elements->Id[k] = 2 * ((i0 + e_offset0) + NE0 * (i1 + e_offset1));
-		out->Elements->Tag[k] = 0;
-		out->Elements->Owner[k] = myRank;
-		out->Elements->Id[k + 1] = out->Elements->Id[k] + 1;
-		out->Elements->Tag[k + 1] = 0;
-		out->Elements->Owner[k + 1] = myRank;
-
-		/* a,b,c,d gives the nodes in the rectangle in clockwise order */
-		a = node0; b = node0 + Nstride0; c = node0 + Nstride1 + Nstride0; d = node0 + Nstride1;
-		/* For a little bit of variety  */
-		if ((global_adjustment + node0) % 2)
-		{
-		    out->Elements->Nodes[INDEX2(0, k, NN)] = a;
-		    out->Elements->Nodes[INDEX2(1, k, NN)] = b;
-		    out->Elements->Nodes[INDEX2(2, k, NN)] = d;
-		    out->Elements->Nodes[INDEX2(0, k + 1, NN)] = b;
-		    out->Elements->Nodes[INDEX2(1, k + 1, NN)] = c;
-		    out->Elements->Nodes[INDEX2(2, k + 1, NN)] = d;
-		}
-		else
-		{
-		    out->Elements->Nodes[INDEX2(0, k, NN)] = a;
-		    out->Elements->Nodes[INDEX2(1, k, NN)] = b;
-		    out->Elements->Nodes[INDEX2(2, k, NN)] = c;
-		    out->Elements->Nodes[INDEX2(0, k + 1, NN)] = a;
-		    out->Elements->Nodes[INDEX2(1, k + 1, NN)] = c;
-		    out->Elements->Nodes[INDEX2(2, k + 1, NN)] = d;
-		}
-	    }
-	}
-	/* face elements */
-	NN = out->FaceElements->numNodes;
-	totalNECount = 2 * NE0 * NE1;	/* because we have split the rectangles */
-	faceNECount = 0;
-	if (local_NE0 > 0)
-	{
-	    /* **  elements on boundary 001 (x1=0): */
-
-	    if (e_offset0 == 0)
-	    {
-#pragma omp parallel for private(i1)
-		for (i1 = 0; i1 < local_NE1; i1++)
-		{
 
-		    dim_t k = i1 + faceNECount;
-		    index_t node0 = Nstride1 * N_PER_E * (i1 + e_offset1);
+    NodeFile* nodes = out->getNodes();
+    nodes->allocTable(local_N0 * local_N1);
+    elements->allocTable(local_NE0 * local_NE1 * 2);
+    faces->allocTable(NFaceElements);
+
+    // create nodes
+#pragma omp parallel for
+    for (index_t i1 = 0; i1 < local_N1; i1++) {
+        for (index_t i0 = 0; i0 < local_N0; i0++) {
+            const dim_t k = i0 + local_N0 * i1;
+            const dim_t global_i0 = i0 + offset0;
+            const dim_t global_i1 = i1 + offset1;
+            nodes->Coordinates[INDEX2(0, k, DIM)] = (real_t)global_i0 / (real_t)(N0 - 1) * l0;
+            nodes->Coordinates[INDEX2(1, k, DIM)] = (real_t)global_i1 / (real_t)(N1 - 1) * l1;
+            nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1;
+            nodes->Tag[k] = 0;
+            nodes->globalDegreesOfFreedom[k] = Nstride0 * (global_i0 % NDOF0)
+                                             + Nstride1 * (global_i1 % NDOF1);
+        }
+    }
 
-		    out->FaceElements->Id[k] = i1 + e_offset1 + totalNECount;
-		    out->FaceElements->Tag[k] = LEFTTAG;
-		    out->FaceElements->Owner[k] = myRank;
-		    out->FaceElements->Nodes[INDEX2(0, k, NN)] = node0 + Nstride1;
-		    out->FaceElements->Nodes[INDEX2(1, k, NN)] = node0;
-		}
-		faceNECount += local_NE1;
-	    }
-	    totalNECount += NE1;
-	    /* **  elements on boundary 002 (x1=1): */
-	    if (local_NE0 + e_offset0 == NE0)
-	    {
-#pragma omp parallel for private(i1)
-		for (i1 = 0; i1 < local_NE1; i1++)
-		{
-		    dim_t k = i1 + faceNECount;
-		    index_t node0 = Nstride0 * N_PER_E * (NE0 - 1) + Nstride1 * N_PER_E * (i1 + e_offset1);
+    // set the elements
+    dim_t NN = elements->numNodes;
+    const index_t global_adjustment = (offset0 + offset1) % 2;
+
+#pragma omp parallel for
+    for (index_t i1 = 0; i1 < local_NE1; i1++) {
+        for (index_t i0 = 0; i0 < local_NE0; i0++) {
+            // we will split this "rectangle" into two triangles
+            const dim_t k = 2 * (i0 + local_NE0 * i1);
+            const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                + Nstride1 * (i1 + e_offset1);
+
+            elements->Id[k] = 2 * ((i0 + e_offset0) + NE0 * (i1 + e_offset1));
+            elements->Tag[k] = 0;
+            elements->Owner[k] = myRank;
+            elements->Id[k + 1] = elements->Id[k] + 1;
+            elements->Tag[k + 1] = 0;
+            elements->Owner[k + 1] = myRank;
+
+            // a, b, c, d give the nodes of the rectangle in clockwise order
+            const index_t a = node0;
+            const index_t b = node0 + Nstride0;
+            const index_t c = node0 + Nstride1 + Nstride0;
+            const index_t d = node0 + Nstride1;
+            // For a little bit of variety
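+            // alternate the splitting diagonal (b-d vs a-c) between
+            // neighbouring rectangles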
+            if ((global_adjustment + node0) % 2) {
+                elements->Nodes[INDEX2(0, k, NN)] = a;
+                elements->Nodes[INDEX2(1, k, NN)] = b;
+                elements->Nodes[INDEX2(2, k, NN)] = d;
+                elements->Nodes[INDEX2(0, k + 1, NN)] = b;
+                elements->Nodes[INDEX2(1, k + 1, NN)] = c;
+                elements->Nodes[INDEX2(2, k + 1, NN)] = d;
+            } else {
+                elements->Nodes[INDEX2(0, k, NN)] = a;
+                elements->Nodes[INDEX2(1, k, NN)] = b;
+                elements->Nodes[INDEX2(2, k, NN)] = c;
+                elements->Nodes[INDEX2(0, k + 1, NN)] = a;
+                elements->Nodes[INDEX2(1, k + 1, NN)] = c;
+                elements->Nodes[INDEX2(2, k + 1, NN)] = d;
+            }
+        }
+    }
 
-		    out->FaceElements->Id[k] = (i1 + e_offset1) + totalNECount;
-		    out->FaceElements->Tag[k] = RIGHTTAG;
-		    out->FaceElements->Owner[k] = myRank;
-		    out->FaceElements->Nodes[INDEX2(0, k, NN)] = node0 + Nstride0;
-		    out->FaceElements->Nodes[INDEX2(1, k, NN)] = node0 + Nstride1 + Nstride0;
-		}
-		faceNECount += local_NE1;
-	    }
-	    totalNECount += NE1;
-	}
-	if (local_NE1 > 0)
-	{
-	    /* **  elements on boundary 010 (x2=0): */
-	    if (e_offset1 == 0)
-	    {
-#pragma omp parallel for private(i0)
-		for (i0 = 0; i0 < local_NE0; i0++)
-		{
-		    dim_t k = i0 + faceNECount;
-		    index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0);
+    // face elements
+    NN = faces->numNodes;
+    dim_t totalNECount = 2 * NE0 * NE1; // because we have split the rectangles
+    dim_t faceNECount = 0;
+    if (local_NE0 > 0) {
+        // ** elements on boundary 001 (x1=0)
+        if (e_offset0 == 0) {
+#pragma omp parallel for
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride1 * (i1 + e_offset1);
+                faces->Id[k] = i1 + e_offset1 + totalNECount;
+                faces->Tag[k] = LEFTTAG;
+                faces->Owner[k] = myRank;
+                faces->Nodes[INDEX2(0, k, NN)] = node0 + Nstride1;
+                faces->Nodes[INDEX2(1, k, NN)] = node0;
+            }
+            faceNECount += local_NE1;
+        }
+        totalNECount += NE1;
+        // ** elements on boundary 002 (x1=1)
+        if (local_NE0 + e_offset0 == NE0) {
+#pragma omp parallel for
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride0 * (NE0 - 1)
+                                    + Nstride1 * (i1 + e_offset1);
+
+                faces->Id[k] = (i1 + e_offset1) + totalNECount;
+                faces->Tag[k] = RIGHTTAG;
+                faces->Owner[k] = myRank;
+                faces->Nodes[INDEX2(0, k, NN)] = node0 + Nstride0;
+                faces->Nodes[INDEX2(1, k, NN)] = node0 + Nstride1 + Nstride0;
+            }
+            faceNECount += local_NE1;
+        }
+        totalNECount += NE1;
+    }
+    if (local_NE1 > 0) {
+        // ** elements on boundary 010 (x2=0)
+        if (e_offset1 == 0) {
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * (i0 + e_offset0);
+                faces->Id[k] = e_offset0 + i0 + totalNECount;
+                faces->Tag[k] = BOTTOMTAG;
+                faces->Owner[k] = myRank;
+                faces->Nodes[INDEX2(0, k, NN)] = node0;
+                faces->Nodes[INDEX2(1, k, NN)] = node0 + Nstride0;
+            }
+            faceNECount += local_NE0;
+        }
+        totalNECount += NE0;
+        // ** elements on boundary 020 (x2=1)
+        if (local_NE1 + e_offset1 == NE1) {
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * (i0 + e_offset0)
+                                    + Nstride1 * (NE1 - 1);
+
+                faces->Id[k] = i0 + e_offset0 + totalNECount;
+                faces->Tag[k] = TOPTAG;
+                faces->Owner[k] = myRank;
+                faces->Nodes[INDEX2(0, k, NN)] = node0 + Nstride1 + Nstride0;
+                faces->Nodes[INDEX2(1, k, NN)] = node0 + Nstride1;
+            }
+            faceNECount += local_NE0;
+        }
+        totalNECount += NE0;
+    }
 
-		    out->FaceElements->Id[k] = e_offset0 + i0 + totalNECount;
-		    out->FaceElements->Tag[k] = BOTTOMTAG;
-		    out->FaceElements->Owner[k] = myRank;
+    // add tag names
+    out->setTagMap("top", TOPTAG);
+    out->setTagMap("bottom", BOTTOMTAG);
+    out->setTagMap("left", LEFTTAG);
+    out->setTagMap("right", RIGHTTAG);
 
-		    out->FaceElements->Nodes[INDEX2(0, k, NN)] = node0;
-		    out->FaceElements->Nodes[INDEX2(1, k, NN)] = node0 + Nstride0;
-		}
-		faceNECount += local_NE0;
-	    }
-	    totalNECount += NE0;
-	    /* **  elements on boundary 020 (x2=1): */
-	    if (local_NE1 + e_offset1 == NE1)
-	    {
-#pragma omp parallel for private(i0)
-		for (i0 = 0; i0 < local_NE0; i0++)
-		{
-		    dim_t k = i0 + faceNECount;
-		    index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0) + Nstride1 * N_PER_E * (NE1 - 1);
+    // prepare mesh for further calculations
+    out->resolveNodeIds();
+    out->prepare(optimize);
+    return out->getPtr();
+}
 
-		    out->FaceElements->Id[k] = i0 + e_offset0 + totalNECount;
-		    out->FaceElements->Tag[k] = TOPTAG;
-		    out->FaceElements->Owner[k] = myRank;
+} // namespace dudley
 
-		    out->FaceElements->Nodes[INDEX2(0, k, NN)] = node0 + Nstride1 + Nstride0;
-		    out->FaceElements->Nodes[INDEX2(1, k, NN)] = node0 + Nstride1;
-/*printf("E=%d: %d=%d %d=%d\n",k,INDEX2(0,k,NN),out->FaceElements->Nodes[INDEX2(0,k,NN)], 
-INDEX2(1,k,NN),out->FaceElements->Nodes[INDEX2(1,k,NN)]); */
-		}
-		faceNECount += local_NE0;
-	    }
-	    totalNECount += NE0;
-	}
-    }
-    if (Dudley_noError())
-    {
-	/* add tag names */
-	Dudley_Mesh_addTagMap(out, "top", TOPTAG);
-	Dudley_Mesh_addTagMap(out, "bottom", BOTTOMTAG);
-	Dudley_Mesh_addTagMap(out, "left", LEFTTAG);
-	Dudley_Mesh_addTagMap(out, "right", RIGHTTAG);
-    }
-    /* prepare mesh for further calculations: */
-    if (Dudley_noError())
-    {
-	Dudley_Mesh_resolveNodeIds(out);
-    }
-    if (Dudley_noError())
-    {
-	Dudley_Mesh_prepare(out, optimize);
-    }
-    return out;
-}
diff --git a/dudley/src/Mesh_write.cpp b/dudley/src/Mesh_write.cpp
index 8487e51..4de8cc5 100644
--- a/dudley/src/Mesh_write.cpp
+++ b/dudley/src/Mesh_write.cpp
@@ -14,277 +14,179 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+#include "DudleyDomain.h"
 
-/*   Dudley: write Mesh */
+#include <escript/index.h>
 
-/************************************************************************************/
+#include <iomanip>
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+using std::cout;
+using std::endl;
+using std::ios;
+using std::setw;
+using std::string;
 
-#include "Mesh.h"
+namespace dudley {
 
-/************************************************************************************/
-
-/*  writes the mesh to the external file fname using the Dudley file format: */
-
-void Dudley_Mesh_write(Dudley_Mesh * in, char *fname)
+// private
+void DudleyDomain::writeElementInfo(std::ostream& stream, const ElementFile* e,
+                                    const string& defaultType) const
 {
-    char error_msg[LenErrorMsg_MAX];
-    FILE *f;
-    int NN, i, j, numDim;
-    Dudley_TagMap *tag_map = in->TagMap;
-
-    if (in->MPIInfo->size > 1)
-    {
-	Dudley_setError(IO_ERROR, "Mesh_write: only single processor runs are supported.");
-	return;
-
-    }
-    /* open file */
-    f = fopen(fname, "w");
-    if (f == NULL)
-    {
-	sprintf(error_msg, "Mesh_write: Opening file %s for writing failed.", fname);
-	Dudley_setError(IO_ERROR, error_msg);
-	return;
-    }
-
-    /* write header */
-
-    fprintf(f, "%s\n", in->Name);
-
-    /*  write nodes: */
-
-    if (in->Nodes != NULL)
-    {
-	numDim = Dudley_Mesh_getDim(in);
-	fprintf(f, "%1dD-Nodes %d\n", numDim, in->Nodes->numNodes);
-	for (i = 0; i < in->Nodes->numNodes; i++)
-	{
-	    fprintf(f, "%d %d %d", in->Nodes->Id[i], in->Nodes->globalDegreesOfFreedom[i], in->Nodes->Tag[i]);
-	    for (j = 0; j < numDim; j++)
-		fprintf(f, " %20.15e", in->Nodes->Coordinates[INDEX2(j, i, numDim)]);
-	    fprintf(f, "\n");
-	}
+    if (e != NULL) {
+        stream << e->ename << " " << e->numElements << endl;
+        const int NN = e->numNodes;
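+        // one line per element: Id, Tag, then the Ids of its nodes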
+        for (index_t i = 0; i < e->numElements; i++) {
+            stream << e->Id[i] << " " << e->Tag[i];
+            for (int j = 0; j < NN; j++)
+                stream << " " << m_nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
+            stream << endl;
+        }
+    } else {
+        stream << defaultType << " 0" << endl;
     }
-    else
-    {
-	fprintf(f, "0D-Nodes 0\n");
-    }
-
-    /*  write elements: */
+}
 
-    if (in->Elements != NULL)
-    {
-	fprintf(f, "%s %d\n", in->Elements->ename /*referenceElementSet->referenceElement->Type->Name */ ,
-		in->Elements->numElements);
-	NN = in->Elements->numNodes;
-	for (i = 0; i < in->Elements->numElements; i++)
-	{
-	    fprintf(f, "%d %d", in->Elements->Id[i], in->Elements->Tag[i]);
-	    for (j = 0; j < NN; j++)
-		fprintf(f, " %d", in->Nodes->Id[in->Elements->Nodes[INDEX2(j, i, NN)]]);
-	    fprintf(f, "\n");
-	}
-    }
-    else
-    {
-	fprintf(f, "Tet4 0\n");
+// private
+void DudleyDomain::printElementInfo(const ElementFile* e, const string& title,
+                                    const string& defaultType, bool full) const
+{
+    if (e != NULL) {
+        dim_t mine = 0, overlap = 0;
+        for (index_t i = 0; i < e->numElements; i++) {
+            if (e->Owner[i] == m_mpiInfo->rank)
+                mine++;
+            else
+                overlap++;
+        }
+        cout << "\t" << title << ": "
+            << e->ename << " " << e->numElements << " (TypeId=" << e->etype
+            << ") owner=" << mine << " overlap=" << overlap << endl;
+        if (full) {
+            const int NN = e->numNodes;
+            cout << "\t     Id   Tag Owner Color:  Nodes" << endl;
+            for (index_t i = 0; i < e->numElements; i++) {
+                cout << "\t" << setw(7) << e->Id[i]
+                     << setw(6) << e->Tag[i]
+                     << setw(6) << e->Owner[i]
+                     << setw(6) << e->Color[i] << ": ";
+                for (int j = 0; j < NN; j++)
+                    cout << setw(6) << m_nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
+                cout << endl;
+            }
+        }
+    } else {
+        cout << "\t" << title << ": " << defaultType << " 0" << endl;
     }
+}
 
-    /*  write face elements: */
-    if (in->FaceElements != NULL)
-    {
-	fprintf(f, "%s %d\n", in->FaceElements->ename /*referenceElementSet->referenceElement->Type->Name */ ,
-		in->FaceElements->numElements);
-	NN = in->FaceElements->numNodes;
-	for (i = 0; i < in->FaceElements->numElements; i++)
-	{
-	    fprintf(f, "%d %d", in->FaceElements->Id[i], in->FaceElements->Tag[i]);
-	    for (j = 0; j < NN; j++)
-		fprintf(f, " %d", in->Nodes->Id[in->FaceElements->Nodes[INDEX2(j, i, NN)]]);
-	    fprintf(f, "\n");
-	}
-    }
-    else
-    {
-	fprintf(f, "Tri3 0\n");
-    }
 
-    /*  write points: */
-    if (in->Points != NULL)
-    {
-	fprintf(f, "%s %d\n", in->Points->ename /*referenceElementSet->referenceElement->Type->Name */ ,
-		in->Points->numElements);
-	for (i = 0; i < in->Points->numElements; i++)
-	{
-	    fprintf(f, "%d %d %d\n", in->Points->Id[i], in->Points->Tag[i],
-		    in->Nodes->Id[in->Points->Nodes[INDEX2(0, i, 1)]]);
-	}
-    }
-    else
-    {
-	fprintf(f, "Point1 0\n");
-    }
-
-    /*  write tags: */
-    if (tag_map)
-    {
-	fprintf(f, "Tags\n");
-	while (tag_map)
-	{
-	    fprintf(f, "%s %d\n", tag_map->name, tag_map->tag_key);
-	    tag_map = tag_map->next;
-	}
-    }
-    fclose(f);
+void DudleyDomain::write(const std::string& filename) const
+{
+    if (m_mpiInfo->size > 1)
+        throw escript::NotImplementedError("DudleyDomain::write: only single rank "
+                                           "runs are supported.");
+
+    std::ofstream f(filename.c_str());
+    if (!f.is_open()) {
+        std::stringstream ss;
+        ss << "DudleyDomain::write: Opening file " << filename << " for writing failed";
+        throw escript::IOError(ss.str());
+    }
+
+    // write header
+    f << m_name << endl;
+
+    // write nodes
+    if (m_nodes != NULL) {
+        const int numDim = getDim();
+        f << numDim << "D-Nodes " << m_nodes->getNumNodes() << endl;
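+        // each node line holds Id, global degree of freedom, Tag and the coordinates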
+        for (index_t i = 0; i < m_nodes->getNumNodes(); i++) {
+            f << m_nodes->Id[i] << " " << m_nodes->globalDegreesOfFreedom[i]
+              << " " << m_nodes->Tag[i];
+            f.setf(ios::scientific, ios::floatfield);
+            f.precision(15);
+            for (int j = 0; j < numDim; j++)
+                f << " " << m_nodes->Coordinates[INDEX2(j,i,numDim)];
+            f << endl;
+        }
+    } else {
+        f << "0D-Nodes 0" << endl;
+    }
+
+    // write elements
+    writeElementInfo(f, m_elements, "Tet4");
+
+    // write face elements
+    writeElementInfo(f, m_faceElements, "Tri3");
+
+    // write points
+    writeElementInfo(f, m_points, "Point1");
+
+    // write tags
+    if (m_tagMap.size() > 0) {
+        f << "Tags" << endl;
+        TagMap::const_iterator it;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
+            f << it->first << " " << it->second << endl;
+        }
+    }
+    f.close();
 #ifdef Dudley_TRACE
-    printf("mesh %s has been written to file %s\n", in->Name, fname);
+    cout << "mesh " << m_name << " has been written to file " << filename << endl;
 #endif
 }
 
-void Dudley_PrintMesh_Info(Dudley_Mesh * in, bool full)
+void DudleyDomain::Print_Mesh_Info(bool full) const
 {
-    int NN, i, j, numDim;
-    Dudley_TagMap *tag_map = in->TagMap;
-
-    fprintf(stdout, "Dudley_PrintMesh_Info running on CPU %d of %d\n", in->MPIInfo->rank, in->MPIInfo->size);
-    fprintf(stdout, "\tMesh name '%s'\n", in->Name);
-    fprintf(stdout, "\tApproximation order %d\n", in->approximationOrder);
-    fprintf(stdout, "\tReduced Approximation order %d\n", in->reducedApproximationOrder);
-    fprintf(stdout, "\tIntegration order %d\n", in->integrationOrder);
-    fprintf(stdout, "\tReduced Integration order %d\n", in->reducedIntegrationOrder);
-
-    /* write nodes: */
-    if (in->Nodes != NULL)
-    {
-	numDim = Dudley_Mesh_getDim(in);
-	fprintf(stdout, "\tNodes: %1dD-Nodes %d\n", numDim, in->Nodes->numNodes);
-	if (full)
-	{
-	    fprintf(stdout, "\t     Id   Tag  gDOF   gNI grDfI  grNI:  Coordinates\n");
-	    for (i = 0; i < in->Nodes->numNodes; i++)
-	    {
-		fprintf(stdout, "\t  %5d %5d %5d %5d %5d %5d: ", in->Nodes->Id[i], in->Nodes->Tag[i],
-			in->Nodes->globalDegreesOfFreedom[i], in->Nodes->globalNodesIndex[i],
-			in->Nodes->globalReducedDOFIndex[i], in->Nodes->globalReducedNodesIndex[i]);
-		for (j = 0; j < numDim; j++)
-		    fprintf(stdout, " %20.15e", in->Nodes->Coordinates[INDEX2(j, i, numDim)]);
-		fprintf(stdout, "\n");
-	    }
-	}
-    }
-    else
-    {
-	fprintf(stdout, "\tNodes: 0D-Nodes 0\n");
-    }
-
-    /* write elements: */
-    if (in->Elements != NULL)
-    {
-	int mine = 0, overlap = 0;
-	for (i = 0; i < in->Elements->numElements; i++)
-	{
-	    if (in->Elements->Owner[i] == in->MPIInfo->rank)
-		mine++;
-	    else
-		overlap++;
-	}
-	fprintf(stdout, "\tElements: %s %d (TypeId=%d) owner=%d overlap=%d\n",
-		in->Elements->ename /*referenceElementSet->referenceElement->Type->Name */ , in->Elements->numElements,
-		in->Elements->etype /*referenceElementSet->referenceElement->Type->TypeId */ , mine, overlap);
-	NN = in->Elements->numNodes;
-	if (full)
-	{
-	    fprintf(stdout, "\t     Id   Tag Owner Color:  Nodes\n");
-	    for (i = 0; i < in->Elements->numElements; i++)
-	    {
-		fprintf(stdout, "\t  %5d %5d %5d %5d: ", in->Elements->Id[i], in->Elements->Tag[i],
-			in->Elements->Owner[i], in->Elements->Color[i]);
-		for (j = 0; j < NN; j++)
-		    fprintf(stdout, " %5d", in->Nodes->Id[in->Elements->Nodes[INDEX2(j, i, NN)]]);
-		fprintf(stdout, "\n");
-	    }
-	}
-    }
-    else
-    {
-	fprintf(stdout, "\tElements: Tet4 0\n");
+    cout << "PrintMeshInfo running on CPU " << m_mpiInfo->rank << " of "
+              << m_mpiInfo->size << endl;
+    cout << "\tMesh name '" << m_name << "'\n";
+    cout << "\tApproximation order " << 1 << endl;
+    cout << "\tIntegration order " << 2 << endl;
+    cout << "\tReduced Integration order " << 0 << endl;
+
+    // write nodes
+    if (m_nodes != NULL) {
+        const int numDim = getDim();
+        cout << "\tNodes: " << numDim << "D-Nodes " << m_nodes->getNumNodes() << endl;
+        if (full) {
+            cout << "\t     Id   Tag  gDOF   gNI grDfI  grNI:  Coordinates\n";
+            for (index_t i = 0; i < m_nodes->getNumNodes(); i++) {
+                cout << "\t" << setw(7) << m_nodes->Id[i]
+                     << setw(6) << m_nodes->Tag[i]
+                     << setw(6) << m_nodes->globalDegreesOfFreedom[i]
+                     << setw(6) << m_nodes->globalNodesIndex[i]
+                     << setw(6) << m_nodes->globalDegreesOfFreedom[i]
+                     << setw(6) << m_nodes->globalNodesIndex[i] << ": ";
+                cout.setf(ios::scientific, ios::floatfield);
+                cout.precision(15);
+                for (int j = 0; j < numDim; j++)
+                    cout << " " << m_nodes->Coordinates[INDEX2(j,i,numDim)];
+                cout << endl;
+            }
+        }
+    } else {
+        cout << "\tNodes: 0D-Nodes 0\n";
+    }
+
+    // write elements
+    printElementInfo(m_elements, "Elements", "Tet4", full);
+
+    // write face elements
+    printElementInfo(m_faceElements, "Face elements", "Tri3", full);
+
+    // write points
+    printElementInfo(m_points, "Points", "Point1", full);
+
+    // write tags
+    if (m_tagMap.size() > 0) {
+        cout << "\tTags:\n";
+        TagMap::const_iterator it;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
+            cout << "\t" << setw(7) << it->second << " " << it->first << endl;
+        }
     }
+}
 
-    /* write face elements: */
-    if (in->FaceElements != NULL)
-    {
-	int mine = 0, overlap = 0;
-	for (i = 0; i < in->FaceElements->numElements; i++)
-	{
-	    if (in->FaceElements->Owner[i] == in->MPIInfo->rank)
-		mine++;
-	    else
-		overlap++;
-	}
-	fprintf(stdout, "\tFace elements: %s %d (TypeId=%d) owner=%d overlap=%d\n",
-		in->FaceElements->ename /*referenceElementSet->referenceElement->Type->Name */ ,
-		in->FaceElements->numElements,
-		in->FaceElements->etype /*->referenceElementSet->referenceElement->Type->TypeId*/ , mine, overlap);
-	NN = in->FaceElements->numNodes;
-	if (full)
-	{
-	    fprintf(stdout, "\t     Id   Tag Owner Color:  Nodes\n");
-	    for (i = 0; i < in->FaceElements->numElements; i++)
-	    {
-		fprintf(stdout, "\t  %5d %5d %5d %5d: ", in->FaceElements->Id[i], in->FaceElements->Tag[i],
-			in->FaceElements->Owner[i], in->FaceElements->Color[i]);
-		for (j = 0; j < NN; j++)
-		    fprintf(stdout, " %5d", in->Nodes->Id[in->FaceElements->Nodes[INDEX2(j, i, NN)]]);
-		fprintf(stdout, "\n");
-	    }
-	}
-    }
-    else
-    {
-	fprintf(stdout, "\tFace elements: Tri3 0\n");
-    }
+} // namespace dudley
 
-    /* write points: */
-    if (in->Points != NULL)
-    {
-	int mine = 0, overlap = 0;
-	for (i = 0; i < in->Points->numElements; i++)
-	{
-	    if (in->Points->Owner[i] == in->MPIInfo->rank)
-		mine++;
-	    else
-		overlap++;
-	}
-	fprintf(stdout, "\tPoints: %s %d (TypeId=%d) owner=%d overlap=%d\n",
-		in->Points->ename /*->referenceElementSet->referenceElement->Type->Name*/ , in->Points->numElements,
-		in->Points->etype /*referenceElementSet->referenceElement->Type->TypeId */ , mine, overlap);
-	if (full)
-	{
-	    fprintf(stdout, "\t     Id   Tag Owner Color:  Nodes\n");
-	    for (i = 0; i < in->Points->numElements; i++)
-	    {
-		fprintf(stdout, "\t  %5d %5d %5d %5d %5d\n", in->Points->Id[i], in->Points->Tag[i],
-			in->Points->Owner[i], in->Points->Color[i], in->Nodes->Id[in->Points->Nodes[INDEX2(0, i, 1)]]);
-	    }
-	}
-    }
-    else
-    {
-	fprintf(stdout, "\tPoints: Point1 0\n");
-    }
-
-    /* write tags: */
-    if (tag_map)
-    {
-	fprintf(stdout, "\tTags:\n");
-	while (tag_map)
-	{
-	    fprintf(stdout, "\t  %5d %s\n", tag_map->tag_key, tag_map->name);
-	    tag_map = tag_map->next;
-	}
-    }
-}
diff --git a/dudley/src/NodeFile.cpp b/dudley/src/NodeFile.cpp
index 715ddc9..c631426 100644
--- a/dudley/src/NodeFile.cpp
+++ b/dudley/src/NodeFile.cpp
@@ -14,311 +14,226 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-/*                                                             */
-/*   Dudley: Mesh : NodeFile                                   */
-/*                                                             */
-/*   allocates and frees node files                            */
-/*                                                             */
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "NodeFile.h"
 
-/************************************************************************************/
+#include <escript/index.h>
+
+namespace dudley {
 
-/*   allocates a node file to hold nodes */
-/*   use Dudley_NodeFile_allocTable to allocate the node table (Id,Coordinates). */
-
-Dudley_NodeFile *Dudley_NodeFile_alloc(dim_t numDim, esysUtils::JMPI& MPIInfo)
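+// returns the smallest and largest entry of the id array across all MPI ranks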
+static std::pair<index_t,index_t> getGlobalRange(dim_t n, const index_t* id,
+                                                 escript::JMPI mpiInfo)
 {
-    Dudley_NodeFile *out;
-
-    /*  allocate the return value */
-
-    out = new Dudley_NodeFile;
-    if (Dudley_checkPtr(out))
-	return NULL;
-    out->numNodes = 0;
-    out->numDim = numDim;
-    out->numTagsInUse = 0;
-    out->Id = NULL;
-    out->globalDegreesOfFreedom = NULL;
-    out->Tag = NULL;
-    out->Coordinates = NULL;
-    out->status = DUDLEY_INITIAL_STATUS;
-
-    out->nodesMapping = NULL;
-    out->reducedNodesMapping = NULL;
-    out->degreesOfFreedomMapping = NULL;
-    out->reducedDegreesOfFreedomMapping = NULL;
-
-    out->globalReducedDOFIndex = NULL;
-    out->globalReducedNodesIndex = NULL;
-    out->globalNodesIndex = NULL;
-    out->reducedNodesId = NULL;
-    out->degreesOfFreedomId = NULL;
-    out->reducedDegreesOfFreedomId = NULL;
-    out->tagsInUse = NULL;
-
-    out->MPIInfo = MPIInfo;
-    return out;
-}
-
-/*  frees a node file: */
+    std::pair<index_t,index_t> result(util::getMinMaxInt(1, n, id));
 
-void Dudley_NodeFile_free(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	Dudley_NodeFile_freeTable(in);
-	delete in;
+#ifdef ESYS_MPI
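+    // one MPI_MAX reduction delivers both bounds because the local minimum
+    // is sent negated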
+    index_t global_id_range[2];
+    index_t id_range[2] = { -result.first, result.second };
+    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX,
+                  mpiInfo->comm);
+    result.first = -global_id_range[0];
+    result.second = global_id_range[1];
+#endif
+    if (result.second < result.first) {
+        result.first = -1;
+        result.second = 0;
     }
-}
+    return result;
+}
 
-index_t Dudley_NodeFile_getFirstReducedNode(Dudley_NodeFile * in)
+NodeFile::NodeFile(int nDim, escript::JMPI mpiInfo) :
+    numNodes(0),
+    MPIInfo(mpiInfo),
+    numDim(nDim),
+    Id(NULL),
+    Tag(NULL),
+    globalDegreesOfFreedom(NULL),
+    Coordinates(NULL),
+    globalNodesIndex(NULL),
+    degreesOfFreedomId(NULL),
+    status(DUDLEY_INITIAL_STATUS)
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesDistribution->getFirstComponent();
-    }
-    else
-    {
-	return 0;
-    }
 }
 
-index_t Dudley_NodeFile_getLastReducedNode(Dudley_NodeFile * in)
+NodeFile::~NodeFile()
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesDistribution->getLastComponent();
-    }
-    else
-    {
-	return 0;
-    }
-
+    freeTable();
 }
 
-dim_t Dudley_NodeFile_getGlobalNumReducedNodes(Dudley_NodeFile * in)
+void NodeFile::allocTable(dim_t NN)
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesDistribution->getGlobalNumComponents();
-    }
-    else
-    {
-	return 0;
-    }
+    if (numNodes > 0)
+        freeTable();
 
-}
+    Id = new index_t[NN];
+    Coordinates = new escript::DataTypes::real_t[NN*numDim];
+    Tag = new int[NN];
+    globalDegreesOfFreedom = new index_t[NN];
+    globalNodesIndex = new index_t[NN];
+    degreesOfFreedomId = new index_t[NN];
+    numNodes = NN;
 
-index_t *Dudley_NodeFile_borrowGlobalReducedNodesIndex(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->globalReducedNodesIndex;
-    }
-    else
-    {
-	return NULL;
+    // this initialization makes sure that data are located on the right
+    // processor
+#pragma omp parallel for
+    for (index_t n=0; n<numNodes; n++) {
+        Id[n] = -1;
+        for (int i=0; i<numDim; i++)
+            Coordinates[INDEX2(i,n,numDim)] = 0.;
+        Tag[n] = -1;
+        globalDegreesOfFreedom[n] = -1;
+        globalNodesIndex[n] = -1;
+        degreesOfFreedomId[n] = -1;
     }
 }
 
-index_t Dudley_NodeFile_getFirstNode(Dudley_NodeFile * in)
+void NodeFile::freeTable()
 {
-    if (in != NULL)
-    {
-	return in->nodesDistribution->getFirstComponent();
-    }
-    else
-    {
-	return 0;
-    }
+    delete[] Id;
+    delete[] Coordinates;
+    delete[] globalDegreesOfFreedom;
+    delete[] globalNodesIndex;
+    delete[] Tag;
+    delete[] degreesOfFreedomId;
+    nodesMapping.clear();
+    degreesOfFreedomMapping.clear();
+    nodesDistribution.reset();
+    dofDistribution.reset();
+#ifdef ESYS_HAVE_PASO
+    degreesOfFreedomConnector.reset();
+#endif
+    numNodes = 0;
 }
 
-index_t Dudley_NodeFile_getLastNode(Dudley_NodeFile * in)
+void NodeFile::print() const
 {
-    if (in != NULL)
-    {
-	return in->nodesDistribution->getLastComponent();
+    std::cout << "=== " << numDim << "D-Nodes:\nnumber of nodes=" << numNodes
+        << std::endl;
+    std::cout << "Id,Tag,globalDegreesOfFreedom,degreesOfFreedom,node,Coordinates" << std::endl;
+    for (index_t i=0; i<numNodes; i++) {
+        std::cout << Id[i] << "," << Tag[i] << "," << globalDegreesOfFreedom[i]
+            << "," << degreesOfFreedomMapping.target[i]
+            << "," << nodesMapping.target[i] << " ";
+        std::cout.precision(15);
+        std::cout.setf(std::ios::scientific, std::ios::floatfield);
+        for (int j=0; j<numDim; j++)
+            std::cout << " " << Coordinates[INDEX2(j,i,numDim)];
+        std::cout << std::endl;
     }
-    else
-    {
-	return 0;
-    }
-
 }
 
-dim_t Dudley_NodeFile_getGlobalNumNodes(Dudley_NodeFile * in)
+void NodeFile::copyTable(index_t offset, index_t idOffset, index_t dofOffset,
+                         const NodeFile* in)
 {
-    if (in != NULL)
-    {
-	return in->nodesDistribution->getGlobalNumComponents();
-    }
-    else
-    {
-	return 0;
-    }
+    // check number of dimensions and table size
+    if (numDim != in->numDim)
+        throw escript::ValueError("NodeFile::copyTable: dimensions of node files don't match");
 
-}
+    if (numNodes < in->numNodes + offset)
+        throw escript::ValueError("NodeFile::copyTable: node table is too small.");
 
-index_t *Dudley_NodeFile_borrowGlobalNodesIndex(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->globalNodesIndex;
-    }
-    else
-    {
-	return NULL;
+#pragma omp parallel for
+    for (index_t n = 0; n < in->numNodes; n++) {
+        Id[offset + n] = in->Id[n] + idOffset;
+        Tag[offset + n] = in->Tag[n];
+        globalDegreesOfFreedom[offset + n] = in->globalDegreesOfFreedom[n] + dofOffset;
+        for (int i = 0; i < numDim; i++)
+            Coordinates[INDEX2(i, offset + n, numDim)] =
+                                    in->Coordinates[INDEX2(i, n, in->numDim)];
     }
 }
 
-dim_t Dudley_NodeFile_getNumReducedNodes(Dudley_NodeFile * in)
+std::pair<index_t,index_t> NodeFile::getDOFRange() const
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesMapping->numTargets;
+    std::pair<index_t,index_t> result(util::getMinMaxInt(
+                                        1, numNodes, globalDegreesOfFreedom));
+    if (result.second < result.first) {
+        result.first = -1;
+        result.second = 0;
     }
-    else
-    {
-	return 0;
-    }
-
+    return result;
 }
 
-dim_t Dudley_NodeFile_getNumDegreesOfFreedom(Dudley_NodeFile * in)
+std::pair<index_t,index_t> NodeFile::getGlobalIdRange() const
 {
-    if (in != NULL)
-    {
-	return in->degreesOfFreedomDistribution->getMyNumComponents();
-    }
-    else
-    {
-	return 0;
-    }
+    return getGlobalRange(numNodes, Id, MPIInfo);
 }
 
-dim_t Dudley_NodeFile_getNumNodes(Dudley_NodeFile * in)
+std::pair<index_t,index_t> NodeFile::getGlobalDOFRange() const
 {
-    if (in != NULL)
-    {
-	return in->nodesMapping->numNodes;
-    }
-    else
-    {
-	return 0;
-    }
+    return getGlobalRange(numNodes, globalDegreesOfFreedom, MPIInfo);
 }
 
-dim_t Dudley_NodeFile_getNumReducedDegreesOfFreedom(Dudley_NodeFile * in)
+std::pair<index_t,index_t> NodeFile::getGlobalNodeIDIndexRange() const
 {
-    if (in != NULL)
-    {
-	return in->reducedDegreesOfFreedomDistribution->getMyNumComponents();
-    }
-    else
-    {
-	return 0;
-    }
+    return getGlobalRange(numNodes, globalNodesIndex, MPIInfo);
 }
 
-index_t *Dudley_NodeFile_borrowTargetReducedNodes(Dudley_NodeFile * in)
+void NodeFile::setCoordinates(const escript::Data& newX)
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesMapping->target;
-    }
-    else
-    {
-	return NULL;
+    if (newX.getDataPointSize() != numDim) {
+        std::stringstream ss;
+        ss << "NodeFile::setCoordinates: number of dimensions of new "
+            "coordinates has to be " << numDim;
+        throw escript::ValueError(ss.str());
+    } else if (newX.getNumDataPointsPerSample() != 1 ||
+            newX.getNumSamples() != numNodes) {
+        std::stringstream ss;
+        ss << "NodeFile::setCoordinates: number of given nodes must be "
+            << numNodes;
+        throw escript::ValueError(ss.str());
+    } else {
+        const size_t numDim_size = numDim * sizeof(double);
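+        // increment the status counter to record the coordinate update,
+        // then copy the new coordinates node by node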
+        ++status;
+#pragma omp parallel for
+        for (index_t n = 0; n < numNodes; n++) {
+            memcpy(&Coordinates[INDEX2(0, n, numDim)],
+                    newX.getSampleDataRO(n), numDim_size);
+        }
     }
 }
 
-index_t *Dudley_NodeFile_borrowTargetDegreesOfFreedom(Dudley_NodeFile * in)
+void NodeFile::setTags(int newTag, const escript::Data& mask)
 {
-    if (in != NULL)
-    {
-	return in->degreesOfFreedomMapping->target;
-    }
-    else
-    {
-	return NULL;
-    }
-}
-
-index_t *Dudley_NodeFile_borrowTargetNodes(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->nodesMapping->target;
-    }
-    else
-    {
-	return NULL;
+    if (1 != mask.getDataPointSize()) {
+        throw escript::ValueError("NodeFile::setTags: number of components of mask must be 1.");
+    } else if (mask.getNumDataPointsPerSample() != 1 ||
+            mask.getNumSamples() != numNodes) {
+        throw escript::ValueError("NodeFile::setTags: illegal number of samples of mask Data object");
     }
-}
 
-index_t *Dudley_NodeFile_borrowTargetReducedDegreesOfFreedom(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->reducedDegreesOfFreedomMapping->target;
-    }
-    else
-    {
-	return NULL;
+#pragma omp parallel for
+    for (index_t n = 0; n < numNodes; n++) {
+        if (mask.getSampleDataRO(n)[0] > 0)
+            Tag[n] = newTag;
     }
+    updateTagList();
 }
 
-index_t *Dudley_NodeFile_borrowReducedNodesTarget(Dudley_NodeFile * in)
+void NodeFile::assignMPIRankToDOFs(int* mpiRankOfDOF,
+                                   const IndexVector& distribution)
 {
-    if (in != NULL)
-    {
-	return in->reducedNodesMapping->map;
-    }
-    else
-    {
-	return NULL;
-    }
-}
+    int p_min = MPIInfo->size, p_max = -1;
+    // first compute the min and max DOF on this rank so the search below
+    // can be restricted to the relevant ranks
+    const std::pair<index_t,index_t> dofRange(getDOFRange());
 
-index_t *Dudley_NodeFile_borrowDegreesOfFreedomTarget(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->degreesOfFreedomMapping->map;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        if (distribution[p] <= dofRange.first)
+            p_min = p;
+        if (distribution[p] <= dofRange.second)
+            p_max = p;
     }
-    else
-    {
-	return NULL;
+#pragma omp parallel for
+    for (index_t n = 0; n < numNodes; ++n) {
+        const index_t k = globalDegreesOfFreedom[n];
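+        // pick the rank owning this DOF: the first p with k < distribution[p+1]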
+        for (int p = p_min; p <= p_max; ++p) {
+            if (k < distribution[p + 1]) {
+                mpiRankOfDOF[n] = p;
+                break;
+            }
+        }
     }
 }
 
-index_t *Dudley_NodeFile_borrowNodesTarget(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->nodesMapping->map;
-    }
-    else
-    {
-	return NULL;
-    }
-}
+} // namespace dudley
 
-index_t *Dudley_NodeFile_borrowReducedDegreesOfFreedomTarget(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	return in->reducedDegreesOfFreedomMapping->map;
-    }
-    else
-    {
-	return NULL;
-    }
-}
diff --git a/dudley/src/NodeFile.h b/dudley/src/NodeFile.h
index f0bcf39..81f1af2 100644
--- a/dudley/src/NodeFile.h
+++ b/dudley/src/NodeFile.h
@@ -14,132 +14,234 @@
 *
 *****************************************************************************/
 
-#ifndef INC_DUDLEY_NODEFILE
-#define INC_DUDLEY_NODEFILE
-
-#define MAX_numDim 3
+#ifndef __DUDLEY_NODEFILE_H__
+#define __DUDLEY_NODEFILE_H__
 
 #include "Dudley.h"
 #include "NodeMapping.h"
-#include "escript/DataC.h"
-#include "paso/Distribution.h"
-#include "paso/Coupler.h"
-
-struct Dudley_NodeFile {
-    esysUtils::JMPI MPIInfo;	/* MPI information */
-
-    dim_t numNodes;		/* number of nodes */
-    int numDim;		/* spatial dimension */
-    index_t *Id;		/* Id[i] is the id number of node i. It need to be unique. */
-    index_t *Tag;		/* Tag[i] is the tag of node i. */
-    index_t *tagsInUse;		/* array of tags which are actually used */
-    dim_t numTagsInUse;		/* number of tags used */
-
-    index_t *globalDegreesOfFreedom;	/* globalDegreesOfFreedom[i] is the global degree of freedom assigned to node i */
-    /* this index is used to consider periodic boundary conditions by assigning */
-    /* the same degreesOfFreedom to the same node */
-    double *Coordinates;	/* Coordinates[INDEX2(k,i,numDim)] is the k-th coordinate of the */
-    /* node i. */
-    index_t *globalReducedDOFIndex;	/* assigns each local node a global unique Id in a dens labeling of reduced DOF */
-    /* value <0 indicates that the DOF is not used */
-    index_t *globalReducedNodesIndex;	/* assigns each local node a global unique Id in a dens labeling */
-    /* value <0 indicates that the DOF is not used */
-    index_t *globalNodesIndex;	/* assigns each local reduced node a global unique Id in a dens labeling */
-
-    Dudley_NodeMapping *nodesMapping;
-    Dudley_NodeMapping *reducedNodesMapping;
-    Dudley_NodeMapping *degreesOfFreedomMapping;
-    Dudley_NodeMapping *reducedDegreesOfFreedomMapping;
-
-    paso::Distribution_ptr nodesDistribution;
-    paso::Distribution_ptr reducedNodesDistribution;
-    paso::Distribution_ptr degreesOfFreedomDistribution;
-    paso::Distribution_ptr reducedDegreesOfFreedomDistribution;
 
-    paso::Connector_ptr degreesOfFreedomConnector;
-    paso::Connector_ptr reducedDegreesOfFreedomConnector;
+#include <escript/Distribution.h>
 
-    /* these a the packed versions of Id */
-    index_t *reducedNodesId;
-    index_t *degreesOfFreedomId;
-    index_t *reducedDegreesOfFreedomId;
+#ifdef ESYS_HAVE_PASO
+#include <paso/Coupler.h>
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/types.h>
+#endif
 
-    int status;			/* the status counts the updates done on the node coordinates */
-    /* the value of status is increased by when the node coordinates are updated. */
+namespace escript {
+    struct IndexList;
+}
 
-};
+namespace dudley {
+
+class NodeFile
+{
+public:
+
+    /// constructor - creates an empty node file.
+    /// Use allocTable() to allocate the node table (Id,Coordinates).
+    NodeFile(int nDim, escript::JMPI MPIInfo);
+
+    /// destructor
+    ~NodeFile();
+
+    /// allocates the node table within this node file to hold numNodes nodes.
+    void allocTable(dim_t numNodes);
+
+    /// empties the node table and frees all memory
+    void freeTable();
+
+    void print() const;
+
+    inline index_t getFirstNode() const;
+    inline index_t getLastNode() const;
+    inline dim_t getGlobalNumNodes() const;
+    inline const index_t* borrowGlobalNodesIndex() const;
+
+    /// returns the number of FEM nodes (on this rank)
+    inline dim_t getNumNodes() const;
+
+    /// returns the number of degrees of freedom (on this rank)
+    inline dim_t getNumDegreesOfFreedom() const;
+
+    /// returns the number of degrees of freedom targets (own and shared)
+    inline dim_t getNumDegreesOfFreedomTargets() const;
+
+    /// returns the mapping from target to the local nodes
+    inline const index_t* borrowNodesTarget() const;
+
+    /// returns the mapping from target to the local degrees of freedom
+    inline const index_t* borrowDegreesOfFreedomTarget() const;
+
+    /// returns the mapping from local degrees of freedom to a target
+    inline const index_t* borrowTargetDegreesOfFreedom() const;
+
+    /// returns the mapping from local nodes to a target
+    inline const index_t* borrowTargetNodes() const;
+
+    inline void updateTagList();
+
+    /// creates a dense labeling of the global degrees of freedom and returns
+    /// the new number of global degrees of freedom
+    dim_t createDenseDOFLabeling();
+
+    dim_t createDenseNodeLabeling(IndexVector& nodeDistribution,
+                                  const IndexVector& dofDistribution);
+
+    void createNodeMappings(const IndexVector& dofDistribution,
+                            const IndexVector& nodeDistribution);
+
+    void assignMPIRankToDOFs(int* mpiRankOfDOF,
+                             const IndexVector& distribution);
+
+    void copyTable(index_t offset, index_t idOffset, index_t dofOffset,
+                   const NodeFile* in);
+
+    /// gathers nodes from the NodeFile `in` using the entries in
+    /// index[0:numNodes-1] which are between min_index and max_index
+    /// (exclusive)
+    void gather(const index_t* index, const NodeFile* in);
+
+    void gather_global(const index_t* index, const NodeFile* in);
+
+    void setCoordinates(const escript::Data& newX);
 
-typedef struct Dudley_NodeFile Dudley_NodeFile;
-
-
-Dudley_NodeFile *Dudley_NodeFile_alloc(dim_t, esysUtils::JMPI& MPIInfo);
-index_t Dudley_NodeFile_getFirstReducedNode(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_getLastReducedNode(Dudley_NodeFile * in);
-dim_t Dudley_NodeFile_getGlobalNumReducedNodes(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowGlobalReducedNodesIndex(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_maxGlobalNodeIDIndex(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_maxGlobalReducedNodeIDIndex(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_GlobalDegreeOfFreedomIndex(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_GlobalReducedDegreeOfFreedomIndex(Dudley_NodeFile * in);
-
-index_t Dudley_NodeFile_getFirstNode(Dudley_NodeFile * in);
-index_t Dudley_NodeFile_getLastNode(Dudley_NodeFile * in);
-dim_t Dudley_NodeFile_getGlobalNumNodes(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowGlobalNodesIndex(Dudley_NodeFile * in);
-
-/* returns the number of target */
-dim_t Dudley_NodeFile_getNumReducedNodes(Dudley_NodeFile * in);
-dim_t Dudley_NodeFile_getNumDegreesOfFreedom(Dudley_NodeFile * in);
-dim_t Dudley_NodeFile_getNumNodes(Dudley_NodeFile * in);
-dim_t Dudley_NodeFile_getNumReducedDegreesOfFreedom(Dudley_NodeFile * in);
-
-/* returns the mapping from local nodes to a target */
-index_t *Dudley_NodeFile_borrowTargetReducedNodes(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowTargetDegreesOfFreedom(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowTargetNodes(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowTargetReducedDegreesOfFreedom(Dudley_NodeFile * in);
-/* returns the mapping from target to the local nodes */
-index_t *Dudley_NodeFile_borrowReducedNodesTarget(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowDegreesOfFreedomTarget(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowNodesTarget(Dudley_NodeFile * in);
-index_t *Dudley_NodeFile_borrowReducedDegreesOfFreedomTarget(Dudley_NodeFile * in);
-
-void Dudley_NodeFile_allocTable(Dudley_NodeFile *, dim_t);
-void Dudley_NodeFile_free(Dudley_NodeFile *);
-void Dudley_NodeFile_freeTable(Dudley_NodeFile *);
-void Dudley_NodeFile_setIdGlobalRange(index_t *, index_t *, Dudley_NodeFile *);
-void Dudley_NodeFile_setIdRange(index_t *, index_t *, Dudley_NodeFile *);
-void Dudley_NodeFile_setDOFGlobalRange(index_t *, index_t *, Dudley_NodeFile *);
-void Dudley_NodeFile_setDOFRange(index_t *, index_t *, Dudley_NodeFile *);
-
-void Dudley_NodeFile_setGlobalDOFRange(index_t *, index_t *, Dudley_NodeFile *);
-void Dudley_NodeFile_setGlobalIdRange(index_t *, index_t *, Dudley_NodeFile *);
-index_t Dudley_NodeFile_maxGlobalDegreeOfFreedomIndex(Dudley_NodeFile *);
-index_t Dudley_NodeFile_maxGlobalReducedDegreeOfFreedomIndex(Dudley_NodeFile *);
-
-void Dudley_NodeFile_setReducedDOFRange(index_t *, index_t *, Dudley_NodeFile *);
-dim_t Dudley_NodeFile_createDenseDOFLabeling(Dudley_NodeFile *);
-dim_t Dudley_NodeFile_createDenseNodeLabeling(Dudley_NodeFile * in, index_t * node_distribution,
-					      const index_t * dof_distribution);
-dim_t Dudley_NodeFile_createDenseReducedNodeLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask);
-dim_t Dudley_NodeFile_createDenseReducedDOFLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask);
-void Dudley_NodeFile_assignMPIRankToDOFs(Dudley_NodeFile * in, Esys_MPI_rank * mpiRankOfDOF, index_t * distribution);
-void Dudley_NodeFile_gather(index_t *, Dudley_NodeFile *, Dudley_NodeFile *);
-void Dudley_NodeFile_gather_global(index_t *, Dudley_NodeFile *, Dudley_NodeFile *);
-void Dudley_NodeFile_gatherEntries(dim_t, index_t *, index_t, index_t, index_t *, index_t *, index_t *, index_t *,
-				   index_t *, index_t *, dim_t numDim, double *, double *);
-void Dudley_NodeFile_copyTable(dim_t, Dudley_NodeFile *, dim_t, dim_t, Dudley_NodeFile *);
-void Dudley_NodeFile_scatter(index_t *, Dudley_NodeFile *, Dudley_NodeFile *);
-void Dudley_NodeFile_scatterEntries(dim_t, index_t *, index_t, index_t, index_t *, index_t *, index_t *, index_t *,
-				    index_t *, index_t *, dim_t numDim, double *, double *);
-void Dudley_NodeFile_copyTable(dim_t, Dudley_NodeFile *, dim_t, dim_t, Dudley_NodeFile *);
-void Dudley_NodeFile_setGlobalReducedDegreeOfFreedomRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in);
-void Dudley_NodeFile_setGlobalNodeIDIndexRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in);
-void Dudley_NodeFile_setGlobalReducedNodeIDIndexRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in);
-
-/* ===================== */
-void Dudley_NodeFile_setCoordinates(Dudley_NodeFile *, const escript::Data *);
-void Dudley_NodeFile_setTags(Dudley_NodeFile *, const int, const escript::Data *);
-void Dudley_NodeFile_setTagsInUse(Dudley_NodeFile * in);
+    /// set tags to newTag where mask > 0
+    void setTags(int newTag, const escript::Data& mask);
 
+    std::pair<index_t,index_t> getDOFRange() const;
+
+#ifdef ESYS_HAVE_TRILINOS
+    void createTrilinosGraph(const escript::IndexList* indexList);
+    esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph() const {
+        return m_graph;
+    }
+#endif
+
+private:
+    std::pair<index_t,index_t> getGlobalIdRange() const;
+    std::pair<index_t,index_t> getGlobalDOFRange() const;
+    std::pair<index_t,index_t> getGlobalNodeIDIndexRange() const;
+    dim_t prepareLabeling(const std::vector<short>& mask, IndexVector& buffer,
+                          IndexVector& distribution, bool useNodes);
+    void createDOFMappingAndCoupling();
+
+    NodeMapping nodesMapping;
+    NodeMapping degreesOfFreedomMapping;
+
+    /// number of nodes
+    dim_t numNodes;
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// Trilinos graph structure, cached for efficiency
+    esys_trilinos::const_TrilinosGraph_ptr m_graph;
+#endif
+
+public:
+    /// MPI information
+    escript::JMPI MPIInfo;
+    /// number of spatial dimensions
+    int numDim;
+    /// Id[i] is the unique ID number of FEM node i
+    index_t* Id;
+    /// Tag[i] is the tag of node i
+    int* Tag;
+    /// vector of tags which are actually used
+    std::vector<int> tagsInUse;
+
+    /// globalDegreesOfFreedom[i] is the global degree of freedom assigned
+    /// to node i. This index is used to consider periodic boundary conditions
+    /// by assigning the same degree of freedom to different nodes.
+    index_t* globalDegreesOfFreedom;
+    /// Coordinates[INDEX2(k,i,numDim)] is the k-th coordinate of node i
+    double* Coordinates;
+    /// assigns each local node a globally unique ID in a dense labeling
+    index_t* globalNodesIndex;
+
+    /// MPI distribution of nodes
+    escript::Distribution_ptr nodesDistribution;
+
+    /// MPI distribution of degrees of freedom
+    escript::Distribution_ptr dofDistribution;
+
+#ifdef ESYS_HAVE_PASO
+    paso::Connector_ptr degreesOfFreedomConnector;
 #endif
+    // this is the packed version of Id
+    index_t* degreesOfFreedomId;
+
+    /// the status counts the updates done on the node coordinates.
+    /// The value is increased by 1 when the node coordinates are updated.
+    int status;
+};
+
+//
+// implementation of inline methods
+//
+
+inline index_t NodeFile::getFirstNode() const
+{
+    return nodesDistribution->getFirstComponent();
+}
+
+inline index_t NodeFile::getLastNode() const
+{
+    return nodesDistribution->getLastComponent();
+}
+
+inline dim_t NodeFile::getGlobalNumNodes() const
+{
+    return nodesDistribution->getGlobalNumComponents();
+}
+
+inline const index_t* NodeFile::borrowGlobalNodesIndex() const
+{
+    return globalNodesIndex;
+}
+
+inline dim_t NodeFile::getNumNodes() const
+{
+    return numNodes;
+}
+
+inline dim_t NodeFile::getNumDegreesOfFreedom() const
+{
+    return dofDistribution->getMyNumComponents();
+}
+
+inline dim_t NodeFile::getNumDegreesOfFreedomTargets() const
+{
+    return degreesOfFreedomMapping.numTargets;
+}
+
+inline const index_t* NodeFile::borrowNodesTarget() const
+{
+    return nodesMapping.map;
+}
+
+inline const index_t* NodeFile::borrowDegreesOfFreedomTarget() const
+{
+    return degreesOfFreedomMapping.map;
+}
+
+inline const index_t* NodeFile::borrowTargetNodes() const
+{
+    return nodesMapping.target;
+}
+
+inline const index_t* NodeFile::borrowTargetDegreesOfFreedom() const
+{
+    return degreesOfFreedomMapping.target;
+}
+
+inline void NodeFile::updateTagList()
+{
+    util::setValuesInUse(Tag, numNodes, tagsInUse, MPIInfo);
+}
+
+
+} // namespace dudley
+
+#endif // __DUDLEY_NODEFILE_H__
+
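For orientation, here is a minimal sketch of how the accessors defined in the new NodeFile class might be exercised from calling code. It assumes a fully initialised NodeFile, that the accessors used are public (as the inline definitions above suggest), and that the dudley headers are on the include path; the helper name is hypothetical and not part of the patch.

    #include <iostream>
    #include "NodeFile.h"   // dudley header; the include path depends on the build setup

    // hypothetical helper: print the local/global sizes exposed by the new API
    void printNodeFileSummary(const dudley::NodeFile& nodes)
    {
        std::cout << "first local node " << nodes.getFirstNode()
                  << ", " << nodes.getNumNodes() << " local of "
                  << nodes.getGlobalNumNodes() << " global nodes, "
                  << nodes.getNumDegreesOfFreedom()
                  << " local degrees of freedom" << std::endl;
    }
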
diff --git a/dudley/src/NodeFile_allocTable.cpp b/dudley/src/NodeFile_allocTable.cpp
deleted file mode 100644
index 992a543..0000000
--- a/dudley/src/NodeFile_allocTable.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-#include "Util.h"
-
-/************************************************************************************/
-
-/*  allocates the node table within an node file to hold numNodes of nodes. The LinearTo mapping, if it exists, */
-/*  is frees. use Dudley_Mesh_setLinearMesh to create a new one. */
-
-void Dudley_NodeFile_allocTable(Dudley_NodeFile * in, dim_t numNodes)
-{
-    index_t *Id2 = NULL, *Tag2 = NULL, *globalDegreesOfFreedom2 = NULL, *globalReducedDOFIndex2 = NULL,
-	*globalReducedNodesIndex2 = NULL, *globalNodesIndex2 = NULL, *reducedNodesId2 = NULL, *degreesOfFreedomId2 =
-	NULL, *reducedDegreesOfFreedomId2 = NULL;
-    double *Coordinates2 = NULL;
-    dim_t n, i;
-
-    /*  allocate memory: */
-    Id2 = new  index_t[numNodes];
-    Coordinates2 = new  double[numNodes * in->numDim];
-    Tag2 = new  index_t[numNodes];
-    globalDegreesOfFreedom2 = new  index_t[numNodes];
-    globalReducedDOFIndex2 = new  index_t[numNodes];
-    globalReducedNodesIndex2 = new  index_t[numNodes];
-    globalNodesIndex2 = new  index_t[numNodes];
-    reducedNodesId2 = new  index_t[numNodes];
-    degreesOfFreedomId2 = new  index_t[numNodes];
-    reducedDegreesOfFreedomId2 = new  index_t[numNodes];
-
-    /*  if fine, free the old table and replace by new: */
-    if (Dudley_checkPtr(Id2) || Dudley_checkPtr(Coordinates2) || Dudley_checkPtr(Tag2)
-	|| Dudley_checkPtr(globalDegreesOfFreedom2)
-	|| Dudley_checkPtr(globalReducedDOFIndex2)
-	|| Dudley_checkPtr(globalReducedNodesIndex2)
-	|| Dudley_checkPtr(globalNodesIndex2)
-	|| Dudley_checkPtr(reducedNodesId2) || Dudley_checkPtr(degreesOfFreedomId2))
-    {
-	delete[] Id2;
-	delete[] Coordinates2;
-	delete[] Tag2;
-	delete[] globalDegreesOfFreedom2;
-	delete[] globalReducedDOFIndex2;
-	delete[] globalReducedNodesIndex2;
-	delete[] globalNodesIndex2;
-	delete[] reducedNodesId2;
-	delete[] degreesOfFreedomId2;
-	delete[] reducedDegreesOfFreedomId2;
-    }
-    else
-    {
-	Dudley_NodeFile_freeTable(in);
-	in->Id = Id2;
-	in->Coordinates = Coordinates2;
-	in->globalDegreesOfFreedom = globalDegreesOfFreedom2;
-	in->Tag = Tag2;
-	in->globalReducedDOFIndex = globalReducedDOFIndex2;
-	in->globalReducedNodesIndex = globalReducedNodesIndex2;
-	in->globalNodesIndex = globalNodesIndex2;
-	in->reducedNodesId = reducedNodesId2;
-	in->degreesOfFreedomId = degreesOfFreedomId2;
-	in->reducedDegreesOfFreedomId = reducedDegreesOfFreedomId2;
-	in->numNodes = numNodes;
-	/* this initialization makes sure that data are located on the right processor */
-#pragma omp parallel for private(n,i) schedule(static)
-	for (n = 0; n < numNodes; n++)
-	{
-	    in->Id[n] = -1;
-	    for (i = 0; i < in->numDim; i++)
-		in->Coordinates[INDEX2(i, n, in->numDim)] = 0.;
-	    in->Tag[n] = -1;
-	    in->globalDegreesOfFreedom[n] = -1;
-	    in->globalReducedDOFIndex[n] = -1;
-	    in->globalReducedNodesIndex[n] = -1;
-	    in->globalNodesIndex[n] = -1;
-	    in->reducedNodesId[n] = -1;
-	    in->degreesOfFreedomId[n] = -1;
-	    in->reducedDegreesOfFreedomId[n] = -1;
-	}
-    }
-    return;
-}
-
-/*  frees the node table within an node file: */
-
-void Dudley_NodeFile_freeTable(Dudley_NodeFile * in)
-{
-    if (in != NULL)
-    {
-	delete[] in->Id;
-	delete[] in->Coordinates;
-	delete[] in->globalDegreesOfFreedom;
-	delete[] in->globalReducedDOFIndex;
-	delete[] in->globalReducedNodesIndex;
-	delete[] in->globalNodesIndex;
-	delete[] in->Tag;
-	delete[] in->reducedNodesId;
-	delete[] in->degreesOfFreedomId;
-	delete[] in->reducedDegreesOfFreedomId;
-	delete[] in->tagsInUse;
-	in->numTagsInUse = 0;
-	Dudley_NodeMapping_free(in->nodesMapping);
-	in->nodesMapping = NULL;
-	Dudley_NodeMapping_free(in->reducedNodesMapping);
-	in->reducedNodesMapping = NULL;
-	Dudley_NodeMapping_free(in->degreesOfFreedomMapping);
-	in->degreesOfFreedomMapping = NULL;
-	Dudley_NodeMapping_free(in->reducedDegreesOfFreedomMapping);
-	in->reducedDegreesOfFreedomMapping = NULL;
-    in->nodesDistribution.reset();
-    in->reducedNodesDistribution.reset();
-    in->degreesOfFreedomDistribution.reset();
-    in->reducedDegreesOfFreedomDistribution.reset();
-    in->degreesOfFreedomConnector.reset();
-    in->reducedDegreesOfFreedomConnector.reset();
-	in->numTagsInUse = 0;
-	in->numNodes = 0;
-    }
-}
-
-void Dudley_NodeFile_setTagsInUse(Dudley_NodeFile * in)
-{
-    index_t *tagsInUse = NULL;
-    dim_t numTagsInUse;
-    if (in != NULL)
-    {
-	Dudley_Util_setValuesInUse(in->Tag, in->numNodes, &numTagsInUse, &tagsInUse, in->MPIInfo);
-	if (Dudley_noError())
-	{
-	    delete[] in->tagsInUse;
-	    in->tagsInUse = tagsInUse;
-	    in->numTagsInUse = numTagsInUse;
-	}
-    }
-}
diff --git a/dudley/src/NodeFile_copyTable.cpp b/dudley/src/NodeFile_copyTable.cpp
deleted file mode 100644
index 05d4c09..0000000
--- a/dudley/src/NodeFile_copyTable.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/* copies node file in into node file out starting from offset          */
-/* the nodes offset to in->numNodes+offset-1 in out will be overwritten */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-
-/************************************************************************************/
-
-void Dudley_NodeFile_copyTable(int offset, Dudley_NodeFile * out, int idOffset, int dofOffset, Dudley_NodeFile * in)
-{
-    int i, n;
-    /* check dimension and file size */
-    if (out->numDim != in->numDim)
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_NodeFile_copyTable: dimensions of node files don't match");
-    }
-    if (out->numNodes < in->numNodes + offset)
-    {
-	Dudley_setError(MEMORY_ERROR, "Dudley_NodeFile_copyTable: node table is too small.");
-    }
-    if (Dudley_noError())
-    {
-#pragma omp parallel for private(i,n) schedule(static)
-	for (n = 0; n < in->numNodes; n++)
-	{
-	    out->Id[offset + n] = in->Id[n] + idOffset;
-	    out->Tag[offset + n] = in->Tag[n];
-	    out->globalDegreesOfFreedom[offset + n] = in->globalDegreesOfFreedom[n] + dofOffset;
-	    for (i = 0; i < out->numDim; i++)
-		out->Coordinates[INDEX2(i, offset + n, out->numDim)] = in->Coordinates[INDEX2(i, n, in->numDim)];
-	}
-    }
-}
diff --git a/dudley/src/NodeFile_createDenseLabelings.cpp b/dudley/src/NodeFile_createDenseLabelings.cpp
index 23942b7..a60592b 100644
--- a/dudley/src/NodeFile_createDenseLabelings.cpp
+++ b/dudley/src/NodeFile_createDenseLabelings.cpp
@@ -14,602 +14,247 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile                                   */
-
-/*   creates a dense labeling of the global degrees of freedom  */
-/*   and returns the new number of  global degrees of freedom  */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "NodeFile.h"
 
-/************************************************************************************/
+namespace dudley {
 
-dim_t Dudley_NodeFile_createDenseDOFLabeling(Dudley_NodeFile * in)
+dim_t NodeFile::createDenseDOFLabeling()
 {
-    index_t min_dof, max_dof, unset_dof = -1, set_dof = 1, dof_0, dof_1, *DOF_buffer = NULL, k;
-    Esys_MPI_rank buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myDOFs, *offsets = NULL, *loc_offsets = NULL, new_numGlobalDOFs = 0, myNewDOFs;
-    bool *set_new_DOF = NULL;
-#ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
-#endif
+    const index_t UNSET_ID = -1, SET_ID = 1;
 
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);
+    // get the global range of DOF IDs
+    const std::pair<index_t,index_t> idRange(getGlobalDOFRange());
 
-    distribution = new  index_t[in->MPIInfo->size + 1];
-    offsets = new  dim_t[in->MPIInfo->size];
-    loc_offsets = new  dim_t[in->MPIInfo->size];
-    set_new_DOF = new  bool[in->numNodes];
+    // distribute the range of DOF IDs
+    std::vector<index_t> distribution(MPIInfo->size + 1);
+    dim_t bufferLen = MPIInfo->setDistribution(idRange.first, idRange.second,
+                                              &distribution[0]);
 
-    if (!
-	(Dudley_checkPtr(distribution) || Dudley_checkPtr(offsets) || Dudley_checkPtr(loc_offsets)
-	 || Dudley_checkPtr(set_new_DOF)))
-    {
-	/* distribute the range of node ids */
-	buffer_len = in->MPIInfo->setDistribution(min_dof, max_dof, distribution);
-	myDOFs = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-	/* allocate buffers */
-	DOF_buffer = new  index_t[buffer_len];
-	if (!Dudley_checkPtr(DOF_buffer))
-	{
-	    /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < buffer_len; n++)
-		DOF_buffer[n] = unset_dof;
+    index_t* DOF_buffer = new index_t[bufferLen];
+    // fill the buffer with the UNSET_ID marker to check which nodes are defined
+#pragma omp parallel for
+    for (index_t n = 0; n < bufferLen; n++)
+        DOF_buffer[n] = UNSET_ID;
 
-	    /* fill the buffer by sending portions around in a circle */
-#ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
-#endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		if (p > 0)
-		{		/* the initial send can be skipped */
-#ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
-#endif
-		    in->MPIInfo->msg_tag_counter++;
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-		dof_0 = distribution[buffer_rank];
-		dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    k = in->globalDegreesOfFreedom[n];
-		    if ((dof_0 <= k) && (k < dof_1))
-		    {
-			DOF_buffer[k - dof_0] = set_dof;
-		    }
-		}
-	    }
-	    /* count the entries in the DOF_buffer */
-	    /* TODO: OMP parallel */
-	    myNewDOFs = 0;
-	    for (n = 0; n < myDOFs; ++n)
-	    {
-		if (DOF_buffer[n] == set_dof)
-		{
-		    DOF_buffer[n] = myNewDOFs;
-		    myNewDOFs++;
-		}
-	    }
-	    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-	    loc_offsets[in->MPIInfo->rank] = myNewDOFs;
-#ifdef ESYS_MPI
-	    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
-	    new_numGlobalDOFs = 0;
-	    for (n = 0; n < in->MPIInfo->size; ++n)
-	    {
-		loc_offsets[n] = new_numGlobalDOFs;
-		new_numGlobalDOFs += offsets[n];
-	    }
-#else
-	    new_numGlobalDOFs = loc_offsets[0];
-	    loc_offsets[0] = 0;
-#endif
-#pragma omp parallel
-	    {
-#pragma omp for private(n) schedule(static)
-		for (n = 0; n < myDOFs; ++n)
-		    DOF_buffer[n] += loc_offsets[in->MPIInfo->rank];
-		/* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp for private(n) schedule(static)
-		for (n = 0; n < in->numNodes; ++n)
-		    set_new_DOF[n] = TRUE;
-	    }
+    // fill the buffer by sending portions around in a circle
 #ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
+    MPI_Status status;
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		dof_0 = distribution[buffer_rank];
-		dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    k = in->globalDegreesOfFreedom[n];
-		    if (set_new_DOF[n] && (dof_0 <= k) && (k < dof_1))
-		    {
-			in->globalDegreesOfFreedom[n] = DOF_buffer[k - dof_0];
-			set_new_DOF[n] = FALSE;
-		    }
-		}
-		if (p < in->MPIInfo->size - 1)
-		{		/* the last send can be skipped */
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
 #ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
+        if (p > 0) { // the initial send can be skipped
+            MPI_Sendrecv_replace(DOF_buffer, bufferLen, MPI_DIM_T, dest,
+                                 MPIInfo->counter(), source, MPIInfo->counter(),
+                                 MPIInfo->comm, &status);
+            MPIInfo->incCounter();
+        }
 #endif
-		    ESYS_MPI_INC_COUNTER(*(in->MPIInfo),1);
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-	    }
-	}
-	delete[] DOF_buffer;
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
+        const index_t dof0 = distribution[buffer_rank];
+        const index_t dof1 = distribution[buffer_rank + 1];
+#pragma omp parallel for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t k = globalDegreesOfFreedom[n];
+            if (dof0 <= k && k < dof1) {
+                DOF_buffer[k - dof0] = SET_ID;
+            }
+        }
     }
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
-    delete[] set_new_DOF;
-    return new_numGlobalDOFs;
-}
-
-void Dudley_NodeFile_assignMPIRankToDOFs(Dudley_NodeFile * in, Esys_MPI_rank * mpiRankOfDOF, index_t * distribution)
-{
-    index_t min_DOF, max_DOF, k;
-    dim_t n;
-    Esys_MPI_rank p, p_min = in->MPIInfo->size, p_max = -1;
-    /* first we calculate the min and max dof on this processor to reduce costs for searching */
-    Dudley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);
-
-    for (p = 0; p < in->MPIInfo->size; ++p)
-    {
-	if (distribution[p] <= min_DOF)
-	    p_min = p;
-	if (distribution[p] <= max_DOF)
-	    p_max = p;
+    // count the entries in the buffer
+    const dim_t myDOFs = distribution[MPIInfo->rank + 1] - distribution[MPIInfo->rank];
+    // TODO: OMP parallel
+    dim_t myNewDOFs = 0;
+    for (index_t n = 0; n < myDOFs; ++n) {
+        if (DOF_buffer[n] == SET_ID) {
+            DOF_buffer[n] = myNewDOFs;
+            myNewDOFs++;
+        }
     }
-#pragma omp parallel for private(n,k,p) schedule(static)
-    for (n = 0; n < in->numNodes; ++n)
-    {
-	k = in->globalDegreesOfFreedom[n];
-	for (p = p_min; p <= p_max; ++p)
-	{
-	    if (k < distribution[p + 1])
-	    {
-		mpiRankOfDOF[n] = p;
-		break;
-	    }
-	}
-    }
-}
 
-dim_t Dudley_NodeFile_createDenseReducedDOFLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask)
-{
-    index_t min_dof, max_dof, unset_dof = -1, set_dof = 1, dof_0, dof_1, *DOF_buffer = NULL, k;
-    Esys_MPI_rank buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myDOFs, *offsets = NULL, *loc_offsets = NULL, globalNumReducedDOFs = 0, myNewDOFs;
+    std::vector<index_t> loc_offsets(MPIInfo->size);
+    std::vector<index_t> offsets(MPIInfo->size);
+    dim_t new_numGlobalDOFs;
+    bool* set_new_DOF = new bool[numNodes];
+
 #ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
+    new_numGlobalDOFs = 0;
+    loc_offsets[MPIInfo->rank] = myNewDOFs;
+    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_DIM_T,
+                  MPI_SUM, MPIInfo->comm);
+    for (int n = 0; n < MPIInfo->size; ++n) {
+        loc_offsets[n] = new_numGlobalDOFs;
+        new_numGlobalDOFs += offsets[n];
+    }
+#else
+    new_numGlobalDOFs = myNewDOFs;
 #endif
 
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);
-
-    distribution = new  index_t[in->MPIInfo->size + 1];
-    offsets = new  dim_t[in->MPIInfo->size];
-    loc_offsets = new  dim_t[in->MPIInfo->size];
-
-    if (!(Dudley_checkPtr(distribution) || Dudley_checkPtr(offsets) || Dudley_checkPtr(loc_offsets)))
+#pragma omp parallel
     {
-	/* distribute the range of node ids */
-	buffer_len = in->MPIInfo->setDistribution(min_dof, max_dof, distribution);
-	myDOFs = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-	/* allocate buffers */
-	DOF_buffer = new  index_t[buffer_len];
-	if (!Dudley_checkPtr(DOF_buffer))
-	{
-	    /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < buffer_len; n++)
-		DOF_buffer[n] = unset_dof;
+#pragma omp for
+        for (index_t n = 0; n < myDOFs; ++n)
+            DOF_buffer[n] += loc_offsets[MPIInfo->rank];
+#pragma omp for
+        for (index_t n = 0; n < numNodes; ++n)
+            set_new_DOF[n] = true;
+    }
 
-	    /* fill the buffer by sending portions around in a circle */
-#ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
-#endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		if (p > 0)
-		{		/* the initial send can be skipped */
+    // now entries are collected from the buffer again by sending them around
+    // in a circle
 #ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
+    dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-		    in->MPIInfo->msg_tag_counter++;
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-		dof_0 = distribution[buffer_rank];
-		dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    if (reducedNodeMask[n] > -1)
-		    {
-			k = in->globalDegreesOfFreedom[n];
-			if ((dof_0 <= k) && (k < dof_1))
-			{
-			    DOF_buffer[k - dof_0] = set_dof;
-			}
-		    }
-		}
-	    }
-	    /* count the entries in the DOF_buffer */
-	    /* TODO: OMP parallel */
-	    myNewDOFs = 0;
-	    for (n = 0; n < myDOFs; ++n)
-	    {
-		if (DOF_buffer[n] == set_dof)
-		{
-		    DOF_buffer[n] = myNewDOFs;
-		    myNewDOFs++;
-		}
-	    }
-	    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-	    loc_offsets[in->MPIInfo->rank] = myNewDOFs;
+    buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const index_t dof0 = distribution[buffer_rank];
+        const index_t dof1 = distribution[buffer_rank + 1];
+#pragma omp parallel for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t k = globalDegreesOfFreedom[n];
+            if (set_new_DOF[n] && dof0 <= k && k < dof1) {
+                globalDegreesOfFreedom[n] = DOF_buffer[k - dof0];
+                set_new_DOF[n] = false;
+            }
+        }
 #ifdef ESYS_MPI
-	    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
-	    globalNumReducedDOFs = 0;
-	    for (n = 0; n < in->MPIInfo->size; ++n)
-	    {
-		loc_offsets[n] = globalNumReducedDOFs;
-		globalNumReducedDOFs += offsets[n];
-	    }
-#else
-	    globalNumReducedDOFs = loc_offsets[0];
-	    loc_offsets[0] = 0;
-#endif
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < myDOFs; ++n)
-		DOF_buffer[n] += loc_offsets[in->MPIInfo->rank];
-	    /* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < in->numNodes; ++n)
-		in->globalReducedDOFIndex[n] = loc_offsets[0] - 1;
-#ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
-#endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		dof_0 = distribution[buffer_rank];
-		dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    if (reducedNodeMask[n] > -1)
-		    {
-			k = in->globalDegreesOfFreedom[n];
-			if ((dof_0 <= k) && (k < dof_1))
-			    in->globalReducedDOFIndex[n] = DOF_buffer[k - dof_0];
-		    }
-		}
-		if (p < in->MPIInfo->size - 1)
-		{		/* the last send can be skipped */
-#ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
+        if (p < MPIInfo->size - 1) { // the last send can be skipped
+            MPI_Sendrecv_replace(DOF_buffer, bufferLen, MPI_DIM_T, dest,
+                                 MPIInfo->counter(), source, MPIInfo->counter(),
+                                 MPIInfo->comm, &status);
+            MPIInfo->incCounter();
+        }
 #endif
-		    ESYS_MPI_INC_COUNTER(*(in->MPIInfo),1);
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-	    }
-	}
-	delete[] DOF_buffer;
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
     }
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
-    return globalNumReducedDOFs;
+    delete[] DOF_buffer;
+    delete[] set_new_DOF;
+    return new_numGlobalDOFs;
 }
 
-dim_t Dudley_NodeFile_createDenseNodeLabeling(Dudley_NodeFile * in, index_t * node_distribution,
-					      const index_t * dof_distribution)
+dim_t NodeFile::createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
+                                   const std::vector<index_t>& dofDistribution)
 {
-    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0,
-	dof_1, *Node_buffer = NULL;
-    dim_t n, my_buffer_len, buffer_len, globalNumNodes = 0, myNewNumNodes;
-    Esys_MPI_rank p, buffer_rank;
-    const index_t unset_nodeID = -1, set_nodeID = 1;
-    const dim_t header_len = 2;
-#ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
-#endif
-    Esys_MPI_rank myRank = in->MPIInfo->rank;
+    const index_t UNSET_ID = -1, SET_ID = 1;
+    const index_t myFirstDOF = dofDistribution[MPIInfo->rank];
+    const index_t myLastDOF = dofDistribution[MPIInfo->rank + 1];
 
-    /* find the range of node ids controlled by me */
-
-    myFirstDOF = dof_distribution[myRank];
-    myLastDOF = dof_distribution[myRank + 1];
-    max_id = -INDEX_T_MAX;
-    min_id = INDEX_T_MAX;
-#pragma omp parallel private(loc_max_id,loc_min_id)
+    // find the range of node IDs controlled by me
+    index_t min_id = escript::DataTypes::index_t_max();
+    index_t max_id = escript::DataTypes::index_t_min();
+#pragma omp parallel
     {
-	loc_max_id = max_id;
-	loc_min_id = min_id;
-#pragma omp for private(n,dof,id) schedule(static)
-	for (n = 0; n < in->numNodes; n++)
-	{
-	    dof = in->globalDegreesOfFreedom[n];
-	    id = in->Id[n];
-	    if ((myFirstDOF <= dof) && (dof < myLastDOF))
-	    {
-		loc_max_id = MAX(loc_max_id, id);
-		loc_min_id = MIN(loc_min_id, id);
-	    }
-	}
+        index_t loc_min_id = min_id;
+        index_t loc_max_id = max_id;
+#pragma omp for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t dof = globalDegreesOfFreedom[n];
+            if (myFirstDOF <= dof && dof < myLastDOF) {
+                loc_min_id = std::min(loc_min_id, Id[n]);
+                loc_max_id = std::max(loc_max_id, Id[n]);
+            }
+        }
 #pragma omp critical
-	{
-	    max_id = MAX(loc_max_id, max_id);
-	    min_id = MIN(loc_min_id, min_id);
-	}
+        {
+            min_id = std::min(loc_min_id, min_id);
+            max_id = std::max(loc_max_id, max_id);
+        }
     }
-    /* allocate a buffer */
-    my_buffer_len = max_id >= min_id ? max_id - min_id + 1 : 0;
+    dim_t myBufferLen = (max_id >= min_id ? max_id - min_id + 1 : 0);
+    dim_t bufferLen;
 
 #ifdef ESYS_MPI
-    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
+    MPI_Allreduce(&myBufferLen, &bufferLen, 1, MPI_DIM_T, MPI_MAX,
+                  MPIInfo->comm);
 #else
-    buffer_len = my_buffer_len;
+    bufferLen = myBufferLen;
 #endif
 
-    Node_buffer = new  index_t[buffer_len + header_len];
-    if (!Dudley_checkPtr(Node_buffer))
-    {
-	/* mark and count the nodes in use */
+    const dim_t headerLen = 2;
+
+    index_t* Node_buffer = new index_t[bufferLen + headerLen];
+    // mark and count the nodes in use
 #pragma omp parallel
-	{
-#pragma omp for private(n) schedule(static)
-	    for (n = 0; n < buffer_len + header_len; n++)
-		Node_buffer[n] = unset_nodeID;
-#pragma omp for private(n) schedule(static)
-	    for (n = 0; n < in->numNodes; n++)
-		in->globalNodesIndex[n] = -1;
-#pragma omp for private(n,dof,id) schedule(static)
-	    for (n = 0; n < in->numNodes; n++)
-	    {
-		dof = in->globalDegreesOfFreedom[n];
-		id = in->Id[n];
-		if ((myFirstDOF <= dof) && (dof < myLastDOF))
-		    Node_buffer[id - min_id + header_len] = set_nodeID;
-	    }
-	}
-	myNewNumNodes = 0;
-	for (n = 0; n < my_buffer_len; n++)
-	{
-	    if (Node_buffer[header_len + n] == set_nodeID)
-	    {
-		Node_buffer[header_len + n] = myNewNumNodes;
-		myNewNumNodes++;
-	    }
-	}
-	/* make the local number of nodes globally available */
+    {
+#pragma omp for
+        for (index_t n = 0; n < bufferLen + headerLen; n++)
+            Node_buffer[n] = UNSET_ID;
+#pragma omp for
+        for (index_t n = 0; n < numNodes; n++) {
+            globalNodesIndex[n] = -1;
+            const index_t dof = globalDegreesOfFreedom[n];
+            if (myFirstDOF <= dof && dof < myLastDOF)
+                Node_buffer[Id[n] - min_id + headerLen] = SET_ID;
+        }
+    }
+    dim_t myNewNumNodes = 0;
+    for (index_t n = 0; n < myBufferLen; n++) {
+        if (Node_buffer[headerLen + n] == SET_ID) {
+            Node_buffer[headerLen + n] = myNewNumNodes;
+            myNewNumNodes++;
+        }
+    }
+    // make the local number of nodes globally available
 #ifdef ESYS_MPI
-	MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
+    MPI_Allgather(&myNewNumNodes, 1, MPI_DIM_T, &nodeDistribution[0], 1,
+                  MPI_DIM_T, MPIInfo->comm);
 #else
-	node_distribution[0] = myNewNumNodes;
+    nodeDistribution[0] = myNewNumNodes;
 #endif
 
-	globalNumNodes = 0;
-	for (p = 0; p < in->MPIInfo->size; ++p)
-	{
-	    itmp = node_distribution[p];
-	    node_distribution[p] = globalNumNodes;
-	    globalNumNodes += itmp;
-	}
-	node_distribution[in->MPIInfo->size] = globalNumNodes;
+    dim_t globalNumNodes = 0;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const dim_t itmp = nodeDistribution[p];
+        nodeDistribution[p] = globalNumNodes;
+        globalNumNodes += itmp;
+    }
+    nodeDistribution[MPIInfo->size] = globalNumNodes;
 
-	/* offset nodebuffer */
-	itmp = node_distribution[in->MPIInfo->rank];
-#pragma omp for private(n) schedule(static)
-	for (n = 0; n < my_buffer_len; n++)
-	    Node_buffer[n + header_len] += itmp;
+    // offset node buffer
+#pragma omp parallel for
+    for (index_t n = 0; n < myBufferLen; n++)
+        Node_buffer[n + headerLen] += nodeDistribution[MPIInfo->rank];
 
-	/* now we send this buffer around to assign global node index: */
+    // now we send this buffer around to assign the global node indices
 #ifdef ESYS_MPI
-	dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-	Node_buffer[0] = min_id;
-	Node_buffer[1] = max_id;
-	buffer_rank = in->MPIInfo->rank;
-	for (p = 0; p < in->MPIInfo->size; ++p)
-	{
-	    nodeID_0 = Node_buffer[0];
-	    nodeID_1 = Node_buffer[1];
-	    dof_0 = dof_distribution[buffer_rank];
-	    dof_1 = dof_distribution[buffer_rank + 1];
-	    if (nodeID_0 <= nodeID_1)
-	    {
-#pragma omp for private(n,dof,id) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    dof = in->globalDegreesOfFreedom[n];
-		    id = in->Id[n] - nodeID_0;
-		    if ((dof_0 <= dof) && (dof < dof_1) && (id >= 0) && (id <= nodeID_1 - nodeID_0))
-			in->globalNodesIndex[n] = Node_buffer[id + header_len];
-		}
-	    }
-	    if (p < in->MPIInfo->size - 1)
-	    {			/* the last send can be skipped */
+    Node_buffer[0] = min_id;
+    Node_buffer[1] = max_id;
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const index_t nodeID0 = Node_buffer[0];
+        const index_t nodeID1 = Node_buffer[1];
+        const index_t dof0 = dofDistribution[buffer_rank];
+        const index_t dof1 = dofDistribution[buffer_rank + 1];
+        if (nodeID0 <= nodeID1) {
+#pragma omp parallel for
+            for (index_t n = 0; n < numNodes; n++) {
+                const index_t dof = globalDegreesOfFreedom[n];
+                const index_t id = Id[n] - nodeID0;
+                if (dof0 <= dof && dof < dof1 && id >= 0 &&
+                        id <= nodeID1 - nodeID0)
+                    globalNodesIndex[n] = Node_buffer[id + headerLen];
+            }
+        }
 #ifdef ESYS_MPI
-		MPI_Sendrecv_replace(Node_buffer, buffer_len + header_len, MPI_INT,
-				     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-				     in->MPIInfo->comm, &status);
+        if (p < MPIInfo->size - 1) { // the last send can be skipped
+            MPI_Status status;
+            MPI_Sendrecv_replace(Node_buffer, bufferLen + headerLen, MPI_DIM_T,
+                                 dest, MPIInfo->counter(), source,
+                                 MPIInfo->counter(), MPIInfo->comm, &status);
+            MPIInfo->incCounter();
+        }
 #endif
-		ESYS_MPI_INC_COUNTER(*(in->MPIInfo),1);
-	    }
-	    buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-	}
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
     }
     delete[] Node_buffer;
     return globalNumNodes;
 }
 
-dim_t Dudley_NodeFile_createDenseReducedNodeLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask)
-{
-    index_t min_node, max_node, unset_node = -1, set_node = 1, node_0, node_1, *Nodes_buffer = NULL, k;
-    Esys_MPI_rank buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myNodes, *offsets = NULL, *loc_offsets = NULL, globalNumReducedNodes = 0, myNewNodes;
-#ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
-#endif
-
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);
-
-    distribution = new  index_t[in->MPIInfo->size + 1];
-    offsets = new  dim_t[in->MPIInfo->size];
-    loc_offsets = new  dim_t[in->MPIInfo->size];
-
-    if (!(Dudley_checkPtr(distribution) || Dudley_checkPtr(offsets) || Dudley_checkPtr(loc_offsets)))
-    {
-	/* distribute the range of node ids */
-	buffer_len = in->MPIInfo->setDistribution(min_node, max_node, distribution);
-	myNodes = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-	/* allocate buffers */
-	Nodes_buffer = new  index_t[buffer_len];
-	if (!Dudley_checkPtr(Nodes_buffer))
-	{
-	    /* fill Nodes_buffer by the unset_node marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < buffer_len; n++)
-		Nodes_buffer[n] = unset_node;
+} // namespace dudley
 
-	    /* fill the buffer by sending portions around in a circle */
-#ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
-#endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		if (p > 0)
-		{		/* the initial send can be skipped */
-#ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
-#endif
-		    in->MPIInfo->msg_tag_counter++;
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-		node_0 = distribution[buffer_rank];
-		node_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    if (reducedNodeMask[n] > -1)
-		    {
-			k = in->globalNodesIndex[n];
-			if ((node_0 <= k) && (k < node_1))
-			{
-			    Nodes_buffer[k - node_0] = set_node;
-			}
-		    }
-		}
-	    }
-	    /* count the entries in the Nodes_buffer */
-	    /* TODO: OMP parallel */
-	    myNewNodes = 0;
-	    for (n = 0; n < myNodes; ++n)
-	    {
-		if (Nodes_buffer[n] == set_node)
-		{
-		    Nodes_buffer[n] = myNewNodes;
-		    myNewNodes++;
-		}
-	    }
-	    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-	    loc_offsets[in->MPIInfo->rank] = myNewNodes;
-#ifdef ESYS_MPI
-	    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
-	    globalNumReducedNodes = 0;
-	    for (n = 0; n < in->MPIInfo->size; ++n)
-	    {
-		loc_offsets[n] = globalNumReducedNodes;
-		globalNumReducedNodes += offsets[n];
-	    }
-#else
-	    globalNumReducedNodes = loc_offsets[0];
-	    loc_offsets[0] = 0;
-#endif
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < myNodes; ++n)
-		Nodes_buffer[n] += loc_offsets[in->MPIInfo->rank];
-	    /* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < in->numNodes; ++n)
-		in->globalReducedNodesIndex[n] = loc_offsets[0] - 1;
-#ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
-#endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		node_0 = distribution[buffer_rank];
-		node_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-		for (n = 0; n < in->numNodes; n++)
-		{
-		    if (reducedNodeMask[n] > -1)
-		    {
-			k = in->globalNodesIndex[n];
-			if ((node_0 <= k) && (k < node_1))
-			    in->globalReducedNodesIndex[n] = Nodes_buffer[k - node_0];
-		    }
-		}
-		if (p < in->MPIInfo->size - 1)
-		{		/* the last send can be skipped */
-#ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
-#endif
-		    ESYS_MPI_INC_COUNTER(*(in->MPIInfo),1);
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-	    }
-	}
-	delete[] Nodes_buffer;
-    }
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
-    return globalNumReducedNodes;
-}
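
All of the dense labelling routines in this file rely on the same ring idiom: a buffer standing for one rank's slice of the global numbering travels once around all ranks via MPI_Sendrecv_replace, every rank marks the entries it references, and after the round trip each rank holds the fully marked buffer for its own slice. A self-contained toy version of that idiom (plain MPI, a fixed message tag instead of the MPIInfo counter, no escript types) could look like this:

    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int sliceLen = 4;                  // global indices owned per rank
        const int globalLen = sliceLen * size;
        // the two global indices this rank "references": its own first one
        // and the first one of the next rank's slice
        const int refs[2] = { rank * sliceLen,
                              (rank * sliceLen + sliceLen) % globalLen };

        std::vector<int> buffer(sliceLen, -1);   // -1 == unset marker
        const int dest = (rank + 1) % size;
        const int source = (rank - 1 + size) % size;
        int bufferRank = rank;

        for (int p = 0; p < size; ++p) {
            if (p > 0) {                         // the initial send can be skipped
                MPI_Status status;
                MPI_Sendrecv_replace(buffer.data(), sliceLen, MPI_INT,
                                     dest, 0, source, 0, MPI_COMM_WORLD, &status);
            }
            bufferRank = (bufferRank - 1 + size) % size;
            const int lo = bufferRank * sliceLen;   // slice the buffer stands for now
            const int hi = lo + sliceLen;
            for (int r = 0; r < 2; ++r)
                if (lo <= refs[r] && refs[r] < hi)
                    buffer[refs[r] - lo] = 1;       // mark: index is referenced somewhere
        }
        // after the round trip each rank holds the marks for its own slice
        std::printf("rank %d owns marked indices:", rank);
        for (int n = 0; n < sliceLen; ++n)
            if (buffer[n] == 1)
                std::printf(" %d", rank * sliceLen + n);
        std::printf("\n");
        MPI_Finalize();
        return 0;
    }
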
diff --git a/dudley/src/NodeFile_createMappings.cpp b/dudley/src/NodeFile_createMappings.cpp
new file mode 100644
index 0000000..58251e2
--- /dev/null
+++ b/dudley/src/NodeFile_createMappings.cpp
@@ -0,0 +1,253 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+/****************************************************************************/
+
+/*   Dudley: NodeFile: creates the node and degrees-of-freedom mappings  */
+/*   no redistribution of data takes place here                          */
+
+/****************************************************************************/
+
+#include "NodeFile.h"
+
+namespace dudley {
+
+void NodeFile::createDOFMappingAndCoupling()
+{
+    const index_t myFirstDOF = dofDistribution->getFirstComponent();
+    const index_t myLastDOF = dofDistribution->getLastComponent();
+    const int mpiSize = MPIInfo->size;
+    const int myRank = MPIInfo->rank;
+
+    index_t min_DOF, max_DOF;
+    std::pair<index_t,index_t> DOF_range(util::getFlaggedMinMaxInt(
+                                            numNodes, globalDegreesOfFreedom, -1));
+
+    if (DOF_range.second < DOF_range.first) {
+        min_DOF = myFirstDOF;
+        max_DOF = myLastDOF - 1;
+    } else {
+        min_DOF = DOF_range.first;
+        max_DOF = DOF_range.second;
+    }
+
+    int p_min = mpiSize;
+    int p_max = -1;
+    if (max_DOF >= min_DOF) {
+        for (int p = 0; p < mpiSize; ++p) {
+            if (dofDistribution->first_component[p] <= min_DOF)
+                p_min = p;
+            if (dofDistribution->first_component[p] <= max_DOF)
+                p_max = p;
+        }
+    }
+
+    std::stringstream ss;
+    if (myFirstDOF<myLastDOF && !(min_DOF <= myFirstDOF && myLastDOF-1 <= max_DOF)) {
+        ss << "createDOFMappingAndCoupling: Local elements do not span local "
+              "degrees of freedom. min_DOF=" << min_DOF << ", myFirstDOF="
+           << myFirstDOF << ", myLastDOF-1=" << myLastDOF-1
+           << ", max_DOF=" << max_DOF << " on rank=" << MPIInfo->rank;
+    }
+    const std::string msg(ss.str());
+    int error = msg.length();
+    int gerror = error;
+    escript::checkResult(error, gerror, MPIInfo);
+    if (gerror > 0) {
+        char* gmsg;
+        escript::shipString(msg.c_str(), &gmsg, MPIInfo->comm);
+        throw DudleyException(gmsg);
+    }
+
+    const index_t UNUSED = -1;
+    const dim_t len_loc_dof = max_DOF - min_DOF + 1;
+    index_t* shared = new index_t[numNodes * (p_max - p_min + 1)];
+    index_t* locDOFMask = new index_t[len_loc_dof];
+    index_t* nodeMask = new index_t[numNodes];
+#ifdef BOUNDS_CHECK
+    ESYS_ASSERT(myLastDOF-min_DOF <= len_loc_dof, "BOUNDS_CHECK");
+#endif
+
+#pragma omp parallel
+    {
+#pragma omp for
+        for (index_t i = 0; i < len_loc_dof; ++i)
+            locDOFMask[i] = UNUSED;
+#pragma omp for
+        for (index_t i = 0; i < numNodes; ++i)
+            nodeMask[i] = UNUSED;
+#pragma omp for
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = globalDegreesOfFreedom[i];
+            if (k > -1) {
+#ifdef BOUNDS_CHECK
+                ESYS_ASSERT(k-min_DOF < len_loc_dof, "BOUNDS_CHECK");
+#endif
+                locDOFMask[k - min_DOF] = UNUSED - 1;
+            }
+        }
+#pragma omp for
+        for (index_t i = myFirstDOF - min_DOF; i < myLastDOF - min_DOF; ++i) {
+            locDOFMask[i] = i - myFirstDOF + min_DOF;
+        }
+    }
+
+    index_t* wanted_DOFs = new index_t[numNodes];
+    std::vector<index_t> rcv_len(mpiSize);
+    std::vector<index_t> snd_len(mpiSize);
+    std::vector<int> neighbour;
+    std::vector<index_t> offsetInShared;
+    dim_t n = 0;
+    dim_t lastn = n;
+
+    for (int p = p_min; p <= p_max; ++p) {
+        if (p != myRank) {
+            const index_t firstDOF = std::max(min_DOF, dofDistribution->first_component[p]);
+            const index_t lastDOF = std::min(max_DOF + 1, dofDistribution->first_component[p + 1]);
+#ifdef BOUNDS_CHECK
+            ESYS_ASSERT(lastDOF-min_DOF <= len_loc_dof, "BOUNDS_CHECK");
+#endif
+            for (index_t i = firstDOF - min_DOF; i < lastDOF - min_DOF; ++i) {
+                if (locDOFMask[i] == UNUSED - 1) {
+                    locDOFMask[i] = myLastDOF - myFirstDOF + n;
+                    wanted_DOFs[n] = i + min_DOF;
+                    ++n;
+                }
+            }
+            if (n > lastn) {
+                rcv_len[p] = n - lastn;
+                neighbour.push_back(p);
+                offsetInShared.push_back(lastn);
+                lastn = n;
+            }
+        } // if p!=myRank
+    } // for p
+
+    offsetInShared.push_back(lastn);
+
+    // assign new DOF labels to nodes
+#pragma omp parallel for
+    for (index_t i = 0; i < numNodes; ++i) {
+        const index_t k = globalDegreesOfFreedom[i];
+        if (k > -1)
+            nodeMask[i] = locDOFMask[k - min_DOF];
+    }
+
+    degreesOfFreedomMapping.assign(nodeMask, numNodes, UNUSED);
+
+    // define how to fetch DOF values that are controlled by other processors
+#ifdef BOUNDS_CHECK
+    ESYS_ASSERT(numNodes == 0 || offsetInShared.back() < numNodes * (p_max - p_min + 1), "BOUNDS_CHECK");
+#endif
+#pragma omp parallel for
+    for (index_t i = 0; i < lastn; ++i)
+        shared[i] = myLastDOF - myFirstDOF + i;
+
+#ifdef ESYS_HAVE_PASO
+    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
+                                    myLastDOF - myFirstDOF, neighbour, shared,
+                                    offsetInShared));
+#endif
+
+    /////////////////////////////////
+    //   now we build the sender   //
+    /////////////////////////////////
+#ifdef ESYS_MPI
+    std::vector<MPI_Request> mpi_requests(mpiSize * 2);
+    std::vector<MPI_Status> mpi_stati(mpiSize * 2);
+    MPI_Alltoall(&rcv_len[0], 1, MPI_DIM_T, &snd_len[0], 1, MPI_DIM_T,
+                 MPIInfo->comm);
+    int count = 0;
+    for (int p = 0; p < neighbour.size(); p++) {
+        MPI_Isend(&wanted_DOFs[offsetInShared[p]],
+                offsetInShared[p+1] - offsetInShared[p],
+                MPI_DIM_T, neighbour[p], MPIInfo->counter() + myRank,
+                MPIInfo->comm, &mpi_requests[count]);
+        count++;
+    }
+#else
+    snd_len[0] = rcv_len[0];
+#endif
+    n = 0;
+    neighbour.clear();
+    offsetInShared.clear();
+#ifdef ESYS_MPI
+    for (int p = 0; p < mpiSize; p++) {
+        if (snd_len[p] > 0) {
+            MPI_Irecv(&shared[n], snd_len[p], MPI_DIM_T, p,
+                      MPIInfo->counter() + p, MPIInfo->comm,
+                      &mpi_requests[count]);
+            count++;
+            neighbour.push_back(p);
+            offsetInShared.push_back(n);
+            n += snd_len[p];
+        }
+    }
+    MPIInfo->incCounter(MPIInfo->size);
+    MPI_Waitall(count, &mpi_requests[0], &mpi_stati[0]);
+#endif
+    offsetInShared.push_back(n);
+
+    // map global IDs to local IDs
+#pragma omp parallel for
+    for (index_t i = 0; i < n; ++i) {
+        shared[i] = locDOFMask[shared[i] - min_DOF];
+    }
+
+#ifdef ESYS_HAVE_PASO
+    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
+                                    myLastDOF - myFirstDOF, neighbour, shared,
+                                    offsetInShared));
+    degreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+#endif
+
+    delete[] wanted_DOFs;
+    delete[] nodeMask;
+    delete[] shared;
+    delete[] locDOFMask;
+}
+
+void NodeFile::createNodeMappings(const IndexVector& dofDist,
+                                  const IndexVector& nodeDist)
+{
+    // ==== distribution of Nodes ====
+    nodesDistribution.reset(new escript::Distribution(MPIInfo, nodeDist));
+
+    // ==== distribution of DOFs ====
+    dofDistribution.reset(new escript::Distribution(MPIInfo, dofDist));
+
+    index_t* nodeMask = new index_t[numNodes];
+    const index_t UNUSED = -1;
+
+    // ==== nodes mapping (dummy) ====
+#pragma omp parallel for
+    for (index_t i = 0; i < numNodes; ++i)
+        nodeMask[i] = i;
+    nodesMapping.assign(nodeMask, numNodes, UNUSED);
+
+    // ==== mapping between nodes and DOFs + DOF connector ====
+    createDOFMappingAndCoupling();
+
+    // get the IDs for DOFs
+#pragma omp parallel for
+    for (index_t i = 0; i < degreesOfFreedomMapping.numTargets; ++i)
+        degreesOfFreedomId[i] = Id[degreesOfFreedomMapping.map[i]];
+
+    delete[] nodeMask;
+}
+
+} // namespace dudley
+
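The core of createDOFMappingAndCoupling() is a local renumbering: degrees of freedom owned by this rank receive dense local indices 0..(myLastDOF-myFirstDOF)-1, and every DOF that local elements reference but another rank owns is appended after them and remembered as "wanted" so its value can be fetched from the owning rank. A much simplified, serial sketch of that numbering scheme (plain STL, a hypothetical helper name, no mask arrays and no MPI):

    #include <cstdio>
    #include <map>
    #include <vector>

    // hypothetical helper: number owned DOFs densely, append ghosts, record
    // which global DOFs have to be fetched from neighbouring ranks
    std::map<long, long> buildLocalNumbering(const std::vector<long>& referencedDOFs,
                                             long myFirstDOF, long myLastDOF,
                                             std::vector<long>& wanted)
    {
        std::map<long, long> globalToLocal;
        const long numOwned = myLastDOF - myFirstDOF;
        long nextGhost = numOwned;
        for (long k : referencedDOFs) {
            if (globalToLocal.count(k))
                continue;                              // already numbered
            if (myFirstDOF <= k && k < myLastDOF) {
                globalToLocal[k] = k - myFirstDOF;     // owned: dense local index
            } else {
                globalToLocal[k] = nextGhost++;        // ghost: appended after owned DOFs
                wanted.push_back(k);                   // must be received from its owner
            }
        }
        return globalToLocal;
    }

    int main()
    {
        // toy data: this rank owns global DOFs [10, 14) but its elements also
        // reference DOFs 8 and 15 owned by neighbouring ranks
        std::vector<long> refs = { 10, 11, 8, 12, 15, 11, 13 };
        std::vector<long> wanted;
        std::map<long, long> local = buildLocalNumbering(refs, 10, 14, wanted);
        for (const auto& entry : local)
            std::printf("global %ld -> local %ld\n", entry.first, entry.second);
        std::printf("%zu ghost DOFs to fetch\n", wanted.size());
        return 0;
    }
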
diff --git a/dudley/src/NodeFile_createTrilinosGraph.cpp b/dudley/src/NodeFile_createTrilinosGraph.cpp
new file mode 100644
index 0000000..7286efb
--- /dev/null
+++ b/dudley/src/NodeFile_createTrilinosGraph.cpp
@@ -0,0 +1,78 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifdef ESYS_HAVE_TRILINOS
+
+#include "DudleyDomain.h"
+#include "IndexList.h"
+
+using namespace esys_trilinos;
+
+namespace dudley {
+
+void NodeFile::createTrilinosGraph(const IndexList* indexList)
+{
+    const index_t* gNI = borrowGlobalNodesIndex();
+    const index_t* dofMap = borrowDegreesOfFreedomTarget();
+
+    const index_t myNumTargets = getNumDegreesOfFreedom();
+    const index_t numTargets = getNumDegreesOfFreedomTargets();
+    const index_t* target = borrowTargetDegreesOfFreedom();
+    IndexVector myRows(myNumTargets);
+    IndexVector columns(numTargets);
+
+#pragma omp parallel
+    {
+#pragma omp for nowait
+        for (size_t i = 0; i < myRows.size(); i++) {
+            myRows[i] = gNI[dofMap[i]];
+        }
+#pragma omp for
+        for (size_t i = 0; i < columns.size(); i++) {
+            columns[target[i]] = gNI[i];
+        }
+    } // end parallel section
+
+    Teuchos::ArrayRCP<size_t> rowPtr(myNumTargets + 1);
+    for (size_t i = 0; i < myNumTargets; i++) {
+        rowPtr[i+1] = rowPtr[i] + indexList[i].count(0, numTargets);
+    }
+
+    Teuchos::ArrayRCP<LO> colInd(rowPtr[myNumTargets]);
+
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumTargets; i++) {
+        indexList[i].toArray(&colInd[rowPtr[i]], 0, numTargets, 0);
+        std::sort(&colInd[rowPtr[i]], &colInd[rowPtr[i+1]]);
+    }
+
+    TrilinosMap_ptr rowMap(new MapType(getGlobalNumNodes(), myRows,
+                0, TeuchosCommFromEsysComm(MPIInfo->comm)));
+
+    TrilinosMap_ptr colMap(new MapType(getGlobalNumNodes(), columns,
+                0, TeuchosCommFromEsysComm(MPIInfo->comm)));
+
+    GraphType* graph = new GraphType(rowMap, colMap, rowPtr, colInd);
+    Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    params->set("Optimize Storage", true);
+    graph->fillComplete(rowMap, rowMap, params);
+    m_graph.reset(graph);
+}
+
+} // namespace dudley
+
+#endif // ESYS_HAVE_TRILINOS
+
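The graph assembly above follows the standard CRS pattern: count the entries per row, turn the counts into a rowPtr prefix sum, then flatten and sort each row's column indices (the toArray plus std::sort step). A tiny standalone version of that pattern, with plain STL containers standing in for IndexList and the Teuchos arrays:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // toy connectivity: row i lists the columns it couples to (unsorted)
        std::vector<std::vector<int> > rows = { {2, 0}, {1}, {0, 3, 2}, {3, 1} };
        const size_t numRows = rows.size();

        // rowPtr[i+1] = rowPtr[i] + number of entries in row i  (prefix sum)
        std::vector<size_t> rowPtr(numRows + 1, 0);
        for (size_t i = 0; i < numRows; ++i)
            rowPtr[i + 1] = rowPtr[i] + rows[i].size();

        // flatten each row and sort its column indices
        std::vector<int> colInd(rowPtr[numRows]);
        for (size_t i = 0; i < numRows; ++i) {
            std::copy(rows[i].begin(), rows[i].end(), colInd.begin() + rowPtr[i]);
            std::sort(colInd.begin() + rowPtr[i], colInd.begin() + rowPtr[i + 1]);
        }

        for (size_t i = 0; i < numRows; ++i) {
            std::printf("row %zu:", i);
            for (size_t j = rowPtr[i]; j < rowPtr[i + 1]; ++j)
                std::printf(" %d", colInd[j]);
            std::printf("\n");
        }
        return 0;
    }
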
diff --git a/dudley/src/NodeFile_gather.cpp b/dudley/src/NodeFile_gather.cpp
index a3b1f96..01934c5 100644
--- a/dudley/src/NodeFile_gather.cpp
+++ b/dudley/src/NodeFile_gather.cpp
@@ -14,181 +14,182 @@
 *
 *****************************************************************************/
 
-/************************************************************************************
- *
- *   Dudley: Mesh: NodeFile                                   
- *   gathers the NodeFile out from the NodeFile in using the entries 
- *   in index[0:out->numNodes-1] which are between min_index and max_index (exclusive) 
- *   the node index[i]
- *
- ************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "NodeFile.h"
 
-/************************************************************************************/
+#include <escript/index.h>
+
+using escript::DataTypes::real_t;
+
+namespace dudley {
 
-void Dudley_NodeFile_gatherEntries(dim_t n, index_t * index, index_t min_index, index_t max_index,
-				   index_t * Id_out, index_t * Id_in,
-				   index_t * Tag_out, index_t * Tag_in,
-				   index_t * globalDegreesOfFreedom_out, index_t * globalDegreesOfFreedom_in,
-				   dim_t numDim, double *Coordinates_out, double *Coordinates_in)
+// helper function
+static void gatherEntries(dim_t n, const index_t* index,
+                          index_t min_index, index_t max_index,
+                          index_t* Id_out, const index_t* Id_in,
+                          int* Tag_out, const int* Tag_in,
+                          index_t* globalDegreesOfFreedom_out,
+                          const index_t* globalDegreesOfFreedom_in,
+                          int numDim, real_t* Coordinates_out,
+                          const real_t* Coordinates_in)
 {
-    dim_t i;
-    register index_t k;
-    const index_t range = max_index - min_index;
-    const size_t numDim_size = (size_t) numDim * sizeof(double);
-#pragma omp parallel for private(i,k) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-	k = index[i] - min_index;
-	if ((k >= 0) && (k < range))
-	{
-	    Id_out[i] = Id_in[k];
-	    Tag_out[i] = Tag_in[k];
-	    globalDegreesOfFreedom_out[i] = globalDegreesOfFreedom_in[k];
-	    memcpy(&(Coordinates_out[INDEX2(0, i, numDim)]), &(Coordinates_in[INDEX2(0, k, numDim)]), numDim_size);
-	}
+    const dim_t range = max_index - min_index;
+    const size_t numDim_size = numDim * sizeof(real_t);
+#pragma omp parallel for
+    for (index_t i = 0; i < n; i++) {
+        const index_t k = index[i] - min_index;
+        if (k >= 0 && k < range) {
+            Id_out[i] = Id_in[k];
+            Tag_out[i] = Tag_in[k];
+            globalDegreesOfFreedom_out[i] = globalDegreesOfFreedom_in[k];
+            memcpy(&Coordinates_out[INDEX2(0, i, numDim)],
+                   &Coordinates_in[INDEX2(0, k, numDim)], numDim_size);
+        }
     }
 }
 
-void Dudley_NodeFile_gather(index_t * index, Dudley_NodeFile * in, Dudley_NodeFile * out)
+// helper function
+static void scatterEntries(dim_t n, const index_t* index,
+                           index_t min_index, index_t max_index,
+                           index_t* Id_out, const index_t* Id_in,
+                           int* Tag_out, const int* Tag_in,
+                           index_t* globalDegreesOfFreedom_out,
+                           const index_t* globalDegreesOfFreedom_in,
+                           int numDim, real_t* Coordinates_out,
+                           const real_t* Coordinates_in)
 {
-    index_t min_id, max_id;
-    Dudley_NodeFile_setGlobalIdRange(&min_id, &max_id, in);
-    Dudley_NodeFile_gatherEntries(out->numNodes, index, min_id, max_id,
-				  out->Id, in->Id,
-				  out->Tag, in->Tag,
-				  out->globalDegreesOfFreedom, in->globalDegreesOfFreedom,
-				  out->numDim, out->Coordinates, in->Coordinates);
+    const dim_t range = max_index - min_index;
+    const size_t numDim_size = numDim * sizeof(real_t);
+
+#pragma omp parallel for
+    for (index_t i = 0; i < n; i++) {
+        const index_t k = index[i] - min_index;
+        if (k >= 0 && k < range) {
+            Id_out[k] = Id_in[i];
+            Tag_out[k] = Tag_in[i];
+            globalDegreesOfFreedom_out[k] = globalDegreesOfFreedom_in[i];
+            memcpy(&Coordinates_out[INDEX2(0, k, numDim)],
+                   &Coordinates_in[INDEX2(0, i, numDim)], numDim_size);
+        }
+    }
 }
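
The gatherEntries() and scatterEntries() helpers above are mirror images of each other: gather reads records through the index array (out[i] = in[index[i] - min]), while scatter writes them back through it (out[index[i] - min] = in[i]). A tiny illustration with plain integers in place of the Id/Tag/coordinate records:

    #include <cstdio>
    #include <vector>

    int main()
    {
        const std::vector<int> index = { 5, 7, 6 };    // global IDs, min_index == 5
        const std::vector<int> table = { 50, 60, 70 }; // values stored for IDs 5, 6, 7

        std::vector<int> gathered(index.size());
        for (size_t i = 0; i < index.size(); ++i)
            gathered[i] = table[index[i] - 5];         // gather: pick by index

        std::vector<int> scattered(table.size());
        for (size_t i = 0; i < index.size(); ++i)
            scattered[index[i] - 5] = gathered[i];     // scatter: write back by index

        for (int v : scattered)
            std::printf("%d ", v);                     // prints: 50 60 70
        std::printf("\n");
        return 0;
    }
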
 
-void Dudley_NodeFile_gather_global(index_t * index, Dudley_NodeFile * in, Dudley_NodeFile * out)
+void NodeFile::gather(const index_t* index, const NodeFile* in)
 {
-    index_t min_id, max_id, undefined_node;
-    Esys_MPI_rank buffer_rank, *distribution = NULL;
-    index_t *Id_buffer = NULL, *Tag_buffer = NULL, *globalDegreesOfFreedom_buffer = NULL;
-    double *Coordinates_buffer = NULL;
-    dim_t p, buffer_len, n;
-    char error_msg[100];
-#ifdef ESYS_MPI
-    Esys_MPI_rank dest, source;
-    MPI_Status status;
-#endif
+    const std::pair<index_t,index_t> idRange(in->getGlobalIdRange());
+    gatherEntries(numNodes, index, idRange.first, idRange.second, Id, in->Id,
+             Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
+             numDim, Coordinates, in->Coordinates);
+}
+
+void NodeFile::gather_global(const index_t* index, const NodeFile* in)
+{
+    // get the global range of node IDs
+    const std::pair<index_t,index_t> idRange(in->getGlobalIdRange());
+    const index_t UNDEFINED = idRange.first - 1;
+    std::vector<index_t> distribution(in->MPIInfo->size + 1);
+
+    // distribute the range of node IDs
+    dim_t buffer_len = MPIInfo->setDistribution(idRange.first, idRange.second,
+                                                &distribution[0]);
+
+    // allocate buffers
+    index_t* Id_buffer = new index_t[buffer_len];
+    int* Tag_buffer = new int[buffer_len];
+    index_t* globalDegreesOfFreedom_buffer = new index_t[buffer_len];
+    real_t* Coordinates_buffer = new real_t[buffer_len * numDim];
 
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalIdRange(&min_id, &max_id, in);
-    undefined_node = min_id - 1;
-
-    distribution = new  index_t[in->MPIInfo->size + 1];
-
-    if (!Dudley_checkPtr(distribution))
-    {
-	/* distribute the range of node ids */
-	buffer_len = in->MPIInfo->setDistribution(min_id, max_id, distribution);
-	/* allocate buffers */
-	Id_buffer = new  index_t[buffer_len];
-	Tag_buffer = new  index_t[buffer_len];
-	globalDegreesOfFreedom_buffer = new  index_t[buffer_len];
-	Coordinates_buffer = new  double[buffer_len * out->numDim];
-	if (!(Dudley_checkPtr(Id_buffer) || Dudley_checkPtr(Tag_buffer) ||
-	      Dudley_checkPtr(globalDegreesOfFreedom_buffer) || Dudley_checkPtr(Coordinates_buffer)))
-	{
-	    /* fill Id_buffer by the undefined_node marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < buffer_len; n++)
-		Id_buffer[n] = undefined_node;
-
-	    /* fill the buffer by sending portions around in a circle */
+    // fill Id_buffer with the UNDEFINED marker to check whether nodes are
+    // defined
+#pragma omp parallel for
+    for (index_t n = 0; n < buffer_len; n++)
+        Id_buffer[n] = UNDEFINED;
+
+    // fill the buffer by sending portions around in a circle
 #ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
+    MPI_Status status;
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		if (p > 0)
-		{		/* the initial send can be skipped */
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
 #ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter + 1, source,
-					 in->MPIInfo->msg_tag_counter + 1, in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len, MPI_INT, dest,
-					 in->MPIInfo->msg_tag_counter + 2, source, in->MPIInfo->msg_tag_counter + 2,
-					 in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(Coordinates_buffer, buffer_len * out->numDim, MPI_DOUBLE, dest,
-					 in->MPIInfo->msg_tag_counter + 3, source, in->MPIInfo->msg_tag_counter + 3,
-					 in->MPIInfo->comm, &status);
+        if (p > 0) { // the initial send can be skipped
+            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_DIM_T, dest,
+                        MPIInfo->counter(), source, MPIInfo->counter(),
+                        MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
+                        MPIInfo->counter() + 1, source,
+                        MPIInfo->counter() + 1, MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
+                        MPI_DIM_T, dest, MPIInfo->counter() + 2, source,
+                        MPIInfo->counter() + 2, MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len * numDim,
+                        MPI_DOUBLE, dest, MPIInfo->counter() + 3, source,
+                        MPIInfo->counter() + 3, MPIInfo->comm, &status);
+            MPIInfo->incCounter(4);
+        }
 #endif
-		    ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-		Dudley_NodeFile_scatterEntries(in->numNodes, in->Id,
-					       distribution[buffer_rank], distribution[buffer_rank + 1],
-					       Id_buffer, in->Id,
-					       Tag_buffer, in->Tag,
-					       globalDegreesOfFreedom_buffer, in->globalDegreesOfFreedom,
-					       out->numDim, Coordinates_buffer, in->Coordinates);
-	    }
-	    /* now entries are collected from the buffer again by sending the entries around in a circle */
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
+        scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
+                       distribution[buffer_rank + 1], Id_buffer, in->Id,
+                       Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
+                       in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
+                       in->Coordinates);
+    }
+    // now the entries are collected back from the buffers by sending them
+    // around in a circle again
 #ifdef ESYS_MPI
-	    dest = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank + 1);
-	    source = esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank - 1);
+    dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-	    buffer_rank = in->MPIInfo->rank;
-	    for (p = 0; p < in->MPIInfo->size; ++p)
-	    {
-		Dudley_NodeFile_gatherEntries(out->numNodes, index,
-					      distribution[buffer_rank], distribution[buffer_rank + 1],
-					      out->Id, Id_buffer,
-					      out->Tag, Tag_buffer,
-					      out->globalDegreesOfFreedom, globalDegreesOfFreedom_buffer,
-					      out->numDim, out->Coordinates, Coordinates_buffer);
-		if (p < in->MPIInfo->size - 1)
-		{		/* the last send can be skipped */
+    buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        gatherEntries(numNodes, index, distribution[buffer_rank],
+                      distribution[buffer_rank + 1], Id, Id_buffer,
+                      Tag, Tag_buffer, globalDegreesOfFreedom,
+                      globalDegreesOfFreedom_buffer, numDim,
+                      Coordinates, Coordinates_buffer);
 #ifdef ESYS_MPI
-		    MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
-					 in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT,
-					 dest, in->MPIInfo->msg_tag_counter + 1, source,
-					 in->MPIInfo->msg_tag_counter + 1, in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len, MPI_INT, dest,
-					 in->MPIInfo->msg_tag_counter + 2, source, in->MPIInfo->msg_tag_counter + 2,
-					 in->MPIInfo->comm, &status);
-		    MPI_Sendrecv_replace(Coordinates_buffer, buffer_len * out->numDim, MPI_DOUBLE, dest,
-					 in->MPIInfo->msg_tag_counter + 3, source, in->MPIInfo->msg_tag_counter + 3,
-					 in->MPIInfo->comm, &status);
+        if (p < MPIInfo->size - 1) { // the last send can be skipped
+            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_DIM_T, dest,
+                              MPIInfo->counter(), source,
+                              MPIInfo->counter(), MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
+                              MPIInfo->counter() + 1, source,
+                              MPIInfo->counter() + 1, MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
+                              MPI_DIM_T, dest, MPIInfo->counter() + 2, source,
+                              MPIInfo->counter() + 2, MPIInfo->comm, &status);
+            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len * numDim,
+                              MPI_DOUBLE, dest, MPIInfo->counter() + 3, source,
+                              MPIInfo->counter() + 3, MPIInfo->comm, &status);
+            MPIInfo->incCounter(4);
+        }
 #endif
-		    ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
-		}
-		buffer_rank = esysUtils::mod_rank(in->MPIInfo->size, buffer_rank - 1);
-	    }
-	    /* check if all nodes are set: */
-#pragma omp parallel for private(n) schedule(static)
-	    for (n = 0; n < out->numNodes; ++n)
-	    {
-		if (out->Id[n] == undefined_node)
-		{
-		    sprintf(error_msg,
-			    "Dudley_NodeFile_gather_global: Node id %d at position %d is referenced but is not defined.",
-			    out->Id[n], n);
-		    Dudley_setError(VALUE_ERROR, error_msg);
-		}
-	    }
-
-	}
-	delete[] Id_buffer;
-	delete[] Tag_buffer;
-	delete[] globalDegreesOfFreedom_buffer;
-	delete[] Coordinates_buffer;
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
+    }
+    delete[] Id_buffer;
+    delete[] Tag_buffer;
+    delete[] globalDegreesOfFreedom_buffer;
+    delete[] Coordinates_buffer;
+#if DOASSERT
+    // check if all nodes are set
+    index_t err = -1;
+#pragma omp parallel for
+    for (index_t n = 0; n < numNodes; ++n) {
+        if (Id[n] == UNDEFINED) {
+#pragma omp critical
+            err = n;
+        }
+    }
+    if (err >= 0) {
+        std::stringstream ss;
+        ss << "NodeFile::gather_global: Node id " << Id[err]
+            << " at position " << err << " is referenced but not defined.";
+        throw escript::AssertException(ss.str());
     }
-    delete[] distribution;
-    /* make sure that the error is global */
-    esysUtils::Esys_MPIInfo_noError(in->MPIInfo);
+#endif // DOASSERT
 }
+
+} // namespace dudley
+
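
The gather_global implementation above relies on a ring exchange: each rank's buffers are handed to the neighbouring rank with MPI_Sendrecv_replace until they have visited every rank once. The following minimal standalone sketch (not part of the patch, assuming a standard MPI installation) shows just that skeleton; the single int payload and the printf are purely illustrative stand-ins for the Id/Tag/DOF/coordinate buffers.

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int dest = (rank + 1) % size;          // next rank in the ring
        const int source = (rank - 1 + size) % size; // previous rank in the ring
        int buffer = rank; // stand-in for the Id/Tag/DOF/coordinate buffers

        for (int p = 0; p < size; ++p) {
            if (p > 0) { // the initial send can be skipped, as in gather_global
                MPI_Status status;
                MPI_Sendrecv_replace(&buffer, 1, MPI_INT, dest, 0, source, 0,
                                     MPI_COMM_WORLD, &status);
            }
            // here gather_global scatters/gathers node entries; we just report
            // whose data this rank currently holds
            std::printf("rank %d holds data from rank %d in round %d\n",
                        rank, buffer, p);
        }
        MPI_Finalize();
        return 0;
    }
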
diff --git a/dudley/src/NodeFile_scatter.cpp b/dudley/src/NodeFile_scatter.cpp
deleted file mode 100644
index 6686376..0000000
--- a/dudley/src/NodeFile_scatter.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/*   scatters the NodeFile in into NodeFile out using index[0:in->numNodes-1].  */
-/*   index has to be between 0 and out->numNodes-1. */
-/*   coloring is chosen for the worst case */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-
-/************************************************************************************/
-
-void Dudley_NodeFile_scatterEntries(dim_t n, index_t * index, index_t min_index, index_t max_index,
-				    index_t * Id_out, index_t * Id_in,
-				    index_t * Tag_out, index_t * Tag_in,
-				    index_t * globalDegreesOfFreedom_out, index_t * globalDegreesOfFreedom_in,
-				    dim_t numDim, double *Coordinates_out, double *Coordinates_in)
-{
-    dim_t i;
-    register index_t k;
-    const index_t range = max_index - min_index;
-    const size_t numDim_size = (size_t) numDim * sizeof(double);
-
-#pragma omp parallel for private(i,k) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-	k = index[i] - min_index;
-	if ((k >= 0) && (k < range))
-	{
-	    Id_out[k] = Id_in[i];
-	    Tag_out[k] = Tag_in[i];
-	    globalDegreesOfFreedom_out[k] = globalDegreesOfFreedom_in[i];
-	    memcpy(&(Coordinates_out[INDEX2(0, k, numDim)]), &(Coordinates_in[INDEX2(0, i, numDim)]), numDim_size);
-	}
-    }
-}
-
-void Dudley_NodeFile_scatter(index_t * index, Dudley_NodeFile * in, Dudley_NodeFile * out)
-{
-    Dudley_NodeFile_scatterEntries(out->numNodes, index, 0, in->numNodes,
-				   out->Id, in->Id,
-				   out->Tag, in->Tag,
-				   out->globalDegreesOfFreedom, in->globalDegreesOfFreedom,
-				   out->numDim, out->Coordinates, in->Coordinates);
-}
diff --git a/dudley/src/NodeFile_setCoordinates.cpp b/dudley/src/NodeFile_setCoordinates.cpp
deleted file mode 100644
index d5a9c26..0000000
--- a/dudley/src/NodeFile_setCoordinates.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/* copies the array newX into self->coordinates */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-#include "Util.h"
-
-/************************************************************************************/
-
-void Dudley_NodeFile_setCoordinates(Dudley_NodeFile * self, const escript::Data* newX)
-{
-    char error_msg[LenErrorMsg_MAX];
-    size_t numDim_size;
-    int n;
-    if (getDataPointSize(newX) != self->numDim)
-    {
-	sprintf(error_msg, "Dudley_NodeFile_setCoordinates: dimension of new coordinates has to be %d.", self->numDim);
-	Dudley_setError(VALUE_ERROR, error_msg);
-    }
-    else if (!numSamplesEqual(newX, 1, self->numNodes))
-    {
-	sprintf(error_msg, "Dudley_NodeFile_setCoordinates: number of given nodes must to be %d.", self->numNodes);
-	Dudley_setError(VALUE_ERROR, error_msg);
-    }
-    else
-    {
-	numDim_size = self->numDim * sizeof(double);
-	Dudley_increaseStatus(self);
-#pragma omp parallel private(n)
-	{
-
-#pragma omp for schedule(static)
-	    for (n = 0; n < self->numNodes; n++)
-	    {
-		memcpy(&(self->Coordinates[INDEX2(0, n, self->numDim)]), getSampleDataROFast(newX, n), numDim_size);
-	    }
-	}
-    }
-}
diff --git a/dudley/src/NodeFile_setIdRange.cpp b/dudley/src/NodeFile_setIdRange.cpp
deleted file mode 100644
index 6a75559..0000000
--- a/dudley/src/NodeFile_setIdRange.cpp
+++ /dev/null
@@ -1,227 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/*   returns the maximum and minimum node id number of nodes: */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-#include "Util.h"
-
-/************************************************************************************/
-
-void Dudley_NodeFile_setGlobalIdRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    index_t min_id_local, max_id_local;
-#ifdef ESYS_MPI
-    index_t global_id_range[2], id_range[2];
-#endif
-
-    min_id_local = Dudley_Util_getMinInt(1, in->numNodes, in->Id);
-    max_id_local = Dudley_Util_getMaxInt(1, in->numNodes, in->Id);
-
-#ifdef ESYS_MPI
-    id_range[0] = -min_id_local;
-    id_range[1] = max_id_local;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
-    *min_id = -global_id_range[0];
-    *max_id = global_id_range[1];
-#else
-    *min_id = min_id_local;
-    *max_id = max_id_local;
-#endif
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-void Dudley_NodeFile_setIdRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    *min_id = Dudley_Util_getMinInt(1, in->numNodes, in->Id);
-    *max_id = Dudley_Util_getMaxInt(1, in->numNodes, in->Id);
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-void Dudley_NodeFile_setGlobalDOFRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    index_t min_id_local, max_id_local;
-#ifdef ESYS_MPI
-    index_t global_id_range[2], id_range[2];
-#endif
-
-    min_id_local = Dudley_Util_getMinInt(1, in->numNodes, in->globalDegreesOfFreedom);
-    max_id_local = Dudley_Util_getMaxInt(1, in->numNodes, in->globalDegreesOfFreedom);
-
-#ifdef ESYS_MPI
-    id_range[0] = -min_id_local;
-    id_range[1] = max_id_local;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
-    *min_id = -global_id_range[0];
-    *max_id = global_id_range[1];
-#else
-    *min_id = min_id_local;
-    *max_id = max_id_local;
-#endif
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-void Dudley_NodeFile_setDOFRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    *min_id = Dudley_Util_getMinInt(1, in->numNodes, in->globalDegreesOfFreedom);
-    *max_id = Dudley_Util_getMaxInt(1, in->numNodes, in->globalDegreesOfFreedom);
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-void Dudley_NodeFile_setReducedDOFRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    *min_id = Dudley_Util_getFlaggedMinInt(1, in->numNodes, in->globalReducedDOFIndex, -1);
-    *max_id = Dudley_Util_getFlaggedMaxInt(1, in->numNodes, in->globalReducedDOFIndex, -1);
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-index_t Dudley_NodeFile_maxGlobalDegreeOfFreedomIndex(Dudley_NodeFile * in)
-{
-    index_t min_id, max_id;
-    Dudley_NodeFile_setGlobalDOFRange(&min_id, &max_id, in);
-    return max_id;
-}
-
-index_t Dudley_NodeFile_maxGlobalReducedDegreeOfFreedomIndex(Dudley_NodeFile * in)
-{
-    index_t min_id, max_id;
-    Dudley_NodeFile_setGlobalReducedDegreeOfFreedomRange(&min_id, &max_id, in);
-    return max_id;
-}
-
-void Dudley_NodeFile_setGlobalReducedDegreeOfFreedomRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    index_t min_id_local, max_id_local;
-#ifdef ESYS_MPI
-    index_t global_id_range[2], id_range[2];
-#endif
-
-    min_id_local = Dudley_Util_getFlaggedMaxInt(1, in->numNodes, in->globalReducedDOFIndex, -1);
-    max_id_local = Dudley_Util_getFlaggedMinInt(1, in->numNodes, in->globalReducedDOFIndex, -1);
-
-#ifdef ESYS_MPI
-    id_range[0] = -min_id_local;
-    id_range[1] = max_id_local;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
-    *min_id = -global_id_range[0];
-    *max_id = global_id_range[1];
-#else
-    *min_id = min_id_local;
-    *max_id = max_id_local;
-#endif
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-index_t Dudley_NodeFile_maxGlobalNodeIDIndex(Dudley_NodeFile * in)
-{
-    index_t min_id, max_id;
-    Dudley_NodeFile_setGlobalNodeIDIndexRange(&min_id, &max_id, in);
-    return max_id;
-}
-
-void Dudley_NodeFile_setGlobalNodeIDIndexRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    index_t min_id_local, max_id_local;
-#ifdef ESYS_MPI
-    index_t global_id_range[2], id_range[2];
-#endif
-
-    max_id_local = Dudley_Util_getMaxInt(1, in->numNodes, in->globalNodesIndex);
-    min_id_local = Dudley_Util_getMinInt(1, in->numNodes, in->globalNodesIndex);
-
-#ifdef ESYS_MPI
-    id_range[0] = -min_id_local;
-    id_range[1] = max_id_local;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
-    *min_id = -global_id_range[0];
-    *max_id = global_id_range[1];
-#else
-    *min_id = min_id_local;
-    *max_id = max_id_local;
-#endif
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
-
-index_t Dudley_NodeFile_maxGlobalReducedNodeIDIndex(Dudley_NodeFile * in)
-{
-    index_t min_id, max_id;
-    Dudley_NodeFile_setGlobalReducedNodeIDIndexRange(&min_id, &max_id, in);
-    return max_id;
-}
-
-void Dudley_NodeFile_setGlobalReducedNodeIDIndexRange(index_t * min_id, index_t * max_id, Dudley_NodeFile * in)
-{
-    index_t min_id_local, max_id_local;
-#ifdef ESYS_MPI
-    index_t global_id_range[2], id_range[2];
-#endif
-
-    max_id_local = Dudley_Util_getFlaggedMaxInt(1, in->numNodes, in->globalReducedNodesIndex, -1);
-    min_id_local = Dudley_Util_getFlaggedMinInt(1, in->numNodes, in->globalReducedNodesIndex, -1);
-
-#ifdef ESYS_MPI
-    id_range[0] = -min_id_local;
-    id_range[1] = max_id_local;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, in->MPIInfo->comm);
-    *min_id = -global_id_range[0];
-    *max_id = global_id_range[1];
-#else
-    *min_id = min_id_local;
-    *max_id = max_id_local;
-#endif
-    if (*max_id < *min_id)
-    {
-	*max_id = 0;
-	*min_id = -1;
-    }
-}
diff --git a/dudley/src/NodeFile_setTags.cpp b/dudley/src/NodeFile_setTags.cpp
deleted file mode 100644
index 91dd58a..0000000
--- a/dudley/src/NodeFile_setTags.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: Mesh: NodeFile */
-
-/*  set tags to newTag where mask>0 */
-
-/************************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "NodeFile.h"
-#include "Util.h"
-
-/************************************************************************************/
-
-void Dudley_NodeFile_setTags(Dudley_NodeFile * self, const int newTag, const escript::Data* mask)
-{
-    register dim_t n;
-    dim_t numNodes;
-    register __const double *mask_array;
-    Dudley_resetError();
-
-    if (self == NULL)
-	return;
-    numNodes = self->numNodes;
-    if (1 != getDataPointSize(mask))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_NodeFile_setTags: number of components of mask is 1.");
-    }
-    else if (!numSamplesEqual(mask, 1, numNodes))
-    {
-	Dudley_setError(TYPE_ERROR, "Dudley_NodeFile_setTags: illegal number of samples of mask Data object");
-    }
-
-    /* now we can start */
-
-    if (Dudley_noError())
-    {
-#pragma omp parallel private(n,mask_array)
-	{
-#pragma omp for schedule(static)
-	    for (n = 0; n < numNodes; n++)
-	    {
-		mask_array = getSampleDataRO(mask, n);
-		if (mask_array[0] > 0)
-		    self->Tag[n] = newTag;
-	    }
-	}
-	Dudley_NodeFile_setTagsInUse(self);
-    }
-}
-
-/*
-* $Log$
-*
-*/
diff --git a/dudley/src/NodeMapping.cpp b/dudley/src/NodeMapping.cpp
deleted file mode 100644
index d60da45..0000000
--- a/dudley/src/NodeMapping.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include "NodeMapping.h"
-#include "Util.h"
-#include "esysUtils/mem.h"
-
-Dudley_NodeMapping *Dudley_NodeMapping_alloc(dim_t numNodes, index_t * target, index_t unused)
-{
-    dim_t i;
-    index_t min_target, numTargets, max_target;
-    Dudley_NodeMapping *out = NULL;
-    /*  allocate the return value */
-    min_target = Dudley_Util_getFlaggedMinInt(1, numNodes, target, unused);
-    if (min_target < 0)
-    {
-	Dudley_setError(VALUE_ERROR, "Dudley_NodeMapping_alloc: target has negative entry.");
-	return NULL;
-    }
-    /* now we assume min_target=0! */
-    max_target = Dudley_Util_getFlaggedMaxInt(1, numNodes, target, unused);
-    numTargets = min_target <= max_target ? max_target + 1 : 0;
-    out = new Dudley_NodeMapping;
-    if (!Dudley_checkPtr(out))
-    {
-	out->reference_counter = 1;
-	out->unused = unused;
-	out->numNodes = numNodes;
-	out->numTargets = numTargets;
-	out->map = new  index_t[numTargets];
-	out->target = new  index_t[numNodes];
-	if (!(Dudley_checkPtr(out->target) || Dudley_checkPtr(out->map)))
-	{
-#pragma omp parallel
-	    {
-#pragma omp for private(i)
-		for (i = 0; i < numTargets; ++i)
-		    out->map[i] = -1;
-#pragma omp for private(i)
-		for (i = 0; i < numNodes; ++i)
-		{
-		    out->target[i] = target[i];
-		    if (target[i] != unused)
-			out->map[out->target[i]] = i;
-		}
-#pragma omp for private(i)
-		for (i = 0; i < numTargets; ++i)
-		{
-		    if (out->map[i] == -1)
-		    {
-			Dudley_setError(VALUE_ERROR,
-					"Dudley_NodeMapping_alloc: target does not define a continuous labeling.");
-		    }
-		}
-	    }
-	}
-	if (!Dudley_noError())
-	{
-	    Dudley_NodeMapping_free(out);
-	}
-
-    }
-    return out;
-}
-
-void Dudley_NodeMapping_free(Dudley_NodeMapping * in)
-{
-    if (in != NULL)
-    {
-	in->reference_counter--;
-	if (in->reference_counter <= 0)
-	{
-	    delete[] in->target;
-	    delete[] in->map;
-	    delete in;
-	}
-    }
-}
-
-Dudley_NodeMapping *Dudley_NodeMapping_getReference(Dudley_NodeMapping * in)
-{
-    if (in != NULL)
-	in->reference_counter++;
-    return in;
-}
-
diff --git a/dudley/src/NodeMapping.h b/dudley/src/NodeMapping.h
index e1195a0..5225780 100644
--- a/dudley/src/NodeMapping.h
+++ b/dudley/src/NodeMapping.h
@@ -14,28 +14,91 @@
 *
 *****************************************************************************/
 
-/*                                                                                                                     */
-/* NodeMapping provides a mapping from the local nodes typically to the degrees of freedom,                            */
-/*    the reduced degrees of freedom or the reduced node set                                                           */
-/*                                                                                                                     */
-
-#ifndef INC_DUDLEY_NODEMAPPING
-#define INC_DUDLEY_NODEMAPPING
-
-#include "esysUtils/Esys_MPI.h"
-
-struct Dudley_NodeMapping {
-    dim_t numNodes;		/* number of FEM nodes */
-    index_t *target;		/* target[i] defines the target if FEM  node i =0,...,numNodes */
-    index_t unused;		/* target[i]=unused defines that no target is defined for FEM  node i */
-    dim_t numTargets;		/* number of targets */
-    index_t *map;		/* maps the target nodes back to the FEM nodes: target[map[i]]=i */
-    dim_t reference_counter;
+#ifndef __DUDLEY_NODEMAPPING_H__
+#define __DUDLEY_NODEMAPPING_H__
+
+#include "Util.h"
+
+namespace dudley {
+
+/// NodeMapping provides a mapping from the local nodes typically to the
+/// degrees of freedom, the reduced degrees of freedom or the reduced node set
+struct NodeMapping
+{
+    NodeMapping() : numNodes(0), target(NULL), numTargets(0), map(NULL) {}
+
+    /// resets both map and target
+    void clear()
+    {
+        delete[] map;
+        delete[] target;
+        target = NULL;
+        map = NULL;
+        numNodes = 0;
+        numTargets = 0;
+    }
+
+    /// initializes a node mapping. The target array is copied and a reverse
+    /// map created.
+    /// theTarget[i]=unused means that no target is defined for FEM node i.
+    void assign(const index_t* theTarget, dim_t nNodes, index_t unused)
+    {
+        clear();
+
+        if (nNodes == 0)
+            return;
+
+        numNodes = nNodes;
+
+        std::pair<index_t,index_t> range(
+            util::getFlaggedMinMaxInt(numNodes, theTarget, unused));
+        if (range.first < 0) {
+            throw escript::ValueError("NodeMapping: target has negative entry.");
+        }
+        numTargets = range.first<=range.second ? range.second+1 : 0;
+
+        target = new index_t[numNodes];
+        map = new index_t[numTargets];
+
+        bool err = false;
+#pragma omp parallel
+        {
+            // initialise the reverse map so the continuity check below can
+            // detect targets that were never assigned
+#pragma omp for
+            for (index_t i=0; i<numTargets; ++i)
+                map[i] = -1;
+#pragma omp for
+            for (index_t i=0; i<numNodes; ++i) {
+                target[i] = theTarget[i];
+                if (target[i] != unused)
+                    map[target[i]] = i;
+            }
+            // sanity check
+#pragma omp for
+            for (index_t i=0; i<numTargets; ++i) {
+                if (map[i] == -1) {
+#pragma omp critical
+                    err = true;
+                }
+            }
+        }
+        if (err)
+            throw escript::ValueError("NodeMapping: target does not define a continuous labeling.");
+    }
+
+    /// returns the number of target nodes (number of items in the map array)
+    inline dim_t getNumTargets() const { return numTargets; }
+
+    /// size of `target` (number of FEM nodes)
+    dim_t numNodes;
+
+    /// target[i] defines the target of FEM node i=0,...,numNodes
+    index_t* target;
+
+    /// size of `map` (number of target nodes, e.g. DOF, reduced DOF, etc.)
+    dim_t numTargets;
+
+    /// maps the target nodes back to the FEM nodes: target[map[i]]=i
+    index_t* map;
 };
-typedef struct Dudley_NodeMapping Dudley_NodeMapping;
 
-Dudley_NodeMapping *Dudley_NodeMapping_alloc(dim_t numNodes, index_t *target, index_t unused);
-void Dudley_NodeMapping_free(Dudley_NodeMapping *);
-Dudley_NodeMapping *Dudley_NodeMapping_getReference(Dudley_NodeMapping *in);
+} // namespace dudley
+
+#endif // __DUDLEY_NODEMAPPING_H__
 
-#endif
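
The new NodeMapping keeps a forward array target (FEM node to target) and a reverse array map (target to FEM node) with the invariant target[map[i]] == i, and assign() rejects labellings that are not continuous. A small self-contained sketch of that relationship (not part of the patch), using plain int and -1 in place of index_t and the unused marker:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main()
    {
        const int unused = -1;
        // five FEM nodes; node 2 has no target, the others label targets 0..3
        std::vector<int> target = {0, 2, unused, 1, 3};

        int numTargets = 0;
        for (int t : target)
            if (t != unused)
                numTargets = std::max(numTargets, t + 1);

        // build the reverse map exactly as assign() does
        std::vector<int> map(numTargets, -1);
        for (int i = 0; i < (int)target.size(); ++i)
            if (target[i] != unused)
                map[target[i]] = i;

        // continuity check: every target must have been assigned a node
        for (int i = 0; i < numTargets; ++i)
            assert(map[i] != -1);

        // the defining property of the mapping
        for (int i = 0; i < numTargets; ++i)
            assert(target[map[i]] == i);
        return 0;
    }
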
diff --git a/dudley/src/SConscript b/dudley/src/SConscript
index c278c86..d6852e0 100644
--- a/dudley/src/SConscript
+++ b/dudley/src/SConscript
@@ -1,5 +1,4 @@
 
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -15,165 +14,112 @@
 #
 ##############################################################################
 
-
-import os
 Import('*')
 
-local_env = env.Clone()
-py_wrapper_local_env = env.Clone()
-local_unroll_env = env.Clone()
-local_unroll_env.Append(CFLAGS = env['dudley_assemble_flags'])
-
-
-# Remove the sharedlibrary prefix on all platform - we don't want 'lib' mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'dudley'
 
 sources = """
-	Assemble_AverageElementData.cpp
-	Assemble_CopyElementData.cpp
-	Assemble_CopyNodalData.cpp
-	Assemble_LumpedSystem.cpp
-	Assemble_NodeCoordinates.cpp
-	Assemble_PDE.cpp
-        Assemble_PDE_Points.cpp
-	Assemble_PDE_Single2_2D.cpp
-	Assemble_PDE_Single2_3D.cpp
-	Assemble_PDE_System2_2D.cpp
-	Assemble_PDE_System2_3D.cpp
-	Assemble_addToSystemMatrix.cpp
-	Assemble_getAssembleParameters.cpp
-	Assemble_getSize.cpp
-	Assemble_integrate.cpp
-	Assemble_interpolate.cpp
-	Assemble_setNormal.cpp
-	ElementFile.cpp
-	ElementFile_allocTable.cpp
-	ElementFile_copyTable.cpp
-	ElementFile_createColoring.cpp
-	ElementFile_distributeByRankOfDOF.cpp
-	ElementFile_gather.cpp
-	ElementFile_jacobeans.cpp
-	ElementFile_markNodes.cpp
-	ElementFile_optimizeOrdering.cpp
-	ElementFile_relableNodes.cpp
-	ElementFile_scatter.cpp
-	ElementFile_setCoordinates.cpp
-	ElementFile_setNodeRange.cpp
-	ElementFile_setTags.cpp
-	ElementType.cpp
-	Dudley.cpp
-	IndexList.cpp
-	Mesh.cpp
-	Mesh_createNodeFileMappings.cpp
-	Mesh_distributeByRankOfDOF.cpp
-	Mesh_findMatchingFaces.cpp
-	Mesh_getPattern.cpp
-	Mesh_markNodes.cpp
-	Mesh_optimizeDOFDistribution.cpp
-	Mesh_optimizeDOFLabeling.cpp
-	Mesh_prepare.cpp
-	Mesh_print.cpp
-	Mesh_read.cpp
-	Mesh_readGmsh.cpp
-	Mesh_relableElementNodes.cpp
-	Mesh_resolveNodeIds.cpp
-	Mesh_setCoordinates.cpp
-	Mesh_tagmaps.cpp
-	Mesh_tet4.cpp
-	Mesh_tri3.cpp
-	Mesh_write.cpp
-	NodeFile.cpp
-	NodeFile_allocTable.cpp
-	NodeFile_copyTable.cpp
-	NodeFile_createDenseLabelings.cpp
-	NodeFile_gather.cpp
-	NodeFile_scatter.cpp
-	NodeFile_setCoordinates.cpp
-	NodeFile_setIdRange.cpp
-	NodeFile_setTags.cpp
-	NodeMapping.cpp
-	ShapeTable.cpp
-	TagMap.cpp
-	Util.cpp
-	CPPAdapter/DudleyAdapterException.cpp
-	CPPAdapter/DudleyError.cpp
-	CPPAdapter/MeshAdapter.cpp
-	CPPAdapter/MeshAdapterFactory.cpp
+    Assemble_AverageElementData.cpp
+    Assemble_CopyElementData.cpp
+    Assemble_CopyNodalData.cpp
+    Assemble_LumpedSystem.cpp
+    Assemble_NodeCoordinates.cpp
+    Assemble_PDE.cpp
+    Assemble_PDE_Points.cpp
+    Assemble_PDE_Single_2D.cpp
+    Assemble_PDE_Single_3D.cpp
+    Assemble_PDE_System_2D.cpp
+    Assemble_PDE_System_3D.cpp
+    Assemble_addToSystemMatrix.cpp
+    Assemble_getAssembleParameters.cpp
+    Assemble_getNormal.cpp
+    Assemble_getSize.cpp
+    Assemble_integrate.cpp
+    Assemble_interpolate.cpp
+    DomainFactory.cpp
+    DudleyDomain.cpp
+    ElementFile.cpp
+    ElementFile_createColoring.cpp
+    ElementFile_distributeByRankOfDOF.cpp
+    ElementFile_jacobians.cpp
+    IndexList.cpp
+    Mesh_distributeByRankOfDOF.cpp
+    Mesh_getPattern.cpp
+    Mesh_optimizeDOFDistribution.cpp
+    Mesh_optimizeDOFLabeling.cpp
+    Mesh_read.cpp
+    Mesh_readGmsh.cpp
+    Mesh_resolveNodeIds.cpp
+    Mesh_tet4.cpp
+    Mesh_tri3.cpp
+    Mesh_write.cpp
+    NodeFile.cpp
+    NodeFile_createDenseLabelings.cpp
+    NodeFile_createMappings.cpp
+    NodeFile_createTrilinosGraph.cpp
+    NodeFile_gather.cpp
+    ShapeTable.cpp
+    Util.cpp
 """.split()
 unroll_sources= """
-	Assemble_gradient.cpp
-	Assemble_jacobeans.cpp
+    Assemble_gradient.cpp
+    Assemble_jacobians.cpp
 """.split()
 
-
 headers = """
-	Assemble.h
-	Dudley.h
-	DudleyVersion.h
-	ElementFile.h
-	ElementType.h
-	IndexList.h
-	Mesh.h
-	NodeFile.h
-	NodeMapping.h
-	TriangularMesh.h
-	ShapeTable.h
-	TagMap.h
-	Util.h
-""".split()
-cppadapter_headers = """
-	CPPAdapter/DudleyAdapterException.h
-	CPPAdapter/DudleyError.h
-	CPPAdapter/MeshAdapter.h
-	CPPAdapter/MeshAdapterFactory.h
-	CPPAdapter/system_dep.h
+    Assemble.h
+    DomainFactory.h
+    Dudley.h
+    DudleyDomain.h
+    DudleyException.h
+    DudleyVersion.h
+    ElementFile.h
+    ElementType.h
+    IndexList.h
+    NodeFile.h
+    NodeMapping.h
+    ShapeTable.h
+    Util.h
 """.split()
 
-local_env.Prepend(LIBS = ['pasowrap', 'escript', 'paso', 'esysUtils'])
+local_env = env.Clone()
 
-if IS_WINDOWS :
-  local_env.Append(CPPDEFINES = ['DUDLEY_EXPORTS'])
+unroll_env = env.Clone()
+unroll_env.Append(CCFLAGS = env['dudley_assemble_flags'])
+un = [ unroll_env.SharedObject(x) for x in unroll_sources]
 
-module_name = 'dudley'
+if IS_WINDOWS :
+    local_env.Append(CPPDEFINES = ['DUDLEY_EXPORTS'])
 
-un = [ local_unroll_env.SharedObject(x) for x in unroll_sources]
+# collect dependencies for other modules
+dudleylibs = []
+dudleylibs += env['escript_libs']
+if env['parmetis']:
+    dudleylibs += env['parmetis_libs']
+if env['paso']:
+    dudleylibs += env['paso_libs']
+if env['trilinos']:
+    dudleylibs += env['trilinoswrap_libs']
 
-lib = local_env.SharedLibrary(module_name, sources+un)
-env.Alias('build_dudley_lib', lib)
+local_env.PrependUnique(LIBS = dudleylibs)
 
-include_path = Dir('dudley', local_env['incinstall'])
-cppadapter_include_path = Dir('CppAdapter', include_path)
+env['dudley_libs'] = [module_name] + dudleylibs
 
-hdr_inst1 = local_env.Install(include_path, headers )
-hdr_inst2 = local_env.Install(cppadapter_include_path, cppadapter_headers )
-env.Alias('install_dudley_headers', [hdr_inst1, hdr_inst2])
+include_path = Dir(module_name, local_env['incinstall'])
+hdr_inst = local_env.Install(include_path, headers)
 
+lib = local_env.SharedLibrary(module_name, sources+un)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_dudley_lib', lib_inst)
 
 ### Python wrapper ###
-if not env['build_shared']:
-    py_wrapper_local_env.Prepend(LIBS = ['dudley', 'pasowrap', 'escript', 'esysUtils'])
-else:
-    py_wrapper_local_env.Prepend(LIBS = ['dudley', 'pasowrap', 'escript', 'paso', 'esysUtils'])
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'CPPAdapter/dudleycpp.cpp')
-env.Alias('build_dudleycpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_dudleycpp_lib', mod_inst)
-
-# configure python module
-local_env.SConscript(dirs = ['#/dudley/py_src'], variant_dir='py', duplicate=0)
-
-# configure unit tests
-local_env.SConscript(dirs = ['#/dudley/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
+py_env = env.Clone()
+py_env.PrependUnique(LIBS = env['dudley_libs'])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'dudleycpp.cpp')
+
+mod_path = Dir(module_name, local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
+
+build = env.Alias('build_dudley', [hdr_inst, lib, py_lib])
+env.Alias('install_dudley', [build, lib_inst, mod_inst])
 
diff --git a/dudley/src/ShapeTable.cpp b/dudley/src/ShapeTable.cpp
index b025222..1aaa479 100644
--- a/dudley/src/ShapeTable.cpp
+++ b/dudley/src/ShapeTable.cpp
@@ -14,132 +14,113 @@
 *****************************************************************************/
 
 #include "ShapeTable.h"
-#include "esysUtils/mem.h"
-#include <stdlib.h>
 
-/* Joel Fenwick - derived from info in Finley's Quadrature and shape files
+namespace dudley {
 
-This method is not threadsafe unless the initial call has completed
-Evaluates the shape functions at nodes (This is the S value from the finley ShapeFunctions
-The dim argument is the dimension of the element not the dimension of the embedding space.
-the reduced arg is whether the elements are reduced or not
-*/
+// Joel Fenwick - derived from info in Finley's Quadrature and shape files
+
+// This method is not thread-safe unless the initial call has completed.
+// Evaluates the shape functions at the nodes (this is the S value from the
+// finley ShapeFunctions). The dim argument is the dimension of the element,
+// not the dimension of the embedding space. The reduced argument selects
+// reduced (single quadrature point) rather than full quadrature.
 bool getQuadShape(dim_t dim, bool reduced, const double **shapearr)
 {
 #define _dudley_s_alpha 0.58541019662496852
 #define _dudley_s_beta  0.1381966011250105
 
-/* {Line, TRI, TET} X {single_quad_point, more} X max number of quadpoints */
+    // {Line, TRI, TET} X {single_quad_point, more} X max number of quadpoints
     static const double _dudley_V[3 * 2][12] = {
-	{0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},	/* Line single */
-	{(1. - .577350269189626) / 2., (1. + .577350269189626) / 2., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},	/* Line 2 points */
-	{1 / 3., 1 / 3., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},	/* Tri single */
-	{0.5, 0, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0},	/* Tri 3 points */
-	{0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0},	/* Tet single */
-	{_dudley_s_beta, _dudley_s_beta, _dudley_s_beta,
-	 _dudley_s_alpha, _dudley_s_beta, _dudley_s_beta,
-	 _dudley_s_beta, _dudley_s_alpha, _dudley_s_beta,
-	 _dudley_s_beta, _dudley_s_beta, _dudley_s_alpha}	/* Tet 4 points */
+        {0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Line single
+        {(1. - .577350269189626) / 2., (1. + .577350269189626) / 2., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},     // Line 2 points
+        {1 / 3., 1 / 3., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Tri single
+        {0.5, 0, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0, 0},   // Tri 3 points
+        {0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0},  // Tet single
+        {_dudley_s_beta, _dudley_s_beta, _dudley_s_beta,
+         _dudley_s_alpha, _dudley_s_beta, _dudley_s_beta,
+         _dudley_s_beta, _dudley_s_alpha, _dudley_s_beta,
+         _dudley_s_beta, _dudley_s_beta, _dudley_s_alpha} // Tet 4 points
     };
 
 #undef _dudley_s_alpha
 #undef _dudley_s_beta
 
-    static double **arr = 0;
-
-    if (arr == 0)
-    {
-	int i;
-	arr = new double*[8];	/* point occupies two slots to make things simpler */
-	arr[0] = new double[1];
-	arr[0][0] = 1.;		/* point */
-	arr[1] = arr[0];
-	arr[2] = new double[4];	/* Line Single */
-	arr[3] = new double[4];	/* Line 2 */
-
-/*
-	for (i = 0; i < 2; ++i)
-	{
-	    arr[2][2 * i] = 1 - _dudley_V[0][i];
-	    arr[3][2 * i] = 1 - _dudley_V[1][i];
-
-	    arr[2][2 * i + 1] = _dudley_V[0][i];
-	    arr[3][2 * i + 1] = _dudley_V[1][i];
-	}
-*/
-
-	for (i = 0; i < 2; ++i)
-	{
-	    arr[2][2 * i] = 1 - _dudley_V[0][i];
-	    arr[2][2 * i + 1] = _dudley_V[0][i];
-	}
-	for (i = 0; i < 2; ++i)
-	{
-	    arr[3][2 * i] = 1 - _dudley_V[1][i];
-	    arr[3][2 * i + 1] = _dudley_V[1][i];
-	}
-
-
-
-	arr[4] = new double[3];	/* Tri single */
-	arr[4][0] = 1. - _dudley_V[2][0] - _dudley_V[2][1];
-	arr[4][1] = _dudley_V[2][0];
-	arr[4][2] = _dudley_V[2][1];
-
-	arr[5] = new double[9];	/* Tri 3 */
-	for (i = 0; i < 3; ++i)
-	{
-	    arr[5][3 * i] = 1 - _dudley_V[3][2 * i] - _dudley_V[3][2 * i + 1];
-	    arr[5][3 * i + 1] = _dudley_V[3][2 * i];
-	    arr[5][3 * i + 2] = _dudley_V[3][2 * i + 1];
-	}
-	arr[6] = new  double[4];	/* Tet single */
-	arr[6][0] = 1 - _dudley_V[4][0] - _dudley_V[4][1] - _dudley_V[4][2];
-	arr[6][1] = _dudley_V[4][0];
-	arr[6][2] = _dudley_V[4][1];
-	arr[6][3] = _dudley_V[4][2];
-
-	arr[7] = new double[16];	/* Tet 4 */
-	for (i = 0; i < 4; ++i)
-	{
-	    double x = _dudley_V[5][3 * i];
-	    double y = _dudley_V[5][3 * i + 1];
-	    double z = _dudley_V[5][3 * i + 2];
-	    arr[7][4 * i] = 1 - x - y - z;
-	    arr[7][4 * i + 1] = x;
-	    arr[7][4 * i + 2] = y;
-	    arr[7][4 * i + 3] = z;
-	}
-    }				/* end if */
-
-    if ((dim > -1) && (dim < 4))
-    {
-	*shapearr = arr[(!reduced) ? (2 * dim + 1) : (2 * dim)];
-	return 1;
+    static double **arr = NULL;
+
+    if (!arr) {
+        int i;
+        arr = new double*[8];   // point occupies two slots to make things simpler
+        arr[0] = new double[1];
+        arr[0][0] = 1.;         // point
+        arr[1] = arr[0];
+        arr[2] = new double[4]; // Line Single
+        arr[3] = new double[4]; // Line 2
+
+        for (i = 0; i < 2; ++i) {
+            arr[2][2 * i] = 1 - _dudley_V[0][i];
+            arr[2][2 * i + 1] = _dudley_V[0][i];
+            arr[3][2 * i] = 1 - _dudley_V[1][i];
+            arr[3][2 * i + 1] = _dudley_V[1][i];
+        }
+
+        arr[4] = new double[3]; // Tri single
+        arr[4][0] = 1. - _dudley_V[2][0] - _dudley_V[2][1];
+        arr[4][1] = _dudley_V[2][0];
+        arr[4][2] = _dudley_V[2][1];
+
+        arr[5] = new double[9]; // Tri 3
+        for (i = 0; i < 3; ++i) {
+            arr[5][3 * i] = 1 - _dudley_V[3][2 * i] - _dudley_V[3][2 * i + 1];
+            arr[5][3 * i + 1] = _dudley_V[3][2 * i];
+            arr[5][3 * i + 2] = _dudley_V[3][2 * i + 1];
+        }
+        arr[6] = new double[4]; // Tet single
+        arr[6][0] = 1 - _dudley_V[4][0] - _dudley_V[4][1] - _dudley_V[4][2];
+        arr[6][1] = _dudley_V[4][0];
+        arr[6][2] = _dudley_V[4][1];
+        arr[6][3] = _dudley_V[4][2];
+
+        arr[7] = new double[16]; // Tet 4
+        for (i = 0; i < 4; ++i) {
+            const double x = _dudley_V[5][3 * i];
+            const double y = _dudley_V[5][3 * i + 1];
+            const double z = _dudley_V[5][3 * i + 2];
+            arr[7][4 * i] = 1 - x - y - z;
+            arr[7][4 * i + 1] = x;
+            arr[7][4 * i + 2] = y;
+            arr[7][4 * i + 3] = z;
+        }
+    } // end if
+
+    if (dim > -1 && dim < 4) {
+        *shapearr = arr[(!reduced) ? (2 * dim + 1) : (2 * dim)];
+        return 1;
     }
-    *shapearr = 0;
+    *shapearr = NULL;
     return 0;
 }
 
-const char *getElementName(Dudley_ElementTypeId id)
+const char *getElementName(ElementTypeId id)
 {
-    switch (id)
-    {
-    case Dudley_Point1:
-	return "Point1";
-    case Dudley_Line2:
-	return "Line2";
-    case Dudley_Tri3:
-	return "Tri3";
-    case Dudley_Tet4:
-	return "Tet4";
-    case Dudley_Line2Face:
-	return "Line2Face";
-    case Dudley_Tri3Face:
-	return "Tri3Face";
-    case Dudley_Tet4Face:
-	return "Tet4Face";
-    default:
-	return "noElement";
+    switch (id) {
+        case Dudley_Point1:
+            return "Point1";
+        case Dudley_Line2:
+            return "Line2";
+        case Dudley_Tri3:
+            return "Tri3";
+        case Dudley_Tet4:
+            return "Tet4";
+        case Dudley_Line2Face:
+            return "Line2Face";
+        case Dudley_Tri3Face:
+            return "Tri3Face";
+        case Dudley_Tet4Face:
+            return "Tet4Face";
+        default:
+            return "noElement";
     }
 }
+
+} // namespace dudley
+
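
The tables built in getQuadShape store, for each quadrature point, the values of the linear shape functions derived from the quadrature coordinates in _dudley_V, and these values always sum to one at each point. A short standalone check of that invariant for the three-point triangle rule (the coordinates are copied from the "Tri 3 points" row above; the code itself is only an illustration and not part of the patch):

    #include <cassert>
    #include <cmath>

    int main()
    {
        // quadrature point coordinates of the 3-point triangle rule
        // (the "Tri 3 points" row of _dudley_V, read as (x, y) pairs)
        const double V[3][2] = { {0.5, 0.0}, {0.0, 0.5}, {0.5, 0.5} };
        double S[3][3]; // S[q][j]: value of shape function j at point q
        for (int q = 0; q < 3; ++q) {
            S[q][0] = 1.0 - V[q][0] - V[q][1];
            S[q][1] = V[q][0];
            S[q][2] = V[q][1];
            const double sum = S[q][0] + S[q][1] + S[q][2];
            assert(std::fabs(sum - 1.0) < 1e-14);
        }
        return 0;
    }
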
diff --git a/dudley/src/ShapeTable.h b/dudley/src/ShapeTable.h
index 51630fc..c4c864b 100644
--- a/dudley/src/ShapeTable.h
+++ b/dudley/src/ShapeTable.h
@@ -17,50 +17,68 @@
 /* Shape Function info
 These tables are a much simplified version of content from finley's ShapeFunctions files
 
-This file is not to be included in .h files - only .c files should have any use for it
+This file is not to be included in .h files - only .cpp files should have any use for it
 */
 
-#ifndef SHAPETABLE_DUDLEY
-#define SHAPETABLE_DUDLEY
-
-#include "esysUtils/types.h"	
+#ifndef __DUDLEY_SHAPETABLE_H__
+#define __DUDLEY_SHAPETABLE_H__
 
+#include "Dudley.h"
 #include "ElementType.h"
 
-/* These are constructed from dsdv in ShapeFunction.c in finley
-   The first two are just there for functions that want a pointer
-*/
-static const double DTDV_0D[1][1] = { {0} };
+namespace dudley {
+
+// These are constructed from dsdv in ShapeFunction.cpp in finley.
+// The first one is just there for functions that want a pointer
 static const double DTDV_1D[2][2] = { {-1., 1}, {-1., 1.} };
 
-/* The repetition here is a hack to prevent out of bounds access */
-static const double DTDV_2D[3 * 3][2] = { {-1, 1}, {0, -1.}, {0, 1},
-{-1, 1}, {0, -1.}, {0, 1},
-{-1, 1}, {0, -1.}, {0, 1}
+// The repetition here is a hack to prevent out of bounds access
+static const double DTDV_2D[3 * 3][2] = {
+    {-1, 1}, {0, -1.}, {0, 1},
+    {-1, 1}, {0, -1.}, {0, 1},
+    {-1, 1}, {0, -1.}, {0, 1}
+};
+
+static const double DTDV_3D[4][3] = {
+    {-1, -1, -1},
+    { 1,  0,  0},
+    { 0,  1,  0},
+    { 0,  0,  1}
 };
-static const double DTDV_3D[4][3] = { {-1, -1, -1}, {1, 0, 0}, {0, 1, 0}, {0, 0, 1} };
 
-/* Index by the following by Dudley_ElementTypeID
- * The number of local dimensions (as opposed to dimension of the embedding space) */
-static const dim_t localDims[8] = { 0, 1, 2, 3, 0, 1, 2, 0 };
-static const dim_t Dims[8] = { 0, 1, 2, 3, 1, 2, 3, 0 };
+// Index the following by ElementTypeID
+// The number of local dimensions (as opposed to dimension of the embedding
+// space)
+static const int localDims[8] = { 0, 1, 2, 3, 0, 1, 2, 0 };
+static const int Dims[8] = { 0, 1, 2, 3, 1, 2, 3, 0 };
 
-/* the following lists are only used for face elements defined by numNodesOnFace>0 */
-static const dim_t numNodesOnFaceMap[8] = { 1, 2, 3, 4, 1, 2, 4, -1 };	/* if the element is allowed as a face element, numNodesOnFace defines the number of nodes defining the face */
-static const dim_t shiftNodesMap[8][4] = { {0}, {1, 0}, {1, 2, 0}, {-1}, {0, 1, 2}, {1, 0, 2}, {1, 2, 0, 3}, {0} };	/* defines a permutation of the nodes which rotates the nodes on the face */
-static const dim_t reverseNodesMap[8][4] = { {-1}, {-1}, {0, 2, 1}, {-1}, {-1}, {-1}, {0, 2, 1, 3}, {0} };	/* reverses the order of the nodes on a face. the permutation has keep 0 fixed. */
-					      /* shiftNodes={-1} or reverseNodes={-1} are ignored. */
+// the following lists are only used for face elements defined by
+// numNodesOnFace>0
 
-/* [0] is reduced quadrature, [1] is full quadrature */
-/* in order the positions are POINT, LINE, TRI, TET */
+// if the element is allowed as a face element, numNodesOnFace defines the
+// number of nodes defining the face
+static const int numNodesOnFaceMap[8] = { 1, 2, 3, 4, 1, 2, 4, -1 };
+
+// defines a permutation of the nodes which rotates the nodes on the face
+static const int shiftNodesMap[8][4] = { {0}, {1, 0}, {1, 2, 0}, {-1}, {0, 1, 2}, {1, 0, 2}, {1, 2, 0, 3}, {0} };
+
+// reverses the order of the nodes on a face. The permutation has to keep 0 fixed.
+// shiftNodes={-1} or reverseNodes={-1} are ignored.
+static const int reverseNodesMap[8][4] = { {-1}, {-1}, {0, 2, 1}, {-1}, {-1}, {-1}, {0, 2, 1, 3}, {0} };
+
+// [0] is reduced quadrature, [1] is full quadrature
+// in order the positions are POINT, LINE, TRI, TET
 static const double QuadWeight[4][2] = { {0, 0}, {1., 0.5}, {0.5, 1. / 6}, {1. / 6, 1. / 24} };
 
-/* number of quadrature points per element */
-static const dim_t QuadNums[4][2] = { {0, 0}, {1, 2}, {1, 3}, {1, 4} };
+// number of quadrature points per element
+static const int QuadNums[4][2] = { {0, 0}, {1, 2}, {1, 3}, {1, 4} };
 
-/*shape functions at quadrature nodes */
+// shape functions at quadrature nodes
 bool getQuadShape(dim_t sim, bool reduced, const double **shapearr);
 
-const char *getElementName(Dudley_ElementTypeId id);
+const char* getElementName(ElementTypeId id);
+
+} // namespace dudley
+
+#endif // __DUDLEY_SHAPETABLE_H__
 
-#endif
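
shiftNodesMap in the header above stores, per element type, a permutation that rotates the nodes on a face. A tiny sketch (not part of the patch; the node ids are made up and the application convention shown is just one plausible reading) demonstrating that applying the Tri3 rotation {1, 2, 0} three times restores the original ordering:

    #include <cassert>

    int main()
    {
        const int shiftTri3[3] = {1, 2, 0}; // shiftNodesMap entry for Dudley_Tri3
        int nodes[3] = {10, 11, 12};        // hypothetical node ids on one face

        for (int round = 0; round < 3; ++round) {
            int rotated[3];
            for (int i = 0; i < 3; ++i)
                rotated[i] = nodes[shiftTri3[i]];
            for (int i = 0; i < 3; ++i)
                nodes[i] = rotated[i];
        }
        // three rotations of a triangular face restore the original order
        assert(nodes[0] == 10 && nodes[1] == 11 && nodes[2] == 12);
        return 0;
    }
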
diff --git a/dudley/src/TagMap.cpp b/dudley/src/TagMap.cpp
deleted file mode 100644
index 909a4dd..0000000
--- a/dudley/src/TagMap.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/* Dudley: simple mapping from names to tag keys via a linked list */
-
-/************************************************************************************/
-
-#include "TagMap.h"
-#include "esysUtils/mem.h"
-#include "string.h"
-
-/************************************************************************************/
-
-void Dudley_TagMap_insert(Dudley_TagMap ** tag_map, const char *name, index_t tag_key)
-{
-    Dudley_TagMap *map = NULL;
-    if (strlen(name) < 1)
-    {
-	Dudley_setError(VALUE_ERROR, "empty tag name.");
-	return;
-    }
-    if (strchr(name, 32) != NULL)
-    {				/* check for space */
-	Dudley_setError(VALUE_ERROR, "tag name may not contain a space.");
-	return;
-    }
-    if (*tag_map == NULL)
-    {
-	map = new Dudley_TagMap;
-	if (Dudley_checkPtr(map))
-	    return;
-	map->name = new char[strlen(name) + 1];
-	if (Dudley_checkPtr(map->name))
-	{
-	    delete map;
-	}
-	else
-	{
-	    strcpy(map->name, name);
-	    map->tag_key = tag_key;
-	    map->next = NULL;
-	    *tag_map = map;
-	}
-    }
-    else
-    {
-	if (strcmp((*tag_map)->name, name) == 0)
-	{
-	    (*tag_map)->tag_key = tag_key;
-	}
-	else
-	{
-	    Dudley_TagMap_insert(&((*tag_map)->next), name, tag_key);
-	}
-    }
-}
-
-index_t Dudley_TagMap_getTag(Dudley_TagMap * tag_map, const char *name)
-{
-    char error_msg[LenErrorMsg_MAX];
-    if (tag_map == NULL)
-    {
-	sprintf(error_msg, "Dudley_TagMap_getTag: unknown tag name %s.", name);
-	Dudley_setError(VALUE_ERROR, error_msg);
-	return -1;
-    }
-    else
-    {
-	if (strcmp(tag_map->name, name) == 0)
-	{
-	    return tag_map->tag_key;
-	}
-	else
-	{
-	    return Dudley_TagMap_getTag(tag_map->next, name);
-	}
-    }
-}
-
-bool Dudley_TagMap_isValidTagName(Dudley_TagMap * tag_map, const char *name)
-{
-    if (tag_map == NULL)
-    {
-	return false;
-    }
-    else
-    {
-	if (strcmp(tag_map->name, name) == 0)
-	{
-	    return true;
-	}
-	else
-	{
-	    return Dudley_TagMap_isValidTagName(tag_map->next, name);
-	}
-    }
-}
-
-/* deallocates the Dudley_TagMap in by recursive calls */
-
-void Dudley_TagMap_free(Dudley_TagMap * in)
-{
-    if (in != NULL)
-    {
-	Dudley_TagMap_free(in->next);
-	delete[] in->name;
-	delete in;
-    }
-    return;
-}
diff --git a/dudley/src/TagMap.h b/dudley/src/TagMap.h
deleted file mode 100644
index 4a9e3a0..0000000
--- a/dudley/src/TagMap.h
+++ /dev/null
@@ -1,38 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/* Dudley: simple link list to privide clear names for a tag keys */
-
-/************************************************************************************/
-
-#ifndef INC_DUDLEY_TAGMAP
-#define INC_DUDLEY_TAGMAP
-
-#include "Dudley.h"
-
-typedef struct Dudley_TagMap {
-    char *name;
-    index_t tag_key;
-    struct Dudley_TagMap *next;
-} Dudley_TagMap;
-
-void Dudley_TagMap_insert(Dudley_TagMap **, const char *name, index_t tag_key);
-index_t Dudley_TagMap_getTag(Dudley_TagMap *, const char *name);
-bool Dudley_TagMap_isValidTagName(Dudley_TagMap *, const char *name);
-void Dudley_TagMap_free(Dudley_TagMap *);
-#endif				/* #ifndef INC_DUDLEY_TAGMAP */
diff --git a/dudley/src/TriangularMesh.h b/dudley/src/TriangularMesh.h
deleted file mode 100644
index 5eb3be6..0000000
--- a/dudley/src/TriangularMesh.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/************************************************************************************/
-
-/*   Dudley: header file for generates triangular meshes for 1D,2D,3D. */
-
-/************************************************************************************/
-
-#ifndef INC_DUDLEY_TRIANGULARMESH
-#define INC_DUDLEY_TRIANGULARMESH
-
-/************************************************************************************/
-
-#include "Mesh.h"
-
-Dudley_Mesh *Dudley_TriangularMesh_Tri3(dim_t * numElements, double *Length, index_t order, index_t reduced_order,
-					bool optimize, esysUtils::JMPI& mpi_info);
-Dudley_Mesh *Dudley_TriangularMesh_Tet4(dim_t * numElements, double *Length, index_t order, index_t reduced_order,
-					bool optimize, esysUtils::JMPI& mpi_info);
-
-#endif				/* #ifndef INC_DUDLEY_TRIANGULARMESH */
diff --git a/dudley/src/Util.cpp b/dudley/src/Util.cpp
index 6d95c66..304189c 100644
--- a/dudley/src/Util.cpp
+++ b/dudley/src/Util.cpp
@@ -14,749 +14,247 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
-
-/*   Some utility routines: */
-
-/************************************************************************************/
-#include "esysUtils/maths.h"
 #include "Util.h"
 
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "esysUtils/index.h"
-#include "esysUtils/mem.h"
-#include <limits.h>
-#include "string.h"  /* for memcpy*/
-
-/************************************************************************************/
-
-/*   returns true if any of the values in the short array values is not equal to zero */
-
-bool Dudley_Util_anyNonZeroDouble(dim_t N, double *values)
-{
-    dim_t q;
-    for (q = 0; q < N; ++q)
-	if (ABS(values[q]) > 0)
-	    return TRUE;
-    return FALSE;
-}
-
-/************************************************************************************/
-
-/*   gathers double values out from in by index: */
-
-/*        out(1:numData,1:len)=in(1:numData,index(1:len)) */
-
-void Dudley_Util_Gather_double(dim_t len, index_t * index, dim_t numData, double *in, double *out)
-{
-    dim_t s, i;
-    for (s = 0; s < len; s++)
-    {
-	for (i = 0; i < numData; i++)
-	{
-	    out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)];
-	}
-    }
-}
-
-/************************************************************************************/
-
-/*   gathers maybelong values out from in by index: */
-
-/*        out(1:numData,1:len)=in(1:numData,index(1:len)) */
-
-void Dudley_Util_Gather_int(dim_t len, index_t * index, dim_t numData, index_t * in, index_t * out)
-{
-    dim_t s, i;
-    for (s = 0; s < len; s++)
-    {
-	for (i = 0; i < numData; i++)
-	{
-	    out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)];
-	}
-    }
-}
-
-/************************************************************************************/
-
-/*   adds a vector in into out using an index. */
-
-/*        out(1:numData,index[p])+=in(1:numData,p) where p = {k=1...len , index[k]<upperBound}*/
-
-void Dudley_Util_AddScatter(const dim_t len, const index_t * index, const dim_t numData, const double *in, double *out, const index_t upperBound)
-{
-    dim_t i, s;
-    for (s = 0; s < len; s++)
-    {
-	for (i = 0; i < numData; i++)
-	{
-	    if (index[s] < upperBound)
-	    {
-		out[INDEX2(i, index[s], numData)] += in[INDEX2(i, s, numData)];
-	    }
-	}
-    }
-}
-
-/*    multiplies two matrices */
+#include <escript/index.h>
 
-/*          A(1:A1,1:A2)=B(1:A1,1:B2)*C(1:B2,1:A2) */
-
-void Dudley_Util_SmallMatMult(dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C)
-{
-    dim_t i, j, s;
-    register double rtmp;
-    for (i = 0; i < A1; i++)
-    {
-	for (j = 0; j < A2; j++)
-	{
-	    rtmp = 0;
-	    for (s = 0; s < B2; s++)
-	    {
-		rtmp += B[INDEX2(i, s, A1)] * C[INDEX2(s, j, B2)];
-	    }
-	    A[INDEX2(i, j, A1)] = rtmp;
-	}
-    }
-}
+#include <algorithm> // std::sort
 
-/*    multiplies two sets of matrices: */
+namespace dudley {
+namespace util {
 
-/*        A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2,i) i=1,len */
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
 
-void Dudley_Util_SmallMatSetMult(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C)
+/// comparison function for sortValueAndIndex
+bool ValueAndIndexCompare(const std::pair<int,int> &i, const std::pair<int, int> &j)
 {
-    dim_t q, i, j, s;
-    register double rtmp;
-    for (q = 0; q < len; q++)
-    {
-	for (i = 0; i < A1; i++)
-	{
-	    for (j = 0; j < A2; j++)
-	    {
-		rtmp = 0;
-		for (s = 0; s < B2; s++)
-		    rtmp += B[INDEX3(i, s, q, A1, B2)] * C[INDEX3(s, j, q, B2, A2)];
-		A[INDEX3(i, j, q, A1, A2)] = rtmp;
-	    }
-	}
-    }
+    // to ensure we have a strict ordering as required by std
+    if (i.first == j.first)
+        return i.second < j.second;
+    return i.first < j.first;
 }
 
-/*    multiplies a set of matrices with a single matrix: */
-
-/*        A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2) i=1,len */
-
-void Dudley_Util_SmallMatSetMult1(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C)
-{
-    dim_t q, i, j, s;
-    register double rtmp;
-    for (q = 0; q < len; q++)
-    {
-	for (i = 0; i < A1; i++)
-	{
-	    for (j = 0; j < A2; j++)
-	    {
-		rtmp = 0;
-		for (s = 0; s < B2; s++)
-		    rtmp += B[INDEX3(i, s, q, A1, B2)] * C[INDEX2(s, j, B2)];
-		A[INDEX3(i, j, q, A1, A2)] = rtmp;
-	    }
-	}
-    }
-}
-
-/*    inverts the set of dim x dim matrices A(:,:,1:len) with dim=1,2,3 */
-/*    the determinant is returned. */
-
-void Dudley_Util_InvertSmallMat(dim_t len, dim_t dim, double *A, double *invA, double *det)
-{
-    dim_t q;
-    register double D, A11, A12, A13, A21, A22, A23, A31, A32, A33;
-
-    switch (dim)
-    {
-    case 1:
-	for (q = 0; q < len; q++)
-	{
-	    D = A[q];
-	    if (ABS(D) > 0)
-	    {
-		det[q] = D;
-		D = 1. / D;
-		invA[q] = D;
-	    }
-	    else
-	    {
-		Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix");
-		return;
-	    }
-	}
-	break;
-
-    case 2:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 2, 2)];
-	    A12 = A[INDEX3(0, 1, q, 2, 2)];
-	    A21 = A[INDEX3(1, 0, q, 2, 2)];
-	    A22 = A[INDEX3(1, 1, q, 2, 2)];
-
-	    D = A11 * A22 - A12 * A21;
-	    if (ABS(D) > 0)
-	    {
-		det[q] = D;
-		D = 1. / D;
-		invA[INDEX3(0, 0, q, 2, 2)] = A22 * D;
-		invA[INDEX3(1, 0, q, 2, 2)] = -A21 * D;
-		invA[INDEX3(0, 1, q, 2, 2)] = -A12 * D;
-		invA[INDEX3(1, 1, q, 2, 2)] = A11 * D;
-	    }
-	    else
-	    {
-		Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix");
-		return;
-	    }
-	}
-	break;
-
-    case 3:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 3, 3)];
-	    A21 = A[INDEX3(1, 0, q, 3, 3)];
-	    A31 = A[INDEX3(2, 0, q, 3, 3)];
-	    A12 = A[INDEX3(0, 1, q, 3, 3)];
-	    A22 = A[INDEX3(1, 1, q, 3, 3)];
-	    A32 = A[INDEX3(2, 1, q, 3, 3)];
-	    A13 = A[INDEX3(0, 2, q, 3, 3)];
-	    A23 = A[INDEX3(1, 2, q, 3, 3)];
-	    A33 = A[INDEX3(2, 2, q, 3, 3)];
-
-	    D = A11 * (A22 * A33 - A23 * A32) + A12 * (A31 * A23 - A21 * A33) + A13 * (A21 * A32 - A31 * A22);
-	    if (ABS(D) > 0)
-	    {
-		det[q] = D;
-		D = 1. / D;
-		invA[INDEX3(0, 0, q, 3, 3)] = (A22 * A33 - A23 * A32) * D;
-		invA[INDEX3(1, 0, q, 3, 3)] = (A31 * A23 - A21 * A33) * D;
-		invA[INDEX3(2, 0, q, 3, 3)] = (A21 * A32 - A31 * A22) * D;
-		invA[INDEX3(0, 1, q, 3, 3)] = (A13 * A32 - A12 * A33) * D;
-		invA[INDEX3(1, 1, q, 3, 3)] = (A11 * A33 - A31 * A13) * D;
-		invA[INDEX3(2, 1, q, 3, 3)] = (A12 * A31 - A11 * A32) * D;
-		invA[INDEX3(0, 2, q, 3, 3)] = (A12 * A23 - A13 * A22) * D;
-		invA[INDEX3(1, 2, q, 3, 3)] = (A13 * A21 - A11 * A23) * D;
-		invA[INDEX3(2, 2, q, 3, 3)] = (A11 * A22 - A12 * A21) * D;
-	    }
-	    else
-	    {
-		Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": Non-regular matrix");
-		return;
-	    }
-	}
-	break;
-
-    }
-    return;
-}
-
-/*    sets the determinant of a set of dim x dim matrices A(:,:,1:len) with dim=1,2,3 */
-
-void Dudley_Util_DetOfSmallMat(dim_t len, dim_t dim, double *A, double *det)
+void sortValueAndIndex(ValueAndIndexList& array)
 {
-    dim_t q;
-    register double A11, A12, A13, A21, A22, A23, A31, A32, A33;
-
-    switch (dim)
-    {
-    case 1:
-	for (q = 0; q < len; q++)
-	{
-	    det[q] = A[q];
-	}
-	break;
-
-    case 2:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 2, 2)];
-	    A12 = A[INDEX3(0, 1, q, 2, 2)];
-	    A21 = A[INDEX3(1, 0, q, 2, 2)];
-	    A22 = A[INDEX3(1, 1, q, 2, 2)];
-
-	    det[q] = A11 * A22 - A12 * A21;
-	}
-	break;
-
-    case 3:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 3, 3)];
-	    A21 = A[INDEX3(1, 0, q, 3, 3)];
-	    A31 = A[INDEX3(2, 0, q, 3, 3)];
-	    A12 = A[INDEX3(0, 1, q, 3, 3)];
-	    A22 = A[INDEX3(1, 1, q, 3, 3)];
-	    A32 = A[INDEX3(2, 1, q, 3, 3)];
-	    A13 = A[INDEX3(0, 2, q, 3, 3)];
-	    A23 = A[INDEX3(1, 2, q, 3, 3)];
-	    A33 = A[INDEX3(2, 2, q, 3, 3)];
-
-	    det[q] = A11 * (A22 * A33 - A23 * A32) + A12 * (A31 * A23 - A21 * A33) + A13 * (A21 * A32 - A31 * A22);
-	}
-	break;
-
-    }
-    return;
+    std::sort(array.begin(), array.end(), ValueAndIndexCompare);
 }
 
-/*    returns the normalized vector Normal[dim,len] orthogonal to A(:,0,q) and A(:,1,q) in the case of dim=3  */
-/*    or the vector A(:,0,q) in the case of dim=2                                             */
-
-void Dudley_NormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *Normal)
+void gather(int len, const index_t* index, int numData, const double* in,
+            double* out)
 {
-    dim_t q;
-    register double A11, A12, CO_A13, A21, A22, CO_A23, A31, A32, CO_A33, length, invlength;
-
-    switch (dim)
-    {
-    case 1:
-	for (q = 0; q < len; q++)
-	    Normal[q] = 1;
-	break;
-    case 2:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 2, dim1)];
-	    A21 = A[INDEX3(1, 0, q, 2, dim1)];
-	    length = sqrt(A11 * A11 + A21 * A21);
-	    if (length <= 0)
-	    {
-		Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero.");
-		return;
-	    }
-	    else
-	    {
-		invlength = 1. / length;
-		Normal[INDEX2(0, q, 2)] = A21 * invlength;
-		Normal[INDEX2(1, q, 2)] = -A11 * invlength;
-	    }
-	}
-	break;
-    case 3:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 3, dim1)];
-	    A21 = A[INDEX3(1, 0, q, 3, dim1)];
-	    A31 = A[INDEX3(2, 0, q, 3, dim1)];
-	    A12 = A[INDEX3(0, 1, q, 3, dim1)];
-	    A22 = A[INDEX3(1, 1, q, 3, dim1)];
-	    A32 = A[INDEX3(2, 1, q, 3, dim1)];
-	    CO_A13 = A21 * A32 - A31 * A22;
-	    CO_A23 = A31 * A12 - A11 * A32;
-	    CO_A33 = A11 * A22 - A21 * A12;
-	    length = sqrt(CO_A13 * CO_A13 + CO_A23 * CO_A23 + CO_A33 * CO_A33);
-	    if (length <= 0)
-	    {
-		Dudley_setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero.");
-		return;
-	    }
-	    else
-	    {
-		invlength = 1. / length;
-		Normal[INDEX2(0, q, 3)] = CO_A13 * invlength;
-		Normal[INDEX2(1, q, 3)] = CO_A23 * invlength;
-		Normal[INDEX2(2, q, 3)] = CO_A33 * invlength;
-	    }
-
-	}
-	break;
-
+    for (int s = 0; s < len; s++) {
+        for (int i = 0; i < numData; i++) {
+            out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)];
+        }
     }
-    return;
 }
 
-/*    return the length of the vector which is orthogonal to the vectors A(:,0,q) and A(:,1,q) in the case of dim=3 */
-/*    or the vector A(:,0,q) in the case of dim=2                                                                   */
-
-void Dudley_LengthOfNormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *length)
+template<typename Scalar>
+void addScatter(int len, const index_t* index, int numData,
+                const Scalar* in, Scalar* out, index_t upperBound)
 {
-    dim_t q;
-    double A11, A12, CO_A13, A21, A22, CO_A23, A31, A32, CO_A33;
-
-    switch (dim)
-    {
-    case 1:
-	for (q = 0; q < len; q++)
-	    length[q] = 1;
-	break;
-    case 2:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 2, dim1)];
-	    A21 = A[INDEX3(1, 0, q, 2, dim1)];
-	    length[q] = sqrt(A11 * A11 + A21 * A21);
-	}
-	break;
-    case 3:
-	for (q = 0; q < len; q++)
-	{
-	    A11 = A[INDEX3(0, 0, q, 3, dim1)];
-	    A21 = A[INDEX3(1, 0, q, 3, dim1)];
-	    A31 = A[INDEX3(2, 0, q, 3, dim1)];
-	    A12 = A[INDEX3(0, 1, q, 3, dim1)];
-	    A22 = A[INDEX3(1, 1, q, 3, dim1)];
-	    A32 = A[INDEX3(2, 1, q, 3, dim1)];
-	    CO_A13 = A21 * A32 - A31 * A22;
-	    CO_A23 = A31 * A12 - A11 * A32;
-	    CO_A33 = A11 * A22 - A21 * A12;
-	    length[q] = sqrt(CO_A13 * CO_A13 + CO_A23 * CO_A23 + CO_A33 * CO_A33);
-	}
-	break;
-
+    for (int s = 0; s < len; s++) {
+        for (int i = 0; i < numData; i++) {
+            if (index[s] < upperBound) {
+                out[INDEX2(i, index[s], numData)] += in[INDEX2(i, s, numData)];
+            }
+        }
     }
-    return;
 }
 
-/* inverts the map Map of length lenMap */
-/* there is no range checking! */
-/* at output Map[invMap[i]]=i for i=0:lenInvMap */
+template
+void addScatter<real_t>(int len, const index_t* index, int numData,
+                                 const real_t* in, real_t* out, index_t upperBound);
+template
+void addScatter<cplx_t>(int len, const index_t* index, int numData,
+                                 const cplx_t* in, cplx_t* out, index_t upperBound);
 
-void Dudley_Util_InvertMap(dim_t lenInvMap, index_t * invMap, dim_t lenMap, index_t * Map)
+void smallMatMult(int A1, int A2, double* A, int B2, const double* B,
+                  const double* C)
 {
-    dim_t i;
-    for (i = 0; i < lenInvMap; i++)
-	invMap[i] = 0;
-    for (i = 0; i < lenMap; i++)
-    {
-	if (Map[i] >= 0)
-	    invMap[Map[i]] = i;
+    for (int i = 0; i < A1; i++) {
+        for (int j = 0; j < A2; j++) {
+            double sum = 0.;
+            for (int s = 0; s < B2; s++)
+                sum += B[INDEX2(i,s,A1)] * C[INDEX2(s,j,B2)];
+            A[INDEX2(i,j,A1)] = sum;
+        }
     }
 }
 
-/* orders a Dudley_Util_ValueAndIndex array by value */
-/* it is assumed that n is large */
-
-int Dudley_Util_ValueAndIndex_compar(const void *arg1, const void *arg2)
-{
-    Dudley_Util_ValueAndIndex *e1, *e2;
-    e1 = (Dudley_Util_ValueAndIndex *) arg1;
-    e2 = (Dudley_Util_ValueAndIndex *) arg2;
-    if (e1->value < e2->value)
-	return -1;
-    if (e1->value > e2->value)
-	return 1;
-    if (e1->index < e2->index)
-	return -1;
-    if (e1->index > e2->index)
-	return 1;
-    return 0;
-}
-
-void Dudley_Util_sortValueAndIndex(dim_t n, Dudley_Util_ValueAndIndex * array)
+void smallMatSetMult1(int len, int A1, int A2, double* A, int B2,
+                      const double* B, const double* C)
 {
-    /* OMP : needs parallelization ! */
-    qsort(array, n, sizeof(Dudley_Util_ValueAndIndex), Dudley_Util_ValueAndIndex_compar);
-}
-
-/************************************************************************************/
-
-/* calculates the minimum value from a dim X N integer array */
-
-index_t Dudley_Util_getMinInt(dim_t dim, dim_t N, index_t * values)
-{
-    dim_t i, j;
-    index_t out, out_local;
-    out = INDEX_T_MAX;
-    if (values != NULL && dim * N > 0)
-    {
-	out = values[0];
-#pragma omp parallel private(out_local)
-	{
-	    out_local = out;
-#pragma omp for private(i,j) schedule(static)
-	    for (j = 0; j < N; j++)
-	    {
-		for (i = 0; i < dim; i++)
-		    out_local = MIN(out_local, values[INDEX2(i, j, dim)]);
-	    }
-#pragma omp critical
-	    out = MIN(out_local, out);
-	}
+    for (int q = 0; q < len; q++) {
+        for (int i = 0; i < A1; i++) {
+            for (int j = 0; j < A2; j++) {
+                double sum = 0.;
+                for (int s = 0; s < B2; s++)
+                    sum += B[INDEX3(i,s,q,A1,B2)] * C[INDEX2(s,j,B2)];
+                A[INDEX3(i,j,q,A1,A2)] = sum;
+            }
+        }
     }
-    return out;
 }
 
-/* calculates the maximum value from a dim X N integer array */
-
-index_t Dudley_Util_getMaxInt(dim_t dim, dim_t N, index_t * values)
+void normalVector(int len, int dim, int dim1, const double* A, double* Normal)
 {
-    dim_t i, j;
-    index_t out, out_local;
-    out = -INDEX_T_MAX;
-    if (values != NULL && dim * N > 0)
-    {
-	out = values[0];
-#pragma omp parallel private(out_local)
-	{
-	    out_local = out;
-#pragma omp for private(i,j) schedule(static)
-	    for (j = 0; j < N; j++)
-	    {
-		for (i = 0; i < dim; i++)
-		{
-		    out_local = MAX(out_local, values[INDEX2(i, j, dim)]);
-
-		}
-	    }
-#pragma omp critical
-	    out = MAX(out_local, out);
-	}
+    int q;
+
+    switch (dim) {
+        case 1:
+            for (q = 0; q < len; q++)
+                Normal[q] = 1.;
+            break;
+        case 2:
+            for (q = 0; q < len; q++) {
+                const double A11 = A[INDEX3(0,0,q,2,dim1)];
+                const double A21 = A[INDEX3(1,0,q,2,dim1)];
+                const double length = sqrt(A11*A11+A21*A21);
+                if (length <= 0) {
+                    throw DudleyException("normalVector: area equals zero.");
+                } else {
+                    const double invlength = 1./length;
+                    Normal[INDEX2(0,q,2)] =  A21*invlength;
+                    Normal[INDEX2(1,q,2)] = -A11*invlength;
+                }
+            }
+            break;
+        case 3:
+            for (q = 0; q < len; q++) {
+                const double A11 = A[INDEX3(0,0,q,3,dim1)];
+                const double A21 = A[INDEX3(1,0,q,3,dim1)];
+                const double A31 = A[INDEX3(2,0,q,3,dim1)];
+                const double A12 = A[INDEX3(0,1,q,3,dim1)];
+                const double A22 = A[INDEX3(1,1,q,3,dim1)];
+                const double A32 = A[INDEX3(2,1,q,3,dim1)];
+                const double CO_A13 = A21*A32-A31*A22;
+                const double CO_A23 = A31*A12-A11*A32;
+                const double CO_A33 = A11*A22-A21*A12;
+                const double length = sqrt(CO_A13*CO_A13 + CO_A23*CO_A23
+                                           + CO_A33*CO_A33);
+                if (length <= 0) {
+                    throw DudleyException("normalVector: area equals zero.");
+                } else {
+                    const double invlength = 1./length;
+                    Normal[INDEX2(0,q,3)] = CO_A13*invlength;
+                    Normal[INDEX2(1,q,3)] = CO_A23*invlength;
+                    Normal[INDEX2(2,q,3)] = CO_A33*invlength;
+                }
+            }
+            break;
     }
-    return out;
 }
 
-/************************************************************************************/
-
-/* calculates the minimum value from a dim X N integer array */
-
-index_t Dudley_Util_getFlaggedMinInt(dim_t dim, dim_t N, index_t * values, index_t ignore)
+IndexPair getMinMaxInt(int dim, dim_t N, const index_t* values)
 {
-    dim_t i, j;
-    index_t out, out_local;
-    out = INDEX_T_MAX;
-    if (values != NULL && dim * N > 0)
-    {
-	out = values[0];
-#pragma omp parallel private(out_local)
-	{
-	    out_local = out;
-#pragma omp for private(i,j) schedule(static)
-	    for (j = 0; j < N; j++)
-	    {
-		for (i = 0; i < dim; i++)
-		    if (values[INDEX2(i, j, dim)] != ignore)
-			out_local = MIN(out_local, values[INDEX2(i, j, dim)]);
-	    }
+    index_t vmin = escript::DataTypes::index_t_max();
+    index_t vmax = escript::DataTypes::index_t_min();
+    if (values && dim*N > 0) {
+        vmin = vmax = values[0];
+#pragma omp parallel
+        {
+            index_t vmin_local = vmin;
+            index_t vmax_local = vmax;
+#pragma omp for
+            for (index_t j = 0; j < N; j++) {
+                for (int i = 0; i < dim; i++) {
+                    vmin_local = std::min(vmin_local, values[INDEX2(i,j,dim)]);
+                    vmax_local = std::max(vmax_local, values[INDEX2(i,j,dim)]);
+                }
+            }
 #pragma omp critical
-	    out = MIN(out_local, out);
-	}
+            {
+                vmin = std::min(vmin_local, vmin);
+                vmax = std::max(vmax_local, vmax);
+            }
+        }
     }
-    return out;
+    return IndexPair(vmin,vmax);
 }
 
-/* calculates the maximum value from a dim X N integer array */
-
-index_t Dudley_Util_getFlaggedMaxInt(dim_t dim, dim_t N, index_t * values, index_t ignore)
+IndexPair getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore)
 {
-    dim_t i, j;
-    index_t out, out_local;
-    out = -INDEX_T_MAX;
-    if (values != NULL && dim * N > 0)
-    {
-	out = values[0];
-#pragma omp parallel private(out_local)
-	{
-	    out_local = out;
-#pragma omp for private(i,j) schedule(static)
-	    for (j = 0; j < N; j++)
-	    {
-		for (i = 0; i < dim; i++)
-		    if (values[INDEX2(i, j, dim)] != ignore)
-			out_local = MAX(out_local, values[INDEX2(i, j, dim)]);
-	    }
+    index_t vmin = escript::DataTypes::index_t_max();
+    index_t vmax = escript::DataTypes::index_t_min();
+    if (values && N > 0) {
+        vmin = vmax = values[0];
+#pragma omp parallel
+        {
+            index_t vmin_local = vmin;
+            index_t vmax_local = vmax;
+#pragma omp for
+            for (index_t i = 0; i < N; i++) {
+                if (values[i] != ignore) {
+                    vmin_local = std::min(vmin_local, values[i]);
+                    vmax_local = std::max(vmax_local, values[i]);
+                }
+            }
 #pragma omp critical
-	    out = MAX(out_local, out);
-	}
-    }
-    return out;
-}
-
-/* set the index of the positive entries in mask. The length of index is returned. */
-
-dim_t Dudley_Util_packMask(dim_t N, index_t * mask, index_t * index)
-{
-    dim_t out, k;
-    out = 0;
-    /*OMP */
-    for (k = 0; k < N; k++)
-    {
-	if (mask[k] >= 0)
-	{
-	    index[out] = k;
-	    out++;
-	}
+            {
+                vmin = std::min(vmin_local, vmin);
+                vmax = std::max(vmax_local, vmax);
+            }
+        }
     }
-    return out;
-}
-
-/* returns true if array contains value */
-bool Dudley_Util_isAny(dim_t N, index_t * array, index_t value)
-{
-    bool out = FALSE;
-    dim_t i;
-#pragma omp parallel for private(i) schedule(static) reduction(||:out)
-    for (i = 0; i < N; i++)
-	out = out || (array[i] == value);
-    return out;
+    return IndexPair(vmin,vmax);
 }
 
-/* calculates the cumulative sum in array and returns the total sum */
-index_t Dudley_Util_cumsum(dim_t N, index_t * array)
+std::vector<index_t> packMask(const std::vector<short>& mask)
 {
-    index_t out = 0, tmp;
-    dim_t i;
-#ifdef _OPENMP
-    index_t *partial_sums = NULL, sum;
-    partial_sums = new  index_t[omp_get_max_threads()];
-#pragma omp parallel private(sum,i,tmp)
-    {
-	sum = 0;
-#pragma omp for schedule(static)
-	for (i = 0; i < N; ++i)
-	    sum += array[i];
-	partial_sums[omp_get_thread_num()] = sum;
-#pragma omp barrier
-#pragma omp master
-	{
-	    out = 0;
-	    for (i = 0; i < omp_get_max_threads(); ++i)
-	    {
-		tmp = out;
-		out += partial_sums[i];
-		partial_sums[i] = tmp;
-	    }
-	}
-#pragma omp barrier
-	sum = partial_sums[omp_get_thread_num()];
-#pragma omp for schedule(static)
-	for (i = 0; i < N; ++i)
-	{
-	    tmp = sum;
-	    sum += array[i];
-	    array[i] = tmp;
-	}
+    std::vector<index_t> index;
+    for (index_t k = 0; k < mask.size(); k++) {
+        if (mask[k] >= 0) {
+            index.push_back(k);
+        }
     }
-    delete[] partial_sums;
-#else
-    for (i = 0; i < N; ++i)
-    {
-	tmp = out;
-	out += array[i];
-	array[i] = tmp;
-    }
-#endif
-    return out;
+    return index;
 }
 
-void Dudley_Util_setValuesInUse(const index_t * values, const dim_t numValues, dim_t * numValuesInUse,
-				index_t ** valuesInUse, esysUtils::JMPI& mpiinfo)
+void setValuesInUse(const int* values, dim_t numValues,
+                    std::vector<int>& valuesInUse, escript::JMPI mpiinfo)
 {
-    dim_t i;
-    index_t lastFoundValue = INDEX_T_MIN, minFoundValue, local_minFoundValue, *newValuesInUse = NULL;
-    register index_t itmp;
-    bool allFound = FALSE;
-    dim_t nv = 0;
-
-    while (!allFound)
-    {
-	/* 
-	 *  find smallest value bigger than lastFoundValue 
-	 */
-	minFoundValue = INDEX_T_MAX;
-#pragma omp parallel private(local_minFoundValue)
-	{
-	    local_minFoundValue = minFoundValue;
-#pragma omp for private(i,itmp) schedule(static)
-	    for (i = 0; i < numValues; i++)
-	    {
-		itmp = values[i];
-		if ((itmp > lastFoundValue) && (itmp < local_minFoundValue))
-		    local_minFoundValue = itmp;
-	    }
+    const int MAX_VALUE = std::numeric_limits<int>::max();
+    int lastFoundValue = std::numeric_limits<int>::min();
+    bool allFound = false;
+
+    valuesInUse.clear();
+
+    while (!allFound) {
+        // find smallest value bigger than lastFoundValue
+        int minFoundValue = MAX_VALUE;
+#pragma omp parallel
+        {
+            int local_minFoundValue = minFoundValue;
+#pragma omp for
+            for (index_t i = 0; i < numValues; i++) {
+                const int val = values[i];
+                if (val > lastFoundValue && val < local_minFoundValue)
+                    local_minFoundValue = val;
+            }
 #pragma omp critical
-	    {
-		if (local_minFoundValue < minFoundValue)
-		    minFoundValue = local_minFoundValue;
-	    }
-
-	}
+            {
+                if (local_minFoundValue < minFoundValue)
+                    minFoundValue = local_minFoundValue;
+            }
+        }
 #ifdef ESYS_MPI
-	local_minFoundValue = minFoundValue;
-	MPI_Allreduce(&local_minFoundValue, &minFoundValue, 1, MPI_INT, MPI_MIN, mpiinfo->comm);
+        int local_minFoundValue = minFoundValue;
+        MPI_Allreduce(&local_minFoundValue, &minFoundValue, 1, MPI_INT,
+                      MPI_MIN, mpiinfo->comm);
 #endif
-	/* if we found a new tag we need to add this to the valuesInUseList */
 
-	if (minFoundValue < INDEX_T_MAX)
-	{
-	    newValuesInUse = new index_t[nv + 1];
-	    if (*valuesInUse != NULL)
-	    {
-		memcpy(newValuesInUse, *valuesInUse, sizeof(index_t) * nv);
-		delete[] *valuesInUse;
-	    }
-	    newValuesInUse[nv] = minFoundValue;
-	    *valuesInUse = newValuesInUse;
-	    newValuesInUse = NULL;
-	    nv++;
-	    lastFoundValue = minFoundValue;
-	}
-	else
-	{
-	    allFound = TRUE;
-	}
+        // if we found a new value we need to add this to valuesInUse
+        if (minFoundValue < MAX_VALUE) {
+            valuesInUse.push_back(minFoundValue);
+            lastFoundValue = minFoundValue;
+        } else {
+            allFound = true;
+        }
     }
-    *numValuesInUse = nv;
 }
 
-#ifdef ESYS_MPI
-void Dudley_printDoubleArray(FILE * fid, dim_t n, double *array, char *name)
-{
-    index_t i;
-
-    if (name)
-	fprintf(fid, "%s [ ", name);
-    else
-	fprintf(fid, "[ ");
-    for (i = 0; i < (n < 60 ? n : 60); i++)
-	fprintf(fid, "%g ", array[i]);
-    if (n >= 30)
-	fprintf(fid, "... ");
-    fprintf(fid, "]\n");
-}
-
-void Dudley_printIntArray(FILE * fid, dim_t n, int *array, char *name)
-{
-    index_t i;
+} // namespace util
+} // namespace dudley
 
-    if (name)
-	fprintf(fid, "%s [ ", name);
-    else
-	fprintf(fid, "[ ");
-    for (i = 0; i < (n < 60 ? n : 60); i++)
-	fprintf(fid, "%d ", array[i]);
-    if (n >= 30)
-	fprintf(fid, "... ");
-    fprintf(fid, "]\n");
-}
-
-void Dudley_printMaskArray(FILE * fid, dim_t n, int *array, char *name)
-{
-    index_t i;
-
-    if (name)
-	fprintf(fid, "%s [ ", name);
-    else
-	fprintf(fid, "[ ");
-    for (i = 0; i < (n < 60 ? n : 60); i++)
-	if (array[i] != -1)
-	    fprintf(fid, "%3d ", array[i]);
-	else
-	    fprintf(fid, "  * ");
-    if (n >= 30)
-	fprintf(fid, "... ");
-    fprintf(fid, "]\n");
-}
-#endif
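
The Util.cpp rewrite above also changes packMask() from the old fill-a-preallocated-array interface (Dudley_Util_packMask, which returned the number of entries written) to a function that returns the collected indices as a std::vector. A minimal, self-contained sketch of the new calling convention follows; it is illustrative only and not part of this commit, and the index_t typedef is a stand-in:

    #include <cassert>
    #include <vector>

    typedef long index_t;   // stand-in for escript::DataTypes::index_t

    // same contract as the new dudley::util::packMask(): collect the indices
    // of the non-negative entries of 'mask' into a contiguous vector
    std::vector<index_t> packMask(const std::vector<short>& mask)
    {
        std::vector<index_t> index;
        for (index_t k = 0; k < (index_t)mask.size(); k++)
            if (mask[k] >= 0)
                index.push_back(k);
        return index;
    }

    int main()
    {
        const std::vector<short> mask = { 1, -1, 0, -1, 5 };
        const std::vector<index_t> idx = packMask(mask);
        assert(idx.size() == 3 && idx[0] == 0 && idx[1] == 2 && idx[2] == 4);
        return 0;
    }
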
diff --git a/dudley/src/Util.h b/dudley/src/Util.h
index 7600419..3828400 100644
--- a/dudley/src/Util.h
+++ b/dudley/src/Util.h
@@ -14,57 +14,66 @@
 *
 *****************************************************************************/
 
-/************************************************************************************/
+/// Some utility routines
 
-/*   Some utility routines: */
+#ifndef __DUDLEY_UTIL_H__
+#define __DUDLEY_UTIL_H__
 
-/************************************************************************************/
+#include "Dudley.h"
 
-#ifndef INC_DUDLEY_UTIL
-#define INC_DUDLEY_UTIL
+#include <escript/Data.h>
 
-#include "Dudley.h"
+namespace dudley {
+namespace util {
+
+typedef std::pair<index_t,index_t> IndexPair;
+typedef std::vector<IndexPair> ValueAndIndexList;
+
+/// orders a ValueAndIndexList by value.
+void sortValueAndIndex(ValueAndIndexList& array);
+
+/// gathers values into array `out` from array `in` using `index`:
+///   out(1:numData, 1:len) := in(1:numData, index(1:len))
+void gather(int len, const index_t* index, int numData, const double* in,
+            double* out);
+
+/// adds array `in` into `out` using an `index`:
+///   out(1:numData,index[p])+=in(1:numData,p) where
+///   p={k=1...len, index[k]<upperBound}
+template<typename Scalar>
+void addScatter(int len, const index_t* index, int numData,
+                const Scalar* in, Scalar* out, index_t upperBound);
+
+/// multiplies two matrices: A(1:A1,1:A2) := B(1:A1,1:B2)*C(1:B2,1:A2)
+void smallMatMult(int A1, int A2, double* A, int B2, const double* B,
+                  const double* C);
+
+/// multiplies a set of matrices with a single matrix:
+///   A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2) for i=1,len
+void smallMatSetMult1(int len, int A1, int A2, double* A, int B2,
+                      const double* B, const double* C);
+
+/// returns the normalized vector normal[dim,len] orthogonal to A(:,0,q) and
+/// A(:,1,q) in the case of dim=3, or the vector A(:,0,q) in the case of dim=2
+void normalVector(int len, int dim, int dim1, const double* A, double* normal);
+
+/// calculates the minimum and maximum value from an integer array of length
+/// N x dim
+IndexPair getMinMaxInt(int dim, dim_t N, const index_t* values);
+
+/// calculates the minimum and maximum value from an integer array of length N
+/// disregarding the value `ignore`
+IndexPair getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore);
+
+/// returns the indices of the non-negative entries in `mask` as a
+/// contiguous vector
+std::vector<index_t> packMask(const std::vector<short>& mask);
+
+void setValuesInUse(const int* values, dim_t numValues,
+                    std::vector<int>& valuesInUse, escript::JMPI mpiInfo);
+
+} // namespace util
+} // namespace dudley
+
+#endif // __DUDLEY_UTIL_H__
 
-/************************************************************************************/
-
-void Dudley_Util_Gather_double(dim_t len, index_t * index, dim_t numData, double *in, double *out);
-void Dudley_Util_Gather_int(dim_t len, index_t * index, dim_t numData, index_t * in, index_t * out);
-void Dudley_Util_AddScatter(const dim_t len, const index_t * index, const dim_t numData, const double *in, double *out, const index_t upperBound);
-void Dudley_Util_SmallMatMult(dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C);
-void Dudley_Util_SmallMatSetMult(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C);
-void Dudley_Util_SmallMatSetMult1(dim_t len, dim_t A1, dim_t A2, double *A, dim_t B2, const double *B, const double *C);
-void Dudley_Util_InvertSmallMat(dim_t len, dim_t dim, double *A, double *invA, double *det);
-void Dudley_Util_DetOfSmallMat(dim_t len, dim_t dim, double *A, double *det);
-void Dudley_NormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *Normal);
-void Dudley_LengthOfNormalVector(dim_t len, dim_t dim, dim_t dim1, double *A, double *length);
-void Dudley_Util_InvertMap(dim_t, index_t *, dim_t, index_t *);
-index_t Dudley_Util_getMaxInt(dim_t dim, dim_t N, index_t * values);
-index_t Dudley_Util_getMinInt(dim_t dim, dim_t N, index_t * values);
-index_t Dudley_Util_getFlaggedMaxInt(dim_t dim, dim_t N, index_t * values, index_t ignore);
-index_t Dudley_Util_getFlaggedMinInt(dim_t dim, dim_t N, index_t * values, index_t ignore);
-dim_t Dudley_Util_packMask(dim_t N, index_t * mask, index_t * index);
-bool Dudley_Util_isAny(dim_t N, index_t * array, index_t value);
-index_t Dudley_Util_cumsum(dim_t, index_t *);
-bool Dudley_Util_anyNonZeroDouble(dim_t N, double *values);
-void Dudley_Util_setValuesInUse(const index_t * values, const dim_t numValues, dim_t * numValuesInUse,
-				index_t ** valuesInUse, esysUtils::JMPI& mpiinfo);
-
-#ifdef ESYS_MPI
-void Dudley_printDoubleArray(FILE * fid, dim_t n, double *array, char *name);
-void Dudley_printIntArray(FILE * fid, dim_t n, int *array, char *name);
-void Dudley_printMaskArray(FILE * fid, dim_t n, int *array, char *name);
-#endif
-
-/* Dudley_Util_orderValueAndIndex is used to sort items by a value */
-/* index points to the location of the original item array. */
-/* it can be used to reorder the array */
-struct Dudley_Util_ValueAndIndex {
-    index_t index;
-    index_t value;
-};
-typedef struct Dudley_Util_ValueAndIndex Dudley_Util_ValueAndIndex;
-
-void Dudley_Util_sortValueAndIndex(dim_t n, Dudley_Util_ValueAndIndex * array);
-int Dudley_Util_ValueAndIndex_compar(const void *, const void *);
-
-#endif				/* #ifndef INC_UTIL_UTIL */
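
The gather() contract documented in the new header, out(1:numData, s) = in(1:numData, index[s]), assumes the usual escript column-major layout in which INDEX2(i, s, numData) expands to i + numData*s. A small stand-alone sketch of that behaviour, illustrative only and not part of this commit (again, index_t is a stand-in typedef):

    #include <cassert>
    #include <vector>

    typedef long index_t;   // stand-in for escript::DataTypes::index_t

    // mirrors dudley::util::gather(): out(:,s) = in(:,index[s]) with entry
    // (i,s) stored at position i + numData*s
    void gather(int len, const index_t* index, int numData,
                const double* in, double* out)
    {
        for (int s = 0; s < len; s++)
            for (int i = 0; i < numData; i++)
                out[i + numData*s] = in[i + numData*index[s]];
    }

    int main()
    {
        const double in[]   = { 1., 2.,  3., 4.,  5., 6. };  // 3 samples, 2 values each
        const index_t idx[] = { 2, 0 };                       // take samples 2 and 0
        std::vector<double> out(4);
        gather(2, idx, 2, in, out.data());
        assert(out[0] == 5. && out[1] == 6. && out[2] == 1. && out[3] == 2.);
        return 0;
    }
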
diff --git a/dudley/src/dudleycpp.cpp b/dudley/src/dudleycpp.cpp
new file mode 100644
index 0000000..9c230b2
--- /dev/null
+++ b/dudley/src/dudleycpp.cpp
@@ -0,0 +1,191 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <dudley/Dudley.h>
+#include <dudley/DomainFactory.h>
+#include <dudley/DudleyDomain.h>
+
+#include <escript/ExceptionTranslators.h>
+
+#include <boost/python.hpp>
+#include <boost/python/def.hpp>
+#include <boost/python/module.hpp>
+#include <boost/python/detail/defaults_gen.hpp>
+#include <boost/version.hpp>
+
+using namespace boost::python;
+
+BOOST_PYTHON_MODULE(dudleycpp)
+{
+// This feature was added in boost v1.34
+#if ((BOOST_VERSION/100)%1000 > 34) || (BOOST_VERSION/100000 >1)
+    // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
+    docstring_options docopt(true, true, false);
+#endif
+
+    scope().attr("__doc__") = "To use this module, please import esys.dudley";
+
+    // register escript's default translators
+    REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
+    register_exception_translator<dudley::DudleyException>(&escript::RuntimeErrorTranslator);
+
+  def("LoadMesh", dudley::DudleyDomain::load,
+      (arg("fileName") = "file.nc"), ":rtype: `DudleyDomain`");
+
+  def("ReadMesh", dudley::readMesh,
+      (arg("fileName")="file.fly", arg("integrationOrder")=-1, arg("reducedIntegrationOrder")=-1, arg("optimize")=true)
+	,"Read a mesh from a fly file. For MPI parallel runs fan out the mesh to multiple processes.\n\n"
+":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
+":param integrationOrder: order of the quadrature scheme. Ignored.\n"
+":type integrationOrder: ``int``\n"
+":param reducedIntegrationOrder: order of reduced quadrature scheme. Ignored.\n"
+":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``");
+
+  def("ReadGmsh", dudley::readGmsh,
+      (arg("fileName") = "file.msh",
+       arg("numDim"), 
+       arg("integrationOrder") = -1, 
+       arg("reducedIntegrationOrder") = -1, 
+       arg("optimize") = true)
+,"Read a gmsh mesh file\n\n"
+":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
+":param integrationOrder: order of the quadrature scheme. Always 2.\n"
+":type integrationOrder: ``int``\n"
+":param reducedIntegrationOrder: order of reduced quadrature scheme. Always 0.\n"
+":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``\n");
+
+  def ("__Brick_driver", dudley::brick_driver, arg("args"));
+  def ("__Rectangle_driver", dudley::rectangle_driver, arg("args"));
+
+  class_<dudley::DudleyDomain, bases<escript::AbstractContinuousDomain> >
+      ("DudleyDomain", "A concrete class representing a dudley domain. For more details, please consult the C++ documentation.", no_init)
+      .def(init<const dudley::DudleyDomain&>())
+      .def("write", &dudley::DudleyDomain::write, args("filename"),
+"Write the current mesh to a file with the given name.")
+      .def("print_mesh_info", &dudley::DudleyDomain::Print_Mesh_Info, (arg("full")=false),
+":param full:\n:type full: ``bool``")
+      .def("dump", &dudley::DudleyDomain::dump, args("fileName")
+,"dumps the mesh to a file with the given name.")
+      .def("getDescription", &dudley::DudleyDomain::getDescription,
+":return: a description for this domain\n:rtype: ``string``")
+      .def("getDim", &dudley::DudleyDomain::getDim,":rtype: ``int``")
+      .def("getDataShape", &dudley::DudleyDomain::getDataShape, args("functionSpaceCode"),
+":return: a pair (dps, ns) where dps=the number of data points per sample, and ns=the number of samples\n:rtype: ``tuple``")
+      .def("getNumDataPointsGlobal", &dudley::DudleyDomain::getNumDataPointsGlobal,
+":return: the number of data points summed across all MPI processes\n"
+":rtype: ``int``")
+      .def("addPDEToSystem", &dudley::DudleyDomain::addPDEToSystem,
+args("mat", "rhs","A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
+"adds a PDE onto the stiffness matrix mat and a rhs\n\n"
+":param mat:\n:type mat: `OperatorAdapter`\n:param rhs:\n:type rhs: `Data`\n"
+":param A:\n:type A: `Data`\n"
+":param B:\n:type B: `Data`\n"
+":param C:\n:type C: `Data`\n"
+":param D:\n:type D: `Data`\n"
+":param X:\n:type X: `Data`\n"
+":param Y:\n:type Y: `Data`\n"
+":param d:\n:type d: `Data`\n"
+":param d_contact:\n:type d_contact: `Data`\n"
+":param y_contact:\n:type y_contact: `Data`\n"
+)
+      .def("addPDEToLumpedSystem", &dudley::DudleyDomain::addPDEToLumpedSystem,
+args("mat", "D", "d"),
+"adds a PDE onto the lumped stiffness matrix\n\n"
+":param mat:\n:type mat: `Data`\n"
+":param D:\n:type D: `Data`\n"
+":param d:\n:type d: `Data`\n"
+":param useHRZ:\n:type useHRZ: bool\n"
+)
+      .def("addPDEToRHS", &dudley::DudleyDomain::addPDEToRHS, 
+args("rhs", "X", "Y", "y", "y_contact"),
+"adds a PDE onto the stiffness matrix mat and a rhs\n\n"
+":param rhs:\n:type rhs: `Data`\n"
+":param X:\n:type X: `Data`\n"
+":param Y:\n:type Y: `Data`\n"
+":param y:\n:type y: `Data`\n"
+":param y_contact:\n:type y_contact: `Data`"
+)
+      .def("addPDEToTransportProblem", &dudley::DudleyDomain::addPDEToTransportProblem,
+args( "tp", "source", "M", "A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
+":param tp:\n:type tp: `AbstractTransportProblem`\n"
+":param source:\n:type source: `Data`\n"
+":param M:\n:type M: `Data`\n"
+":param A:\n:type A: `Data`\n"
+":param B:\n:type B: `Data`\n"
+":param C:\n:type C: `Data`\n"
+":param D:\n:type D: `Data`\n"
+":param X:\n:type X: `Data`\n"
+":param Y:\n:type Y: `Data`\n"
+":param d:\n:type d: `Data`\n"
+":param y:\n:type y: `Data`\n"
+":param d_contact:\n:type d_contact: `Data`\n"
+":param y_contact:\n:type y_contact: `Data`\n"
+)
+      .def("newOperator", &dudley::DudleyDomain::newSystemMatrix,
+args("row_blocksize", "row_functionspace", "column_blocksize", "column_functionspace", "type"),
+"creates a stiffness matrix and initializes it with zeros\n\n"
+":param row_blocksize:\n:type row_blocksize: ``int``\n"
+":param row_functionspace:\n:type row_functionspace: `FunctionSpace`\n"
+":param column_blocksize:\n:type column_blocksize: ``int``\n"
+":param column_functionspace:\n:type column_functionspace: `FunctionSpace`\n"
+":param type:\n:type type: ``int``\n"
+)
+      .def("newTransportProblem", &dudley::DudleyDomain::newTransportProblem,
+args("theta", "blocksize", "functionspace", "type"),
+"creates a TransportProblem\n\n"
+":param theta:\n:type theta: ``float``\n"
+":param blocksize:\n:type blocksize: ``int``\n"
+":param functionspace:\n:type functionspace: `FunctionSpace`\n"
+":param type:\n:type type: ``int``\n"
+)
+      .def("getSystemMatrixTypeId", &dudley::DudleyDomain::getSystemMatrixTypeId,
+args("options"),
+":return: the identifier of the matrix type to be used for the global stiffness matrix when particular solver options are used.\n"
+":rtype: ``int``\n"
+":param options:\n:type options: `SolverBuddy`\n"
+)
+      .def("getTransportTypeId", &dudley::DudleyDomain::getTransportTypeId,
+args("solver", "preconditioner", "package", "symmetry"),
+":return: the identifier of the transport problem type to be used when a particular solver, preconditioner, package and symmetric matrix is used.\n"
+":rtype: ``int``\n"
+":param solver:\n:type solver: ``int``\n"
+":param preconditioner:\n:type preconditioner: ``int``\n"
+":param package:\n:type package: ``int``\n"
+":param symmetry:\n:type symmetry: ``int``\n"
+)
+      .def("setX", &dudley::DudleyDomain::setNewX,
+args("arg"), "assigns new location to the domain\n\n:param arg:\n:type arg: `Data`")
+      .def("getX", &dudley::DudleyDomain::getX, ":return: locations in the FEM nodes\n\n"
+":rtype: `Data`")
+      .def("getNormal", &dudley::DudleyDomain::getNormal,
+":return: boundary normals at the quadrature point on the face elements\n"
+":rtype: `Data`")
+      .def("getSize", &dudley::DudleyDomain::getSize,":return: the element size\n"
+":rtype: `Data`")
+      .def("setTagMap", &dudley::DudleyDomain::setTagMap,args("name","tag"),
+"Give a tag number a name.\n\n:param name: Name for the tag\n:type name: ``string``\n"
+":param tag: numeric id\n:type tag: ``int``\n:note: Tag names must be unique within a domain")
+      .def("getTag", &dudley::DudleyDomain::getTag,args("name"),":return: tag id for "
+"``name``\n:rtype: ``string``")
+      .def("isValidTagName", &dudley::DudleyDomain::isValidTagName,args("name"),
+":return: True is ``name`` corresponds to a tag\n:rtype: ``bool``")
+      .def("showTagNames", &dudley::DudleyDomain::showTagNames,":return: A space separated list of tag names\n:rtype: ``string``")
+      .def("getMPISize", &dudley::DudleyDomain::getMPISize,":return: the number of processes used for this `Domain`\n:rtype: ``int``")
+      .def("getMPIRank", &dudley::DudleyDomain::getMPIRank,":return: the rank of this process\n:rtype: ``int``")
+      .def("MPIBarrier", &dudley::DudleyDomain::MPIBarrier,"Wait until all processes have reached this point")
+      .def("onMasterProcessor", &dudley::DudleyDomain::onMasterProcessor,":return: True if this code is executing on the master process\n:rtype: `bool`");
+}
+
diff --git a/dudley/test/MeshAdapterTestCase.cpp b/dudley/test/DudleyDomainTestCase.cpp
similarity index 50%
rename from dudley/test/MeshAdapterTestCase.cpp
rename to dudley/test/DudleyDomainTestCase.cpp
index 5512e2c..ea08137 100644
--- a/dudley/test/MeshAdapterTestCase.cpp
+++ b/dudley/test/DudleyDomainTestCase.cpp
@@ -14,16 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <escript/AbstractContinuousDomain.h>
 
+#include "DudleyDomainTestCase.h"
 
-#include "MeshAdapterTestCase.h"
-
-#include "dudley/CppAdapter/MeshAdapter.h"
-#include "dudley/CppAdapter/MeshAdapterFactory.h"
-
-#include "escript/AbstractContinuousDomain.h"
+#include <dudley/DomainFactory.h>
 
 #include <cppunit/TestCaller.h>
 #include <boost/scoped_ptr.hpp>
@@ -32,20 +27,19 @@ using namespace escript;
 using namespace dudley;
 using namespace CppUnit;
 
-void MeshAdapterTestCase::testAll()
+void DudleyDomainTestCase::testAll()
 {
-    // test construction of a mesh using the brick factory method
-    //   boost::scoped_ptr<AbstractContinuousDomain> myMesh(brick());
-    esysUtils::JMPI info=esysUtils::makeInfo(MPI_COMM_WORLD);
-	brick(info); // brick now returns a Domain_ptr which will auto delete
+    JMPI info = makeInfo(MPI_COMM_WORLD);
+	Domain_ptr dom(brick(info));
+    CPPUNIT_ASSERT(dom->getDim() == 3);
 }
 
-TestSuite* MeshAdapterTestCase::suite()
+TestSuite* DudleyDomainTestCase::suite()
 {
-    TestSuite *testSuite = new TestSuite("MeshAdapterTestCase");
+    TestSuite *testSuite = new TestSuite("DudleyDomainTestCase");
 
-    testSuite->addTest(new TestCaller<MeshAdapterTestCase>(
-                "testAll",&MeshAdapterTestCase::testAll));
+    testSuite->addTest(new TestCaller<DudleyDomainTestCase>(
+                "testAll", &DudleyDomainTestCase::testAll));
     return testSuite;
 }
 
diff --git a/escriptcore/test/DataBlocks2DTestCase.h b/dudley/test/DudleyDomainTestCase.h
similarity index 80%
copy from escriptcore/test/DataBlocks2DTestCase.h
copy to dudley/test/DudleyDomainTestCase.h
index 9d278a5..0c9f07e 100644
--- a/escriptcore/test/DataBlocks2DTestCase.h
+++ b/dudley/test/DudleyDomainTestCase.h
@@ -15,13 +15,13 @@
 *****************************************************************************/
 
 
-#if !defined  DataBlocks2DTestCase_20040405_H
-#define  DataBlocks2DTestCase_20040405_H
+#ifndef __DUDLEY_DOMAIN_TESTCASE_H__
+#define __DUDLEY_DOMAIN_TESTCASE_H__
 
 #include <cppunit/TestFixture.h>
 #include <cppunit/TestSuite.h>
 
-class DataBlocks2DTestCase : public CppUnit::TestFixture
+class DudleyDomainTestCase : public CppUnit::TestFixture
 {
 public:
   void testAll();
@@ -29,5 +29,5 @@ public:
   static CppUnit::TestSuite* suite();
 };
 
-#endif
+#endif // __DUDLEY_DOMAIN_TESTCASE_H__
 
diff --git a/dudley/test/SConscript b/dudley/test/SConscript
index 9708ccb..47f9c68 100644
--- a/dudley/test/SConscript
+++ b/dudley/test/SConscript
@@ -14,17 +14,16 @@
 #
 ##############################################################################
 
-
 Import('*')
 local_env = env.Clone()
 
 if local_env['cppunit']:
     # get the test source file names
-    sources = Glob('*.cpp')+Glob('*.c')
-    testname='dudley_UnitTest'
+    sources = Glob('*.cpp')
+    testname = 'dudley_UnitTest'
 
     # build the executable
-    local_env.Prepend(LIBS=['dudley', 'pasowrap', 'paso', 'escript', 'esysUtils']+env['cppunit_libs'])
+    local_env.PrependUnique(LIBS=env['dudley_libs']+env['cppunit_libs'])
     program = local_env.Program(testname, sources)
 
     # run the tests - but only if test_targets are stale
@@ -35,10 +34,10 @@ if local_env['cppunit']:
     Alias("run_tests", testname+'.passed')
 
     # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/dudley/test", ('./'+testname,))
+    from grouptest import GroupTest
+    tgroup=GroupTest("dudleycpp", "$BINRUNNER ", (), "", "$BUILD_DIR/dudley/test", ('./'+testname,))
     TestGroups.append(tgroup)
 
 # configure python unit tests
-local_env.SConscript(dirs = ['#/dudley/test/python'], variant_dir='python', duplicate=0, exports=['py_wrapper_lib'])
+local_env.SConscript('python/SConscript', duplicate=0)
 
diff --git a/dudley/test/dudley_UnitTests.cpp b/dudley/test/dudley_UnitTests.cpp
index 4ea53eb..18f7fcf 100644
--- a/dudley/test/dudley_UnitTests.cpp
+++ b/dudley/test/dudley_UnitTests.cpp
@@ -14,18 +14,17 @@
 *
 *****************************************************************************/
 
+#include <escript/EsysMPI.h>
+
+#include "DudleyDomainTestCase.h"
 
-#include "MeshAdapterTestCase.h"
 #include <cppunit/CompilerOutputter.h>
 #include <cppunit/TestResult.h>
 #include <cppunit/TestResultCollector.h>
 #include <cppunit/TestRunner.h>
 
-
 using namespace CppUnit;
 
-#include "esysUtils/Esys_MPI.h"
-
 int main(int argc, char* argv[])
 {
 #ifdef ESYS_MPI
@@ -39,9 +38,9 @@ int main(int argc, char* argv[])
     TestResultCollector result;
     controller.addListener(&result);
     TestRunner runner;
-    runner.addTest(MeshAdapterTestCase::suite());
+    runner.addTest(DudleyDomainTestCase::suite());
     runner.run(controller);
-    CompilerOutputter outputter( &result, std::cerr );
+    CompilerOutputter outputter(&result, std::cerr);
     outputter.write();
 #ifdef ESYS_MPI
     MPI_Finalize();
diff --git a/dudley/test/python/FCT_benchmark.py b/dudley/test/python/FCT_benchmark.py
old mode 100755
new mode 100644
diff --git a/dudley/test/python/SConscript b/dudley/test/python/SConscript
index 0716d38..84787e7 100644
--- a/dudley/test/python/SConscript
+++ b/dudley/test/python/SConscript
@@ -14,71 +14,51 @@
 #
 ##############################################################################
 
-
-import os
+from os.path import splitext
 Import('*')
 
 local_env = env.Clone()
 
-# 
 #  files defining test runs (passing in a release)
-# 
-testruns = []
-testruns += ['run_escriptOnDudley.py']
-testruns += ['run_inputOutput.py']
-testruns += ['run_linearPDEsOnDudley1.py']
-testruns += ['run_linearPDEsOnDudley2.py']
-testruns += ['run_nlpde2dOnDudley.py']
-testruns += ['run_nlpde3dOnDudley.py']
-testruns += ['run_models.py']
-testruns += ['run_simplesolve.py']
-testruns += ['run_utilOnDudley.py']
-# 
+testruns = Glob('run_*.py', strings=True)
+
 #  files defining a few tests for a quick test
-# 
 scalable_tests = []
 scalable_tests += ['run_inputOutput.py']
-scalable_tests += ['run_simplesolve.py']
-#
+scalable_tests += ['run_pasoSolversOnDudley.py']
+scalable_tests += ['run_trilinosSolversOnDudley.py']
+
 # files defining tests run locally (not as part of a release)
-#
 localtestruns = [x for x in Glob('*.py', strings=True) if not x.startswith('run_')]
 
-#
 # all test 
-#
 alltestruns = testruns + localtestruns
-#
-# test files are just compiled:
 
+# test files are just compiled
 test_pyc = env.PyCompile(alltestruns)
 env.Alias('build_py_tests', test_pyc)
 
-#Add Unit Test to target alias
+# add unit test to target alias
 local_env.PrependENVPath('PYTHONPATH', Dir('.').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/dudley/test/python').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/escriptcore/test/python').abspath)
-local_env['ENV']['DUDLEY_TEST_DATA']=Dir('.').srcnode().abspath
-local_env['ENV']['DUDLEY_WORKDIR']=Dir('.').abspath
+local_env['ENV']['DUDLEY_TEST_DATA'] = Dir('.').srcnode().abspath
+local_env['ENV']['DUDLEY_WORKDIR'] = Dir('.').abspath
 # needed for a test from the util base class in escript
-local_env['ENV']['ESCRIPT_WORKDIR']=Dir('.').abspath
-env.Alias('local_py_tests',[os.path.splitext(x)[0]+'.passed' for x in alltestruns])
-env.Alias('py_tests', [os.path.splitext(x)[0]+'.passed' for x in testruns ])
-env.Alias('scalable_tests', [os.path.splitext(x)[0]+'.passed' for x in scalable_tests ])
+local_env['ENV']['ESCRIPT_WORKDIR'] = Dir('.').abspath
+env.Alias('local_py_tests', [splitext(x)[0]+'.passed' for x in alltestruns])
+env.Alias('py_tests', [splitext(x)[0]+'.passed' for x in testruns])
+env.Alias('scalable_tests', [splitext(x)[0]+'.passed' for x in scalable_tests])
 
-#
-# run all tests:
-#
 # run all tests
 program = local_env.RunPyUnitTest(alltestruns)
-Depends(program, py_wrapper_lib)
-Depends(program, 'build_py_tests')
+Requires(program, ['install_escript','build_py_tests'])
 if env['usempi']:
-    Depends(program, env['prefix']+"/lib/pythonMPI")
+    Requires(program, ['install_pythonMPI'])
 
-# Add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("DUDLEY_TEST_DATA","$BATCH_ROOT/dudley/test/python"),('DUDLEY_WORKDIR','$BUILD_DIR/dudley/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/dudley/test/python","$BATCH_ROOT/dudley/test/python",testruns)
+# add a group of tests
+from grouptest import GroupTest
+tgroup=GroupTest("dudley", "$PYTHONRUNNER ", (("DUDLEY_TEST_DATA","$BATCH_ROOT/dudley/test/python"),('DUDLEY_WORKDIR','$BUILD_DIR/dudley/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/dudley/test/python", "$BATCH_ROOT/dudley/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/dudley/test/python")
 TestGroups.append(tgroup)
 
diff --git a/dudley/test/python/axisymm-splitB.py b/dudley/test/python/axisymm-splitB.py
old mode 100755
new mode 100644
diff --git a/dudley/test/python/blocktest.py b/dudley/test/python/blocktest.py
old mode 100755
new mode 100644
diff --git a/dudley/test/python/data_meshes/brick_8x10x12.fly b/dudley/test/python/data_meshes/brick_8x10x12.fly
index b76ece8..d6082d4 100644
--- a/dudley/test/python/data_meshes/brick_8x10x12.fly
+++ b/dudley/test/python/data_meshes/brick_8x10x12.fly
@@ -2850,3 +2850,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/dudley/test/python/data_meshes/rectangle_8x10.fly b/dudley/test/python/data_meshes/rectangle_8x10.fly
index 3aabbc8..54fff8b 100644
--- a/dudley/test/python/data_meshes/rectangle_8x10.fly
+++ b/dudley/test/python/data_meshes/rectangle_8x10.fly
@@ -224,3 +224,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/dudley/test/python/data_meshes/tagtest2.fly b/dudley/test/python/data_meshes/tagtest2.fly
index 29307d4..6f6676b 100644
--- a/dudley/test/python/data_meshes/tagtest2.fly
+++ b/dudley/test/python/data_meshes/tagtest2.fly
@@ -19,3 +19,4 @@ tag2 8
 tag3 6
 tag4 7
 All 10
+
diff --git a/dudley/test/python/data_meshes/tet10.fly b/dudley/test/python/data_meshes/tet10.fly
index 5601ac3..c56ac99 100644
--- a/dudley/test/python/data_meshes/tet10.fly
+++ b/dudley/test/python/data_meshes/tet10.fly
@@ -63,3 +63,4 @@ Tri6 12
 15 24 7 6 8 32 24 25
 Tri6_Contact 0
 Point1 0
+
diff --git a/dudley/test/python/data_meshes/tet4.fly b/dudley/test/python/data_meshes/tet4.fly
index 491f872..692552a 100644
--- a/dudley/test/python/data_meshes/tet4.fly
+++ b/dudley/test/python/data_meshes/tet4.fly
@@ -37,3 +37,4 @@ Tri3 12
 15 24 7 6 8
 Tri3_Contact 0
 Point1 0
+
diff --git a/dudley/test/python/data_meshes/tet_2D_order1.fly b/dudley/test/python/data_meshes/tet_2D_order1.fly
index d519039..ab4e279 100644
--- a/dudley/test/python/data_meshes/tet_2D_order1.fly
+++ b/dudley/test/python/data_meshes/tet_2D_order1.fly
@@ -52,3 +52,4 @@ Line2 12
 30 20 11 10
 31 20 0 11
 Point1 0
+
diff --git a/dudley/test/python/data_meshes/tet_3D_order1.fly b/dudley/test/python/data_meshes/tet_3D_order1.fly
index 54c2b53..3c83a40 100644
--- a/dudley/test/python/data_meshes/tet_3D_order1.fly
+++ b/dudley/test/python/data_meshes/tet_3D_order1.fly
@@ -82,4 +82,5 @@ Tri3 36
 44 1 17 16 18 
 58 100 19 18 16
 53 10 17 18 19 
-Point1 0
\ No newline at end of file
+Point1 0
+
diff --git a/dudley/test/python/data_meshes/tri3.fly b/dudley/test/python/data_meshes/tri3.fly
index 53f3646..2009b8f 100644
--- a/dudley/test/python/data_meshes/tri3.fly
+++ b/dudley/test/python/data_meshes/tri3.fly
@@ -20,3 +20,4 @@ tag2 8
 tag3 6
 tag4 7
 All 10
+
diff --git a/dudley/test/python/linearElastic.py b/dudley/test/python/linearElastic.py
old mode 100755
new mode 100644
diff --git a/dudley/test/python/run_escriptOnDudley.py b/dudley/test/python/run_escriptOnDudley.py
index 4c8771b..479e948 100644
--- a/dudley/test/python/run_escriptOnDudley.py
+++ b/dudley/test/python/run_escriptOnDudley.py
@@ -30,7 +30,7 @@ from esys.escriptcore.testing import *
 from esys.escript import *
 from esys.dudley import Rectangle, Brick
 from test_objects import Test_Dump, Test_SetDataPointValue, Test_saveCSV, \
-        Test_TableInterpolation, Test_Domain, Test_GlobalMinMax, Test_Lazy
+        Test_TableInterpolation, Test_Domain, Test_Lazy
 from test_shared import Test_Shared
 
 try:
@@ -38,7 +38,7 @@ try:
 except KeyError:
      DUDLEY_WORKDIR='.'
 
-NE=4 # number elements, must be even
+NE = max(4, getMPISizeWorld())
 
 class Test_SharedOnDudley(Test_Shared):
   def setUp(self):
@@ -51,7 +51,7 @@ class Test_SharedOnDudley(Test_Shared):
 class Test_DomainOnDudley(Test_Domain):
    def setUp(self):
        self.boundary_tag_list = [1, 2, 10, 20]
-       self.domain =Rectangle(NE,NE+1)
+       self.domain = Rectangle(NE,NE+1)
        self.rdomain=self.domain
 
    def tearDown(self):
@@ -63,7 +63,7 @@ class Test_DomainOnDudley(Test_Domain):
        domain=Rectangle(NE,NE)
        x=domain.getX()
        z=interpolate(x, Function(domain))
-       self.assertRaises(RuntimeError, domain.setX, z)
+       self.assertRaises(ValueError, domain.setX, z)
        del x
        del z
        del domain
@@ -100,25 +100,38 @@ class Test_DomainOnDudley(Test_Domain):
        if getMPISizeWorld() == 1: self.assertTrue(len(tags)==len(ref_tags), "tags list has wrong length.")
        for i in tags: self.assertTrue(i in ref_tags,"tag %s is missing."%i)
 
-class Test_DataOpsOnDudley(Test_Dump, Test_SetDataPointValue, Test_GlobalMinMax, Test_Lazy):
+class Test_DumpOnDudley(Test_Dump):
    def setUp(self):
-       self.domain =Rectangle(NE,NE+1)
-       self.domain_with_different_number_of_samples =Rectangle(2*NE,NE+1)
-       self.domain_with_different_number_of_data_points_per_sample =Rectangle(2*NE,NE+1,integrationOrder=2)
-       self.domain_with_different_sample_ordering =Rectangle(NE,NE+1, optimize=True)
-       self.filename_base=DUDLEY_WORKDIR
-       self.mainfs=Function(self.domain)
-       self.otherfs=Solution(self.domain)
+       self.domain = Rectangle(NE,NE+1)
+       self.domain_with_different_number_of_samples = Rectangle(2*NE,NE+1)
+       self.domain_with_different_number_of_data_points_per_sample = Rectangle(2*NE,NE+1,integrationOrder=2)
+       self.domain_with_different_sample_ordering = Rectangle(NE, NE+1, optimize=True)
+       self.filename_base = DUDLEY_WORKDIR
 
    def tearDown(self):
        del self.domain
        del self.domain_with_different_number_of_samples
        del self.domain_with_different_number_of_data_points_per_sample
        del self.domain_with_different_sample_ordering
+
+class Test_SetDataPointValueOnDudley(Test_SetDataPointValue):
+   def setUp(self):
+       self.domain = Rectangle(NE,NE+1)
+
+   def tearDown(self):
+       del self.domain
+
+class Test_LazyOnDudley(Test_Lazy):
+   def setUp(self):
+       self.domain = Rectangle(NE,NE+1)
+       self.mainfs = Function(self.domain)
+       self.otherfs = Solution(self.domain)
+
+   def tearDown(self):
+       del self.domain
        del self.mainfs
        del self.otherfs
 
-
 class Test_TableInterpolationOnDudley(Test_TableInterpolation):
     def setUp(self):
         self.domain=Brick(4,4,4)
@@ -140,13 +153,13 @@ class Test_CSVOnDudley(Test_saveCSV):
         NE0=NE
         NE1=NE+1
         self.domain=Rectangle(NE0,NE1)
-        self.functionspaces=[ContinuousFunction, ReducedContinuousFunction]
+        self.functionspaces=[ ContinuousFunction ]
         # number of total data points for each function space
-        self.linecounts=[ (NE0+1)*(NE1+1)+1, (NE0+1)*(NE1+1)+1 ]
+        self.linecounts=[ (NE0+1)*(NE1+1)+1 ]
         # number of masked points, i.e. where X[0] is non-zero
-        self.linecounts_masked=[ NE0*(NE1+1)+1, NE0*(NE1+1)+1 ]
+        self.linecounts_masked=[ NE0*(NE1+1)+1 ]
         # expected values in first line of masked data = [ X[:], X[0] ]
-        self.firstline=[ [1./NE0, 0., 1./NE0], [1./NE0, 0., 1./NE0] ]
+        self.firstline=[ [1./NE0, 0., 1./NE0] ]
 
         if getMPISizeWorld() == 1:
             self.functionspaces += [ Function, ReducedFunction,
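
The saveCSV expectations above follow directly from the mesh: a ContinuousFunction on an NE0 x NE1 Rectangle has (NE0+1)*(NE1+1) nodes, and the extra 1 in each expected count is presumably the CSV header line. A quick sketch of the arithmetic, assuming NE0=4 and NE1=5 (the single-rank default of NE and NE+1):

    # Expected line counts for saveCSV on a ContinuousFunction over an
    # NE0 x NE1 Rectangle: one header line plus one row per node.
    NE0, NE1 = 4, 5                       # NE and NE+1 for the default NE=4
    total  = (NE0 + 1) * (NE1 + 1) + 1    # all nodes + header -> 31
    masked = NE0 * (NE1 + 1) + 1          # nodes with X[0] != 0, + header -> 25
    print(total, masked)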
diff --git a/dudley/test/python/run_inputOutput.py b/dudley/test/python/run_inputOutput.py
index ab464ba..93f9855 100644
--- a/dudley/test/python/run_inputOutput.py
+++ b/dudley/test/python/run_inputOutput.py
@@ -38,7 +38,7 @@ Test suite for input and output of meshes and data objects
 import esys.escriptcore.utestselect as unittest, sys
 from esys.escriptcore.testing import *
 from esys.escript import *
-from esys.dudley import Rectangle, Brick, LoadMesh, ReadMesh, ReadGmsh, ReadGmsh
+from esys.dudley import Rectangle, Brick, LoadMesh, ReadMesh, ReadGmsh
 
 try:
      DUDLEY_WORKDIR=os.environ['DUDLEY_WORKDIR']
@@ -59,7 +59,7 @@ NE0 = 7 * getMPISizeWorld()
 NE1 = 11
 NE2 = 5
 
-class Test_InputOutput(unittest.TestCase):
+class Test_InputOutputOnDudley(unittest.TestCase):
 
      # Check that two domains are equal using Fourier integrals
      # We cannot compare the X coordinates since they are on different domains
@@ -88,24 +88,12 @@ class Test_InputOutput(unittest.TestCase):
         mydomain2 = Rectangle(n0=NE0, n1=NE1, order=1, l0=1., l1=1., optimize=True)
         self.domainsEqual(mydomain1, mydomain2)
 
-     # Does optimize=True change Rectangle for order=-1?
-     def test_Rectangle_optimize_macro(self):
-        mydomain1 = Rectangle(n0=NE0, n1=NE1, order=-1, l0=1., l1=1., optimize=False)
-        mydomain2 = Rectangle(n0=NE0, n1=NE1, order=-1, l0=1., l1=1., optimize=True)
-        self.domainsEqual(mydomain1, mydomain2)
-
      # Does optimize=True change Brick for order=1?
      def test_Brick_optimize_order1(self):
         mydomain1 = Brick(n0=NE0, n1=NE1, n2=NE2, order=1, l0=1., l1=1., l2=1., optimize=False)
         mydomain2 = Brick(n0=NE0, n1=NE1, n2=NE2, order=1, l0=1., l1=1., l2=1., optimize=True)
         self.domainsEqual(mydomain1, mydomain2)
 
-     # Does optimize=True change Brick for order=-1?
-     def test_Brick_optimize_macro(self):
-        mydomain1 = Brick(n0=NE0, n1=NE1, n2=NE2, order=-1, l0=1., l1=1., l2=1., optimize=False)
-        mydomain2 = Brick(n0=NE0, n1=NE1, n2=NE2, order=-1, l0=1., l1=1., l2=1., optimize=True)
-        self.domainsEqual(mydomain1, mydomain2)
-
      @unittest.skipIf(not loadIsConfigured(), "loading not configured")
      def test_data_dump_to_NetCDF_rectangle(self):
         mydomain1 = Rectangle(n0=NE0, n1=NE1, order=1, l0=1., l1=1., optimize=False)
@@ -134,7 +122,7 @@ class Test_InputOutput(unittest.TestCase):
         mydomain2=LoadMesh(dumpfile)
         self.domainsEqual(mydomain1, mydomain2)
 
-     @unittest.skipIf(getEscriptParamInt('MPIBUILD', 0), "MPI build")
+     @unittest.skipIf(getMPISizeWorld()>1, "number of MPI ranks > 1")
      def test_gmshTags(self):
         dom=ReadGmsh(os.path.join(DUDLEY_TEST_MESH_PATH, "tagtest.msh"),2)
         tags=dom.showTagNames().split(', ')
@@ -142,18 +130,18 @@ class Test_InputOutput(unittest.TestCase):
         self.assertEqual(dom.getTag('tag1'),1,'error with tag1')
         self.assertEqual(dom.getTag('tag2'),2,'error with tag2')
         self.assertEqual(dom.getTag('tag3'),3,'error with tag3')
-        self.assertRaises(RuntimeError, dom.getTag, 'tag4')
+        self.assertRaises(ValueError, dom.getTag, 'tag4')
 
      def test_flyTags(self):
         dom=ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH, "tagtest2.fly"))
-        tags=dom.showTagNames().split(', ')
-        self.assertEqual(tags,['tag1', 'tag2', 'tag3', 'tag4', 'All'])
+        tags=sorted(dom.showTagNames().split(', '))
+        self.assertEqual(tags,sorted(['tag1', 'tag2', 'tag3', 'tag4', 'All']))
         self.assertEqual(dom.getTag('tag1'),5,'error with tag1')
         self.assertEqual(dom.getTag('tag2'),8,'error with tag2,')
         self.assertEqual(dom.getTag('tag3'),6,'error with tag3')
         self.assertEqual(dom.getTag('tag4'),7,'error with tag4')
         self.assertEqual(dom.getTag('All'),10,'error with All')
-        self.assertRaises(RuntimeError, dom.getTag, 'tag6')
+        self.assertRaises(ValueError, dom.getTag, 'tag6')
 
      @unittest.skipIf(not loadIsConfigured(), "loading not configured")
      def test_mesh_dump_to_NetCDF_brick(self):
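
Two of the changes above are robustness fixes: the gmsh tag test is now skipped based on the actual number of ranks (getMPISizeWorld() > 1) rather than on whether the build has MPI compiled in, and test_flyTags compares sorted tag lists so the check no longer depends on the order in which showTagNames() happens to report tags. A minimal standalone sketch of the order-independent comparison; the reported string here is made up for illustration:

    import unittest

    class TagOrderExample(unittest.TestCase):
        def test_tags_order_independent(self):
            # The reporting order may vary, so sort both sides before comparing.
            reported = "tag3, tag1, All, tag4, tag2"   # hypothetical showTagNames() output
            tags = sorted(reported.split(', '))
            self.assertEqual(tags, sorted(['tag1', 'tag2', 'tag3', 'tag4', 'All']))

    if __name__ == '__main__':
        unittest.main()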
diff --git a/dudley/test/python/run_linearPDEsOnDudley1.py b/dudley/test/python/run_linearPDEsOnDudley1.py
index f9fb3d1..3868191 100644
--- a/dudley/test/python/run_linearPDEsOnDudley1.py
+++ b/dudley/test/python/run_linearPDEsOnDudley1.py
@@ -39,42 +39,85 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do1, Test_assemblage_3Do1
+from test_pdetools import Test_pdetools
 from esys.escript import *
-from esys.dudley import Rectangle,Brick, ReadMesh
-import sys
+from esys.dudley import Rectangle, Brick
 
+NE=10 # number of elements in each spatial direction (must be even)
 
-try:
-     DUDLEY_TEST_DATA=os.environ['DUDLEY_TEST_DATA']
-except KeyError:
-     DUDLEY_TEST_DATA='.'
+class Test_LinearPDEOnDudleyRect(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-DUDLEY_TEST_MESH_PATH=os.path.join(DUDLEY_TEST_DATA,"data_meshes")
+class Test_LinearPDEOnDudleyBrick(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-NE=10 # number of element in each spatial direction (must be even)
+class Test_PDEToolsOnDudleyRect(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnDudleyRectOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_PDEToolsOnDudleyBrick(Test_pdetools):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = Rectangle(NE,NE,1)
+        self.domain = Brick(NE,NE,NE)
         self.order = 1
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnDudleyBrickOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_AssemblageOnDudleyRect(Test_assemblage_2Do1):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = Brick(NE,NE,NE,1)
+        self.domain = Rectangle(NE,NE)
         self.order = 1
    def tearDown(self):
         del self.domain
 
+class Test_AssemblageOnDudleyBrick(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnDudleyRect(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnDudleyBrick(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
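
The restructuring above replaces the two classes that inherited Test_LinearPDE, Test_pdetools, Test_assemblage_* and Test_TransportPDE all at once with one class per mixin and domain, which lets a failure name the feature and domain directly and lets individual groups be run or skipped on their own. A condensed sketch of the pattern with plain unittest, using hypothetical mixins standing in for the escript test classes:

    import unittest

    class FeatureAMixin:                      # stands in for e.g. Test_LinearPDE
        def test_a(self):
            self.assertGreater(self.ne, 0)

    class FeatureBMixin:                      # stands in for e.g. Test_pdetools
        def test_b(self):
            self.assertEqual(self.ne % 2, 0)

    # One concrete class per mixin/domain combination instead of a single
    # class inheriting every mixin at once.
    class Test_FeatureAOnRect(FeatureAMixin, unittest.TestCase):
        def setUp(self):
            self.ne = 10
        def tearDown(self):
            del self.ne

    class Test_FeatureBOnRect(FeatureBMixin, unittest.TestCase):
        def setUp(self):
            self.ne = 10
        def tearDown(self):
            del self.ne

    if __name__ == '__main__':
        unittest.main()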
diff --git a/dudley/test/python/run_linearPDEsOnDudley2.py b/dudley/test/python/run_linearPDEsOnDudley2.py
index 4568a5e..89b6129 100644
--- a/dudley/test/python/run_linearPDEsOnDudley2.py
+++ b/dudley/test/python/run_linearPDEsOnDudley2.py
@@ -39,14 +39,10 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.dudley import Rectangle,Brick, ReadMesh
-import sys
-
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do1, Test_assemblage_3Do1
+from test_pdetools import Test_pdetools
+from esys.dudley import ReadMesh
 
 try:
      DUDLEY_TEST_DATA=os.environ['DUDLEY_TEST_DATA']
@@ -55,24 +51,74 @@ except KeyError:
 
 DUDLEY_TEST_MESH_PATH=os.path.join(DUDLEY_TEST_DATA,"data_meshes")
 
-NE=6 # number of element in each spatial direction (must be even)
+class Test_LinearPDEOnDudleyTet2D(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnDudleyTet3D(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnDudleyTet2D(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnDudleyTet3D(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnDudleyTet2DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_AssemblageOnDudleyTet2D(Test_assemblage_2Do1):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
-   VERBOSE=False
    def setUp(self):
         self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
         self.order = 1
    def tearDown(self):
         del self.domain
 
+class Test_AssemblageOnDudleyTet3D(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnDudleyTet2D(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnDudleyTet3DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_TransportPDEOnDudleyTet3D(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=False)
+        self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
         self.order = 1
    def tearDown(self):
         del self.domain
diff --git a/dudley/test/python/run_models.py b/dudley/test/python/run_models.py
index b95a5f7..9952e27 100644
--- a/dudley/test/python/run_models.py
+++ b/dudley/test/python/run_models.py
@@ -34,13 +34,8 @@ from esys.dudley import Rectangle, Brick
 
 from math import pi
 import numpy, os, sys, tempfile
-#======================================================================
-try:
-     DUDLEY_WORKDIR=os.environ['DUDLEY_WORKDIR']
-except KeyError:
-     DUDLEY_WORKDIR='.'
 
-#======================================================================
+
 class Darcy(unittest.TestCase): #subclassing required
     # this is a simple test for the darcy flux problem
     #
@@ -69,6 +64,7 @@ class Darcy(unittest.TestCase): #subclassing required
              else:
                 x[i]=self.WIDTH*(x[i]-x_inf)/(x_sup-x_inf)
         self.dom.setX(x)
+
     def getScalarMask(self,include_bottom=True):
         x=self.dom.getX().copy()
         x_inf=inf(x[self.dom.getDim()-1])
@@ -76,6 +72,7 @@ class Darcy(unittest.TestCase): #subclassing required
         out=whereZero(x[self.dom.getDim()-1]-x_sup)
         if include_bottom: out+=whereZero(x[self.dom.getDim()-1]-x_inf)
         return wherePositive(out)
+
     def getVectorMask(self,include_bottom=True):
         x=self.dom.getX().copy()
         out=Vector(0.,Solution(self.dom))
@@ -107,10 +104,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u_ref,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
     def testConstF_FixedBottom_mediumK(self):
         k=1.
         mp=self.getScalarMask(include_bottom=True)
@@ -123,10 +120,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p )
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
 
     def testConstF_FixedBottom_largeK(self):
         k=1.e10
@@ -140,10 +137,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p )
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FixedBottom_smallK(self):
         k=1.e-10
@@ -158,11 +155,11 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p )
         
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FixedBottom_mediumK(self):
         k=1.
@@ -176,10 +173,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p )
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FixedBottom_largeK(self):
         k=1.e10
@@ -193,10 +190,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p )
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testConstF_FreeBottom_smallK(self):
         k=1.e-10
@@ -210,12 +207,12 @@ class Darcy(unittest.TestCase): #subclassing required
                     location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
 
         
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testConstF_FreeBottom_mediumK(self):
         k=1.
@@ -229,10 +226,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref), self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref), self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testConstF_FreeBottom_largeK(self):
         k=1.e10
@@ -246,10 +243,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref),self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref),self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FreeBottom_smallK(self):
         k=1.e-10
@@ -263,10 +260,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")  
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref),self.TEST_TOL*Lsup(u_ref), "flux error too big.")  
+        self.assertLess(Lsup(p-p_ref),self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FreeBottom_mediumK(self):
         k=1.
@@ -280,10 +277,10 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref),self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref),self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
     def testVarioF_FreeBottom_largeK(self):
         k=1.e10
@@ -297,13 +294,13 @@ class Darcy(unittest.TestCase): #subclassing required
                       location_of_fixed_pressure=mp,
                       location_of_fixed_flux=mv,
                       permeability=Scalar(k,Function(self.dom)))
-        #df.setTolerance(rtol=self.TOL)
+        df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)
         v,p=df.solve(u,p)
-        self.assertTrue(Lsup(v-u_ref)<self.TEST_TOL*Lsup(u_ref), "flux error too big.")
-        self.assertTrue(Lsup(p-p_ref)<self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
+        self.assertLess(Lsup(v-u_ref),self.TEST_TOL*Lsup(u_ref), "flux error too big.")
+        self.assertLess(Lsup(p-p_ref),self.TEST_TOL*Lsup(p_ref), "pressure error too big.")
 
-class Test_Darcy2D(Darcy):
-    TOL=1e-6
+class Test_Darcy2DOnDudley(Darcy):
+    TOL=1e-8 if hasFeature('paso') else 1e-9
     TEST_TOL=2.e-3
     WIDTH=1.
     def setUp(self):
@@ -313,8 +310,8 @@ class Test_Darcy2D(Darcy):
     def tearDown(self):
         del self.dom
 
-class Test_Darcy3D(Darcy):
-    TOL=1e-6
+class Test_Darcy3DOnDudley(Darcy):
+    TOL=1e-8 if hasFeature('paso') else 1e-9
     WIDTH=1.
     TEST_TOL=4.e-3
     def setUp(self):
@@ -324,7 +321,7 @@ class Test_Darcy3D(Darcy):
     def tearDown(self):
         del self.dom
 
-class Test_Rheologies(unittest.TestCase):
+class Test_RheologiesOnDudley(unittest.TestCase):
      """
      this is the program used to generate the powerlaw tests:
 
@@ -505,7 +502,7 @@ class Test_Rheologies(unittest.TestCase):
          for i in range(len(taus)): self.checkResult(i,gamma_dot_s[i], pl.getEtaEff(gamma_dot_s[i],dt=dt),taus[i])
 
 
-class Test_FaultSystem(unittest.TestCase):
+class Test_FaultSystemOnDudley(unittest.TestCase):
    EPS=1.e-8
    NE=10
    def test_Fault_MaxValue(self):
@@ -519,41 +516,42 @@ class Test_FaultSystem(unittest.TestCase):
       t, loc=f.getMaxValue(u)
       p=f.getParametrization(x,t)[0]
       m, l=loc(u), loc(p)
-      self.assertTrue(  m == 0.25, "wrong max value")
-      self.assertTrue(  t == 1, "wrong max tag")
-      self.assertTrue(  l == 0., "wrong max location")
+      self.assertEqual(m, 0.25, "wrong max value")
+      self.assertEqual(t, 1, "wrong max tag")
+      self.assertEqual(l, 0., "wrong max location")
 
       u=x[1]*(1.-x[1])*(1-x[0])*x[0]
       t, loc=f.getMaxValue(u)
       p=f.getParametrization(x,t)[0]
       m, l=loc(u), loc(p)
-      self.assertTrue(  m == 0.0625, "wrong max value")
-      self.assertTrue(  t == 2, "wrong max tag")
-      self.assertTrue(  l == 0.5, "wrong max location")
+      self.assertEqual(m, 0.0625, "wrong max value")
+      self.assertEqual(t, 2, "wrong max tag")
+      self.assertEqual(l, 0.5, "wrong max location")
 
       u=x[0]*(1.-x[0])*x[1]
       t, loc=f.getMaxValue(u)
       p=f.getParametrization(x,t)[0]
       m, l=loc(u), loc(p)
-      self.assertTrue(  m == 0.25, "wrong max value")
-      self.assertTrue(  t == 2, "wrong max tag")
-      self.assertTrue(  l == 1.0, "wrong max location")
+      self.assertEqual(m, 0.25, "wrong max value")
+      self.assertEqual(t, 2, "wrong max tag")
+      self.assertEqual(l, 1.0, "wrong max location")
 
       u=x[1]*(1.-x[1])*x[0]
       t, loc=f.getMaxValue(u)
       p=f.getParametrization(x,t)[0]
       m, l=loc(u), loc(p)
-      self.assertTrue(  m == 0.25, "wrong max value")
-      self.assertTrue(  t == 2, "wrong max tag")
-      self.assertTrue(  l == 0., "wrong max location")
+      self.assertEqual(m, 0.25, "wrong max value")
+      self.assertEqual(t, 2, "wrong max tag")
+      self.assertEqual(l, 0., "wrong max location")
 
       u=x[1]*(1.-x[1])*(1.-x[0])
       t, loc=f.getMaxValue(u)
       p=f.getParametrization(x,t)[0]
       m, l=loc(u), loc(p)
-      self.assertTrue(  m == 0.25, "wrong max value")
-      self.assertTrue(  t == 1, "wrong max tag")
-      self.assertTrue(  abs(l-0.70710678118654) <= self.EPS,  "wrong max location")
+      self.assertEqual(m, 0.25, "wrong max value")
+      self.assertEqual(t, 1, "wrong max tag")
+      self.assertLess(abs(l-0.70710678118654), self.EPS,  "wrong max location")
+
    def test_Fault_MinValue(self):
       dom=Rectangle(2*self.NE,2*self.NE)
       x=dom.getX()
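
Besides activating the previously commented-out tolerance (now set through df.getSolverOptionsPressure().setTolerance(rtol=self.TOL)), the Darcy and fault tests switch from assertTrue(a < b) and assertTrue(a == b) to assertLess and assertEqual, which report both operands on failure instead of the uninformative "False is not true". A small standalone sketch of the difference, using made-up numbers:

    import unittest

    class AssertStyleExample(unittest.TestCase):
        def test_reporting(self):
            error, tol = 1.5e-3, 2.0e-3      # hypothetical flux error and tolerance
            # On failure assertLess reports both operands, e.g.
            # "0.0025 not less than 0.002 : flux error too big.",
            # rather than assertTrue's "False is not true".
            self.assertLess(error, tol, "flux error too big.")
            self.assertEqual(1, 1, "wrong max tag")

    if __name__ == '__main__':
        unittest.main()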
diff --git a/dudley/test/python/run_nlpde2dOnDudley.py b/dudley/test/python/run_nlpde2dOnDudley.py
deleted file mode 100644
index 2597f50..0000000
--- a/dudley/test/python/run_nlpde2dOnDudley.py
+++ /dev/null
@@ -1,55 +0,0 @@
-
-########################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# Earth Systems Science Computational Center (ESSCC)
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-########################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-Earth Systems Science Computational Center (ESSCC)
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE  and pdetools test on finley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import os
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from test_nonLinearPDE import Test_nonLinearPDEs, Test_nlpde
-from esys.escript import *
-from esys.dudley import Rectangle,Brick
-import sys
-
-
-class Test_nonLinearPDE(Test_nlpde):
-   def setUp(self):
-        self.domain = Rectangle(l0=1.,l1=1.,n0=10, n1=10) 
-   def tearDown(self):
-        del self.domain
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
-
diff --git a/dudley/test/python/run_nlpde3dOnDudley.py b/dudley/test/python/run_nlpde3dOnDudley.py
deleted file mode 100644
index bb530fe..0000000
--- a/dudley/test/python/run_nlpde3dOnDudley.py
+++ /dev/null
@@ -1,55 +0,0 @@
-
-########################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# Earth Systems Science Computational Center (ESSCC)
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-########################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-Earth Systems Science Computational Center (ESSCC)
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE  and pdetools test on finley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import os
-
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from test_nonLinearPDE import Test_nonLinearPDEs, Test_nlpde
-from esys.escript import *
-from esys.dudley import Rectangle,Brick
-import sys
-
-class Test_nonLinearPDE(Test_nlpde):
-   def setUp(self):
-        self.domain = Brick(l0=1.,l1=1.,l2=1.,n0=10, n1=10,n2=10) 
-   def tearDown(self):
-        del self.domain
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
-
diff --git a/finley/test/python/run_nlpde2dOnFinley.py b/dudley/test/python/run_nonlinearPDEsOnDudley.py
similarity index 66%
rename from finley/test/python/run_nlpde2dOnFinley.py
rename to dudley/test/python/run_nonlinearPDEsOnDudley.py
index e538aff..ee2b9ec 100644
--- a/finley/test/python/run_nlpde2dOnFinley.py
+++ b/dudley/test/python/run_nonlinearPDEsOnDudley.py
@@ -21,34 +21,24 @@ __license__="""Licensed under the Apache License, version 2.0
 http://www.apache.org/licenses/LICENSE-2.0"""
 __url__="https://launchpad.net/escript-finley"
 
-"""
-Test suite for the linearPDE  and pdetools test on finley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import os
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_nonLinearPDE import Test_nonLinearPDEs, Test_nlpde
-from esys.escript import *
-from esys.finley import Rectangle,Brick
-import sys
+from test_nonLinearPDE import Test_nlpde
+from esys.dudley import Rectangle, Brick
 
 
-class Test_nonLinearPDE(Test_nlpde):
+class Test_nonLinearPDEOnDudley2D(Test_nlpde):
    def setUp(self):
         self.domain = Rectangle(l0=1.,l1=1.,n0=10, n1=10) 
    def tearDown(self):
         del self.domain
 
+class Test_nonLinearPDEonDudley3D(Test_nlpde):
+   def setUp(self):
+        self.domain = Brick(l0=1.,l1=1.,l2=1.,n0=10, n1=10, n2=10) 
+   def tearDown(self):
+        del self.domain
+
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/dudley/test/python/run_pasoSolversOnDudley.py b/dudley/test/python/run_pasoSolversOnDudley.py
new file mode 100644
index 0000000..ad81590
--- /dev/null
+++ b/dudley/test/python/run_pasoSolversOnDudley.py
@@ -0,0 +1,316 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on dudley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.dudley import Rectangle, Brick
+from esys.escript import hasFeature
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_PASO = hasFeature('paso')
+
+# number of elements in the spatial directions
+NE0=12
+NE1=13
+NE2=8
+OPTIMIZE=True
+
+ at unittest.skipIf(not HAVE_PASO, "PASO not available")
+class SimpleSolveOnPaso(SimpleSolveTestCase):
+    pass
+
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveDudleyRect_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveDudleyRect_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Jacobi
+
+class Test_SimpleSolveDudleyRect_Paso_TFQMR_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_TFQMR_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveDudleyRect_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Paso_BICGSTAB_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_BICGSTAB_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Paso_PCG_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_PCG_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Paso_TFQMR_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_TFQMR_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Paso_MINRES_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_MINRES_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + RILU
+
+class Test_SimpleSolveDudleyRect_Paso_BICGSTAB_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_BICGSTAB_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + RILU
+
+class Test_SimpleSolveDudleyRect_Paso_PCG_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_PCG_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveDudleyRect_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + RILU
+
+class Test_SimpleSolveDudleyRect_Paso_MINRES_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Paso_MINRES_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
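
The new module is deliberately repetitive: each class only selects a domain (Rectangle or Brick) and a PASO method/preconditioner pair, while the solve-and-verify body comes from SimpleSolveTestCase in test_simplesolve, and the skipIf on the shared base disables the whole family when the paso feature is absent. A condensed sketch of that layout, with a hypothetical base class standing in for SimpleSolveTestCase and a plain flag standing in for hasFeature('paso'):

    import unittest

    HAVE_PASO = True      # stands in for esys.escript.hasFeature('paso')

    @unittest.skipIf(not HAVE_PASO, "PASO not available")
    class SimpleSolveBase(unittest.TestCase):
        method = None                 # subclasses fill in the solver configuration
        def test_solve(self):
            if self.method is None:   # the bare base class carries no configuration
                self.skipTest("no configuration")
            self.assertIn(self.method, ("PCG", "BICGSTAB", "TFQMR", "MINRES"))

    class Test_Rect_PCG_Jacobi(SimpleSolveBase):
        def setUp(self):
            self.method = "PCG"
            self.preconditioner = "JACOBI"
        def tearDown(self):
            del self.method, self.preconditioner

    if __name__ == '__main__':
        unittest.main()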
diff --git a/dudley/test/python/run_simplesolve.py b/dudley/test/python/run_simplesolve.py
deleted file mode 100644
index 272630c..0000000
--- a/dudley/test/python/run_simplesolve.py
+++ /dev/null
@@ -1,550 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE  and pdetools test on dudley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import esys.escriptcore.utestselect as unittest, sys
-from esys.escriptcore.testing import *
-from esys.escript import *
-from esys.dudley import Rectangle,Brick
-from esys.escript.linearPDEs import LinearPDE, SolverOptions
-import numpy
-OPTIMIZE=True
-SOLVER_VERBOSE=False 
-# setNumberOfThreads(2)
-
-try:
-     DUDLEY_TEST_DATA=os.environ['DUDLEY_TEST_DATA']
-except KeyError:
-     DUDLEY_TEST_DATA='.'
-
-DUDLEY_TEST_MESH_PATH=os.path.join(DUDLEY_TEST_DATA,"data_meshes")
-
-# number of elements in the spatial directions
-NE0=8
-NE1=10
-NE2=12
-
-NE0=12
-NE1=13
-NE2=8
-
-SOLVER_TOL=1.e-8
-REL_TOL=1.e-6
-
-FAC_DIAG=1.
-FAC_OFFDIAG=-0.4
-
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        # Tell about how many MPI CPUs and OpenMP threads
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
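
The classes removed above all repeat the same solve-and-verify recipe; only the domain, the Krylov method and the preconditioner vary between them. A minimal sketch of that recipe, with the module constants and imports reconstructed from the removed test bodies (the removed file header is not shown here, so NE0, NE1, OPTIMIZE, SOLVER_TOL and REL_TOL are assumed values for illustration only):

    from esys.escript import Solution, grad, kronecker, inner, whereZero, Lsup
    from esys.escript.linearPDEs import LinearPDE, SolverOptions
    from esys.dudley import Rectangle

    NE0, NE1, OPTIMIZE = 12, 12, True
    SOLVER_TOL, REL_TOL = 1.e-8, 1.e-6          # assumed tolerances

    domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
    x = Solution(domain).getX()
    u_ex = 1. + 2.*x[0] + 3.*x[1]               # exact linear solution
    g_ex = grad(u_ex)                           # exact gradient (2., 3.)

    pde = LinearPDE(domain, numEquations=1)
    pde.setValue(r=u_ex, q=whereZero(x[0]))     # pin the solution on the x0=0 face
    pde.setValue(A=kronecker(2), y=inner(g_ex, domain.getNormal()))

    so = pde.getSolverOptions()
    so.setPackage(SolverOptions.PASO)
    so.setSolverMethod(SolverOptions.TFQMR)     # or SolverOptions.MINRES
    so.setPreconditioner(SolverOptions.JACOBI)
    so.setTolerance(SOLVER_TOL)

    u = pde.getSolution()
    assert Lsup(u - u_ex) < REL_TOL*Lsup(u_ex), "solution error too big"

The removed system-PDE variants follow the same shape with a Tensor4 coefficient A and a coupling term D, as in the bodies above.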
diff --git a/dudley/test/python/run_splitworldOnDudley.py b/dudley/test/python/run_splitworldOnDudley.py
index a0e1c0c..00c4e94 100644
--- a/dudley/test/python/run_splitworldOnDudley.py
+++ b/dudley/test/python/run_splitworldOnDudley.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -16,7 +16,7 @@
 
 from __future__ import print_function, division
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
@@ -25,12 +25,9 @@ __url__="https://launchpad.net/escript-finley"
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from esys.escript import *
-from esys.dudley import Rectangle, Brick, ReadMesh, ReadGmsh
+from esys.dudley import Rectangle, Brick
 from test_splitworld import Test_SplitWorld, sw_testing
 
-
-mpisize=getMPISizeWorld()
 NE=4 # number elements, must be even
 
 class Test_SplitOnDudley(Test_SplitWorld):
@@ -42,7 +39,6 @@ class Test_SplitOnDudley(Test_SplitWorld):
     
 class Test_dudley_sw_2D(sw_testing):
     def setUp(self):
-        from esys.dudley import Rectangle
         self.domain_ctr=Rectangle
         self.domain_vec=(6,6)
         self.domain_dict={}
@@ -51,10 +47,8 @@ class Test_dudley_sw_2D(sw_testing):
         del self.domain_ctr
         del self.domain_vec
 
-
 class Test_dudley_sw_3D(sw_testing):
     def setUp(self):
-        from esys.dudley import Brick
         self.domain_ctr=Brick
         self.domain_vec=(6,6,6)
         self.domain_dict={}
@@ -62,9 +56,7 @@ class Test_dudley_sw_3D(sw_testing):
     def tearDown(self):
         del self.domain_ctr
         del self.domain_vec
-    
-
-
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/dudley/test/python/run_trilinosSolversOnDudley.py b/dudley/test/python/run_trilinosSolversOnDudley.py
new file mode 100644
index 0000000..d794969
--- /dev/null
+++ b/dudley/test/python/run_trilinosSolversOnDudley.py
@@ -0,0 +1,344 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Open Software License version 3.0
+http://www.opensource.org/licenses/osl-3.0.php"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on dudley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.dudley import Rectangle, Brick
+from esys.escript import hasFeature, SolverOptions
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_TRILINOS = hasFeature('trilinos')
+skip_muelu_long = False #hasFeature("longindex")
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+OPTIMIZE=True
+
+@unittest.skipIf(not HAVE_TRILINOS, "Trilinos not available")
+class SimpleSolveOnTrilinos(SimpleSolveTestCase):
+    pass
+
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveDudleyRect_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveDudleyRect_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Jacobi
+
+class Test_SimpleSolveDudleyRect_Trilinos_TFQMR_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_TFQMR_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveDudleyRect_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveDudleyRect_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveDudleyBrick_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Gauss-Seidel
+
+class Test_SimpleSolveDudleyRect_Trilinos_TFQMR_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_TFQMR_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveDudleyRect_Trilinos_MINRES_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveDudleyBrick_Trilinos_MINRES_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + RILU
+
+class Test_SimpleSolveDudleyRect_Trilinos_BICGSTAB_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_BICGSTAB_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + RILU
+
+class Test_SimpleSolveDudleyRect_Trilinos_PCG_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_PCG_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveDudleyRect_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + RILU
+
+class Test_SimpleSolveDudleyRect_Trilinos_MINRES_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_MINRES_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + ILUT
+
+class Test_SimpleSolveDudleyRect_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveDudleyBrick_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
+
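Each class in the new module only provides a domain and the (package, method, preconditioner) triple in setUp(); the shared solve-and-check logic lives in SimpleSolveTestCase, imported from test_simplesolve, which this patch does not touch. A hypothetical, much reduced version of such a base class, just to show how those attributes are consumed (the real class also exercises system PDEs and tighter tolerances):

    import esys.escriptcore.utestselect as unittest
    from esys.escript import Solution, grad, kronecker, inner, whereZero, Lsup
    from esys.escript.linearPDEs import LinearPDE

    class SimpleSolveTestCase(unittest.TestCase):
        REL_TOL = 1.e-6                          # illustrative tolerance

        def test_single(self):
            x = Solution(self.domain).getX()
            u_ex = 1. + 2.*x[0] + 3.*x[1]        # linear, so exactly representable
            pde = LinearPDE(self.domain, numEquations=1)
            pde.setValue(A=kronecker(self.domain.getDim()),
                         r=u_ex, q=whereZero(x[0]),
                         y=inner(grad(u_ex), self.domain.getNormal()))
            so = pde.getSolverOptions()
            so.setPackage(self.package)          # e.g. SolverOptions.TRILINOS
            so.setSolverMethod(self.method)      # e.g. SolverOptions.PCG
            so.setPreconditioner(self.preconditioner)
            u = pde.getSolution()
            self.assertTrue(Lsup(u - u_ex) < self.REL_TOL*Lsup(u_ex))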
diff --git a/dudley/test/python/run_utilOnDudley.py b/dudley/test/python/run_utilOnDudley.py
index 9b4876c..63bfe14 100644
--- a/dudley/test/python/run_utilOnDudley.py
+++ b/dudley/test/python/run_utilOnDudley.py
@@ -25,18 +25,17 @@ __url__="https://launchpad.net/escript-finley"
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_util import Test_util as Test_util
-from test_util import Test_Util_SpatialFunctions, Test_Util_SpatialFunctions_noGradOnBoundary_noContact
+from test_util import Test_util
+from test_util import Test_Util_SpatialFunctions_noGradOnBoundary_noContact
 
-from esys.escript import *
+from esys.escript import FunctionOnBoundary, HAVE_SYMBOLS
 from esys.dudley import Rectangle,Brick,ReadMesh
-import sys
 import os
 
 if HAVE_SYMBOLS:
     from test_symfuncs import Test_symfuncs
 else:
-    print("Skipping symbolic tests since sympy is not available")
+    @unittest.skip("Skipping symbolic tests since sympy is not available")
     class Test_symfuncs:
         pass
 
@@ -55,9 +54,9 @@ DUDLEY_TEST_MESH_PATH=os.path.join(DUDLEY_TEST_DATA,"data_meshes")
 
 NE=4 # number elements, must be even
 
-class Test_UtilOnDudley(Test_util,Test_symfuncs):
+class Test_UtilOnDudley(Test_util):
    def setUp(self):
-       self.domain =Rectangle(NE,NE+1,1)
+       self.domain = Rectangle(NE,NE+1)
        self.functionspace = FunctionOnBoundary(self.domain) # due to a bug in escript python needs to hold a reference to the domain
        self.workdir=DUDLEY_WORKDIR
 
@@ -65,7 +64,17 @@ class Test_UtilOnDudley(Test_util,Test_symfuncs):
        del self.functionspace
        del self.domain
 
-class Test_Util_SpatialFunctionsOnDudleyTet2DOrder1(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
+class Test_SymFuncsOnDudley(Test_symfuncs):
+   def setUp(self):
+       self.domain = Rectangle(NE,NE+1)
+       self.functionspace = FunctionOnBoundary(self.domain)
+       self.workdir=DUDLEY_WORKDIR
+
+   def tearDown(self):
+       del self.functionspace
+       del self.domain
+
+class Test_Util_SpatialFunctionsOnDudleyTet2D(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
     def setUp(self):
         self.order=1
         self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
@@ -74,7 +83,7 @@ class Test_Util_SpatialFunctionsOnDudleyTet2DOrder1(Test_Util_SpatialFunctions_n
         del self.domain
 
 
-class Test_Util_SpatialFunctionsOnDudleyTet3DOrder1(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
+class Test_Util_SpatialFunctionsOnDudleyTet3D(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
     def setUp(self):
         self.order=1
         self.domain = ReadMesh(os.path.join(DUDLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=False)
@@ -82,25 +91,24 @@ class Test_Util_SpatialFunctionsOnDudleyTet3DOrder1(Test_Util_SpatialFunctions_n
         del self.order
         del self.domain
 
-class Test_Util_SpatialFunctionsOnDudleyRectOrder1(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
+class Test_Util_SpatialFunctionsOnDudleyRect(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
     def setUp(self):
         self.order=1
-        self.domain = Rectangle(n0=NE,n1=NE,order=1)
+        self.domain = Rectangle(n0=NE,n1=NE)
     def tearDown(self):
         del self.order
         del self.domain
 
 
-class Test_Util_SpatialFunctionsOnDudleyBrickOrder1(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
+class Test_Util_SpatialFunctionsOnDudleyBrick(Test_Util_SpatialFunctions_noGradOnBoundary_noContact):
     def setUp(self):
         self.order=1
-        self.domain = Brick(n0=NE,n1=NE,n2=NE,order=1)
+        self.domain = Brick(n0=NE,n1=NE,n2=NE)
     def tearDown(self):
         del self.order
         del self.domain
 
 
-
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
 
diff --git a/escript/py_src/SConscript b/escript/py_src/SConscript
index 3f977a8..6a30921 100644
--- a/escript/py_src/SConscript
+++ b/escript/py_src/SConscript
@@ -13,16 +13,14 @@
 #
 ##############################################################################
 
-import os
 Import('*')
-local_env = env.Clone()
 
 # get the source file names
 sources = Glob('*.py')
 
 # compile
-pyc = local_env.PyCompile(sources)
+pyc = env.PyCompile(sources)
 
 # install
-py_inst = local_env.Install(os.path.join(local_env['pyinstall'],'escript'), pyc)
-env.Alias('install_escript_py', [py_inst])
+py_inst = env.Install(Dir('escript', env['pyinstall']), pyc)
+env.Alias('install_escript', [py_inst])
diff --git a/escript/py_src/__init__.py b/escript/py_src/__init__.py
index c2e9b2d..edd8586 100644
--- a/escript/py_src/__init__.py
+++ b/escript/py_src/__init__.py
@@ -29,5 +29,6 @@ from esys.escriptcore.datamanager import DataManager
 from esys.escriptcore.symbolic import *
 from esys.escriptcore.splitworld import *
 
-__all__=[x for x in dir() if not x.startswith('internal_') and not x.startswith('Internal_') and not x.startswith('__')]
+__all__=[x for x in dir() if not x.startswith('internal_') and not x.startswith('Internal_') and not x.startswith('__') and not str(type(eval(x))).find('module')>=0]
+
 
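The stricter __all__ also filters out module objects, so a wildcard import of esys.escript no longer drags sub-modules into the caller's namespace. The filter idiom in isolation, on a toy namespace rather than escript itself:

    import math, os
    x, y = 1.0, "name"

    __all__ = [n for n in dir()
               if not n.startswith('internal_')
               and not n.startswith('Internal_')
               and not n.startswith('__')
               and not str(type(eval(n))).find('module') >= 0]
    # -> ['x', 'y']; math and os are dropped because their type is a module
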
diff --git a/scons/templates/jessie_py3_options.py b/escriptcore/SConscript
similarity index 70%
copy from scons/templates/jessie_py3_options.py
copy to escriptcore/SConscript
index 87e0760..03e3257 100644
--- a/scons/templates/jessie_py3_options.py
+++ b/escriptcore/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,12 +13,14 @@
 #
 ##############################################################################
 
-from .jessie_options import *
+Import('env')
+
+# configure C++ library
+env.SConscript('src/SConscript', duplicate=0)
+
+# configure python module
+env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# boost-python library/libraries to link against
-boost_libs = ['boost_python-py34']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.4m'
-pythonincpath='/usr/include/python3.4'
+# configure unit tests
+env.SConscript('test/SConscript', duplicate=0)
 
diff --git a/escriptcore/py_src/SConscript b/escriptcore/py_src/SConscript
index 007c970..69704d0 100644
--- a/escriptcore/py_src/SConscript
+++ b/escriptcore/py_src/SConscript
@@ -25,10 +25,10 @@ sym_sources = Glob(os.path.join('symbolic','*.py'))
 # compile
 pyc = local_env.PyCompile(sources)
 sym_pyc = local_env.PyCompile(sym_sources)
+env.Alias('build_escript', [pyc, sym_pyc])
 
 # install
 runmodel_inst = local_env.Install(local_env['bininstall'], 'runmodel.py')
-py_inst = local_env.Install(os.path.join(local_env['pyinstall'],'escriptcore'), pyc)
+py_inst = local_env.Install(Dir('escriptcore', local_env['pyinstall']), pyc)
 sym_py_inst = local_env.Install(os.path.join(local_env['pyinstall'],'escriptcore','symbolic'), sym_pyc)
-env.Alias('install_escriptcore_py', [runmodel_inst, py_inst, sym_py_inst])
-
+env.Alias('install_escript', [runmodel_inst, py_inst, sym_py_inst])
diff --git a/escriptcore/py_src/faultsystems.py b/escriptcore/py_src/faultsystems.py
index 37fd9bf..a637b3b 100644
--- a/escriptcore/py_src/faultsystems.py
+++ b/escriptcore/py_src/faultsystems.py
@@ -498,7 +498,7 @@ class FaultSystem(object):
      x=f.getFunctionSpace().getX()
      for t in self.getTags():
         p,m=self.getParametrization(x,tag=t, tol=tol)
-        loc=((m*f)+(1.-m)*ref).maxGlobalDataPoint()
+        loc=((m*f)+(1.-m)*ref).internal_maxGlobalDataPoint()
         f_t=f.getTupleForGlobalDataPoint(*loc)[0]
         if f_t>f_max:
            f_max=f_t
@@ -527,7 +527,7 @@ class FaultSystem(object):
      x=f.getFunctionSpace().getX()
      for t in self.getTags():
         p,m=self.getParametrization(x,tag=t, tol=tol)
-        loc=((m*f)+(1.-m)*ref).minGlobalDataPoint()
+        loc=((m*f)+(1.-m)*ref).internal_minGlobalDataPoint()
         f_t=f.getTupleForGlobalDataPoint(*loc)[0]
         if f_t<f_min:
            f_min=f_t
diff --git a/escriptcore/py_src/flows.py b/escriptcore/py_src/flows.py
index 4b557b8..5ae9850 100644
--- a/escriptcore/py_src/flows.py
+++ b/escriptcore/py_src/flows.py
@@ -147,7 +147,7 @@ class DarcyFlow(object):
       """
       if location_of_fixed_pressure is not None: 
            self.location_of_fixed_pressure=util.wherePositive(util.interpolate(location_of_fixed_pressure, self.__pde_p.getFunctionSpaceForCoefficient("q")))
-           self.ref_point_id=self.location_of_fixed_pressure.maxGlobalDataPoint()
+           self.ref_point_id=self.location_of_fixed_pressure.internal_maxGlobalDataPoint()
            if not self.location_of_fixed_pressure.getTupleForGlobalDataPoint(*self.ref_point_id)[0] > 0: raise ValueError("pressure needs to be fixed at least one point.")
            self.ref_point=self.__pde_p.getFunctionSpaceForCoefficient("q").getX().getTupleForGlobalDataPoint(*self.ref_point_id)
            if self.verbose: print(("DarcyFlow: reference point at %s."%(self.ref_point,)))
diff --git a/escriptcore/py_src/gmshrunner.py b/escriptcore/py_src/gmshrunner.py
index 32b068d..8f1fc65 100644
--- a/escriptcore/py_src/gmshrunner.py
+++ b/escriptcore/py_src/gmshrunner.py
@@ -31,7 +31,7 @@ interface to gmsh
 __all__ = ['gmshGeo2Msh']
 
 from .util import getMPIRankWorld, getMPIWorldMax
-from .escriptcpp import getEscriptParamInt
+from .escriptcpp import hasFeature
 
 try:
     import gmshpy
@@ -39,8 +39,8 @@ try:
 except ImportError:
     HAVE_GMSHPY=False
 
-HAVE_GMSH = getEscriptParamInt("GMSH_SUPPORT")
-GMSH_MPI = HAVE_GMSH and getEscriptParamInt("GMSH_MPI")
+HAVE_GMSH = hasFeature("gmsh")
+GMSH_MPI = HAVE_GMSH and hasFeature("gmsh_mpi")
 
 def _runGmshPy(geoFile, mshFile, numDim, order, verbosity):
     if getMPIRankWorld() == 0:
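The integer parameter queries (getEscriptParamInt) are replaced by the boolean hasFeature() check, the same call the new Trilinos test module uses to decide whether to skip. Usage, with the feature names that appear in this patch:

    from esys.escript import hasFeature

    HAVE_GMSH     = hasFeature('gmsh')       # was getEscriptParamInt("GMSH_SUPPORT")
    GMSH_MPI      = HAVE_GMSH and hasFeature('gmsh_mpi')
    HAVE_TRILINOS = hasFeature('trilinos')   # gates the new dudley solver tests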
diff --git a/escriptcore/py_src/linearPDEs.py b/escriptcore/py_src/linearPDEs.py
index e98c9be..b669ec0 100644
--- a/escriptcore/py_src/linearPDEs.py
+++ b/escriptcore/py_src/linearPDEs.py
@@ -130,7 +130,7 @@ class PDECoef(object):
     CONTACT_REDUCED=15
     DIRACDELTA=16
 
-    def __init__(self, where, pattern, altering):
+    def __init__(self, where, pattern, altering, complex=False):
        """
        Initialises a PDE coefficient type.
 
@@ -152,11 +152,15 @@ class PDECoef(object):
        :param altering: indicates what part of the PDE is altered if the
                         coefficient is altered
        :type altering: one of `OPERATOR`, `RIGHTHANDSIDE`, `BOTH`
+       :param complex: if true, this coefficient is part of a complex-valued
+                       PDE and values will be converted to complex.
+       :type complex: ``boolean``
        """
        super(PDECoef, self).__init__()
-       self.what=where
-       self.pattern=pattern
-       self.altering=altering
+       self.what = where
+       self.pattern = pattern
+       self.altering = altering
+       self.complex = complex
        self.resetValue()
 
     def resetValue(self):
@@ -227,7 +231,7 @@ class PDECoef(object):
        :param reducedSolutionOrder: True to indicate that reduced order is used
                                     to represent the solution
        :type reducedSolutionOrder: ``bool``
-       :param newValue: number of components of the PDE solution
+       :param newValue: new value of coefficient
        :type newValue: any object that can be converted into a
                        `Data` object with the appropriate shape
                        and `FunctionSpace`
@@ -253,7 +257,11 @@ class PDECoef(object):
        if not newValue.isEmpty():
            if not self.getShape(domain,numEquations,numSolutions)==newValue.getShape():
                raise IllegalCoefficientValue("Expected shape of coefficient is %s but actual shape is %s."%(self.getShape(domain,numEquations,numSolutions),newValue.getShape()))
-       self.value=newValue
+           if newValue.isComplex() and not self.complex:
+               raise IllegalCoefficientValue("Cannot assign a complex value to a real-valued coefficient!")
+           elif not newValue.isComplex() and self.complex:
+               newValue.promote()
+       self.value = newValue
 
     def isAlteringOperator(self):
         """
@@ -281,6 +289,15 @@ class PDECoef(object):
         else:
             return None
 
+    def isComplex(self):
+        """
+        Checks if the coefficient is complex-valued.
+
+        :rtype: ``bool``
+        :return: True if the coefficient is complex-valued, False otherwise.
+        """
+        return self.complex
+
     def estimateNumEquationsAndNumSolutions(self,domain,shape=()):
        """
        Tries to estimate the number of equations and number of solutions if
@@ -411,7 +428,7 @@ class LinearProblem(object):
    problem will be solved to get the unknown *u*.
 
    """
-   def __init__(self,domain,numEquations=None,numSolutions=None,debug=False):
+   def __init__(self,domain,numEquations=None,numSolutions=None,complex=False,debug=False):
      """
      Initializes a linear problem.
 
@@ -422,33 +439,41 @@ class LinearProblem(object):
      :param numSolutions: number of solution components. If ``None`` the number
                           of solution components is extracted from the
                           coefficients.
+     :param complex: if True this problem will have complex coefficients and
+                     a complex-valued result.
      :param debug: if True debug information is printed
 
      """
      super(LinearProblem, self).__init__()
 
+     self.__complex=complex
      self.__debug=debug
      self.__domain=domain
      self.domainSupportsAssemblers = hasattr(domain, "createAssembler")
      self.assembler = None
      if self.domainSupportsAssemblers:
-            self.assembler = domain.createAssembler("DefaultAssembler", [])
+        options=[]
+        if complex:
+            options=[('dummy', escore.Data(0.j))]
+        self.assembler = domain.createAssembler("DefaultAssembler", options)
      self.__numEquations=numEquations
      self.__numSolutions=numSolutions
+     self.__preservePreconditioner=False
      self.__altered_coefficients=False
      self.__reduce_equation_order=False
      self.__reduce_solution_order=False
      self.__sym=False
-     self.setSolverOptions()
      self.__is_RHS_valid=False
      self.__is_operator_valid=False
      self.__COEFFICIENTS={}
      self.__solution_rtol=1.e99
      self.__solution_atol=1.e99
+     self.setSolverOptions()
      self.setSymmetryOff()
      # initialize things:
      self.resetAllCoefficients()
      self.initializeSystem()
+
    # ==========================================================================
    #    general stuff:
    # ==========================================================================
@@ -460,6 +485,7 @@ class LinearProblem(object):
      :rtype: ``str``
      """
      return "<LinearProblem %d>"%id(self)
+
    # ==========================================================================
    #    debug :
    # ==========================================================================
@@ -515,6 +541,7 @@ class LinearProblem(object):
            self.__COEFFICIENTS[name]=type
            self.__COEFFICIENTS[name].resetValue()
            self.trace("coefficient %s has been introduced."%name)
+
    def resetRightHandSideCoefficients(self):
        """
        Resets all coefficients defining the right hand side
@@ -532,6 +559,7 @@ class LinearProblem(object):
      :rtype: `Domain`
      """
      return self.__domain
+
    def getDomainStatus(self):
      """
      Return the status indicator of the domain
@@ -543,6 +571,7 @@ class LinearProblem(object):
      Return the domain status used to build the current system
      """
      return self.__system_status
+
    def setSystemStatus(self,status=None):
      """
      Sets the system status to ``status`` if ``status`` is not present the
@@ -655,6 +684,7 @@ class LinearProblem(object):
           self.__solver_options=options
        else:
           raise ValueError("options must be a SolverOptions object.")
+       self.__solver_options.setComplex(self.isComplex())
        self.__solver_options.setSymmetry(self.__sym)
 
    def getSolverOptions(self):
@@ -674,6 +704,43 @@ class LinearProblem(object):
       :rtype: ``bool``
       """
       return self.getSolverOptions().getSolverMethod() in [ SolverOptions.ROWSUM_LUMPING, SolverOptions.HRZ_LUMPING ]
+
+   def isComplex(self):
+       """
+       Returns true if this is a complex-valued LinearProblem, false if
+       real-valued.
+
+       :rtype: ``bool``
+       """
+       return self.__complex
+
+   def shouldPreservePreconditioner(self):
+       """
+       Returns true if the preconditioner / factorisation should be kept even
+       when resetting the operator.
+
+       :rtype: ``bool``
+       """
+       return self.__preservePreconditioner
+
+   def preservePreconditioner(self, preserve = True):
+       """
+       Notifies the PDE that the preconditioner should not be reset when
+       making changes to the operator.
+
+       Building the preconditioner data can be quite expensive (e.g. for
+       multigrid methods) so if it is known that changes to the operator are
+       going to be minor calling this method can speed up successive PDE
+       solves.
+
+       :note: Not all operator types support this.
+       :param preserve: if True, preconditioner will be preserved, otherwise
+                        it will be reset when making changes to the operator,
+                        which is the default behaviour.
+       :type preserve: ``bool``
+       """
+       self.__preservePreconditioner =  preserve
+
    # ==========================================================================
    #    symmetry  flag:
    # ==========================================================================
@@ -712,6 +779,7 @@ class LinearProblem(object):
       :note: The method overwrites the symmetry flag set by the solver options
       """
       self.getSolverOptions().setSymmetry(flag)
+
    # ==========================================================================
    # function space handling for the equation as well as the solution
    # ==========================================================================
@@ -750,7 +818,6 @@ class LinearProblem(object):
      self.setReducedOrderForSolutionTo(flag)
      self.setReducedOrderForEquationTo(flag)
 
-
    def setReducedOrderForSolutionOn(self):
      """
      Switches reduced order on for solution representation.
@@ -838,6 +905,7 @@ class LinearProblem(object):
         self.setReducedOrderForEquationOn()
      else:
         self.setReducedOrderForEquationOff()
+
    def getOperatorType(self):
       """
       Returns the current system type.
@@ -988,7 +1056,8 @@ class LinearProblem(object):
      :raise IllegalCoefficient: if ``name`` is not a coefficient of the PDE
      """
      if self.hasCoefficient(name):
-        return escore.Data(0.,self.getShapeOfCoefficient(name),self.getFunctionSpaceForCoefficient(name))
+        zero = 0.j if self.__COEFFICIENTS[name].isComplex() else 0.
+        return escore.Data(zero,self.getShapeOfCoefficient(name),self.getFunctionSpaceForCoefficient(name))
      else:
         raise IllegalCoefficient("illegal coefficient %s requested for general PDE."%name)
 
@@ -1162,20 +1231,22 @@ class LinearProblem(object):
        Returns an instance of a new right hand side.
        """
        self.trace("New right hand side is allocated.")
+       zero = 0.j if self.isComplex() else 0.
        if self.getNumEquations()>1:
-           return escore.Data(0.,(self.getNumEquations(),),self.getFunctionSpaceForEquation(),True)
+           return escore.Data(zero,(self.getNumEquations(),),self.getFunctionSpaceForEquation(),True)
        else:
-           return escore.Data(0.,(),self.getFunctionSpaceForEquation(),True)
+           return escore.Data(zero,(),self.getFunctionSpaceForEquation(),True)
 
    def createSolution(self):
        """
        Returns an instance of a new solution.
        """
        self.trace("New solution is allocated.")
-       if self.getNumSolutions()>1:
-           return escore.Data(0.,(self.getNumSolutions(),),self.getFunctionSpaceForSolution(),True)
+       zero = 0.j if self.isComplex() else 0.
+       if self.getNumSolutions() > 1:
+           return escore.Data(zero,(self.getNumSolutions(),),self.getFunctionSpaceForSolution(),True)
        else:
-           return escore.Data(0.,(),self.getFunctionSpaceForSolution(),True)
+           return escore.Data(zero,(),self.getFunctionSpaceForSolution(),True)
 
    def resetSolution(self):
        """
@@ -1197,6 +1268,7 @@ class LinearProblem(object):
           self.__solution_atol=self.getSolverOptions().getAbsoluteTolerance()
           self.validSolution()
        self.__solution=u
+
    def getCurrentSolution(self):
        """
        Returns the solution in its current state.
@@ -1237,7 +1309,7 @@ class LinearProblem(object):
                self.__operator.setToZero()
            else:
                if self.getOperatorType() == self.getRequiredOperatorType():
-                   self.__operator.resetValues()
+                   self.__operator.resetValues(self.shouldPreservePreconditioner())
                else:
                    self.__operator=self.createOperator()
                    self.__operator_type=self.getRequiredOperatorType()
@@ -1617,7 +1689,7 @@ class LinearPDE(LinearProblem):
 
    """
 
-   def __init__(self,domain,numEquations=None,numSolutions=None,debug=False):
+   def __init__(self,domain,numEquations=None,numSolutions=None,complex=False,debug=False):
      """
      Initializes a new linear PDE.
 
@@ -1631,35 +1703,35 @@ class LinearPDE(LinearProblem):
      :param debug: if True debug information is printed
 
      """
-     super(LinearPDE, self).__init__(domain,numEquations,numSolutions,debug)
+     super(LinearPDE, self).__init__(domain,numEquations,numSolutions,complex,debug)
      #
      #   the coefficients of the PDE:
      #
      self.introduceCoefficients(
-       A=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR),
-       B=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       C=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR),
-       D=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       X=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM),PDECoef.RIGHTHANDSIDE),
-       Y=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       d=PDECoef(PDECoef.BOUNDARY,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       y=PDECoef(PDECoef.BOUNDARY,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       d_contact=PDECoef(PDECoef.CONTACT,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       y_contact=PDECoef(PDECoef.CONTACT,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       A_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR),
-       B_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       C_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR),
-       D_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       X_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM),PDECoef.RIGHTHANDSIDE),
-       Y_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       d_reduced=PDECoef(PDECoef.BOUNDARY_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       y_reduced=PDECoef(PDECoef.BOUNDARY_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       d_contact_reduced=PDECoef(PDECoef.CONTACT_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       y_contact_reduced=PDECoef(PDECoef.CONTACT_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       d_dirac=PDECoef(PDECoef.DIRACDELTA,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR),
-       y_dirac=PDECoef(PDECoef.DIRACDELTA,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE),
-       r=PDECoef(PDECoef.SOLUTION,(PDECoef.BY_SOLUTION,),PDECoef.RIGHTHANDSIDE),
-       q=PDECoef(PDECoef.SOLUTION,(PDECoef.BY_SOLUTION,),PDECoef.BOTH) )
+       A=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR, complex),
+       B=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       C=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR, complex),
+       D=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       X=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,PDECoef.BY_DIM),PDECoef.RIGHTHANDSIDE, complex),
+       Y=PDECoef(PDECoef.INTERIOR,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       d=PDECoef(PDECoef.BOUNDARY,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       y=PDECoef(PDECoef.BOUNDARY,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       d_contact=PDECoef(PDECoef.CONTACT,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       y_contact=PDECoef(PDECoef.CONTACT,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       A_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR, complex),
+       B_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       C_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION,PDECoef.BY_DIM),PDECoef.OPERATOR, complex),
+       D_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       X_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_DIM),PDECoef.RIGHTHANDSIDE, complex),
+       Y_reduced=PDECoef(PDECoef.INTERIOR_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       d_reduced=PDECoef(PDECoef.BOUNDARY_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       y_reduced=PDECoef(PDECoef.BOUNDARY_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       d_contact_reduced=PDECoef(PDECoef.CONTACT_REDUCED,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       y_contact_reduced=PDECoef(PDECoef.CONTACT_REDUCED,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       d_dirac=PDECoef(PDECoef.DIRACDELTA,(PDECoef.BY_EQUATION,PDECoef.BY_SOLUTION),PDECoef.OPERATOR, complex),
+       y_dirac=PDECoef(PDECoef.DIRACDELTA,(PDECoef.BY_EQUATION,),PDECoef.RIGHTHANDSIDE, complex),
+       r=PDECoef(PDECoef.SOLUTION,(PDECoef.BY_SOLUTION,),PDECoef.RIGHTHANDSIDE, complex),
+       q=PDECoef(PDECoef.SOLUTION,(PDECoef.BY_SOLUTION,),PDECoef.BOTH, False) )
 
    def __str__(self):
      """
diff --git a/escriptcore/py_src/nonlinearPDE.py b/escriptcore/py_src/nonlinearPDE.py
index 07c2baa..342b6b2 100644
--- a/escriptcore/py_src/nonlinearPDE.py
+++ b/escriptcore/py_src/nonlinearPDE.py
@@ -811,8 +811,9 @@ class NonlinearPDE(object):
         res=ev.evaluate()
         if len(names)==1: res=[res]
         self.trace3("RHS expressions evaluated in %f seconds."%(time()-T0))
-        for i in range(len(names)):
-            self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
+        if self._debug > self.DEBUG2:
+            for i in range(len(names)):
+                self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
         coeffs_f=dict(zip(names,res))
         #
 
@@ -948,7 +949,7 @@ class NonlinearPDE(object):
         """
         ev=symb.Evaluator()
         names=[]
-        for name in expressions:
+        for name in sorted(expressions):
             if name in self.__COEFFICIENTS:
                 ev.addExpression(expressions[name])
                 names.append(name)
@@ -960,8 +961,9 @@ class NonlinearPDE(object):
         res=ev.evaluate()
         if len(names)==1: res=[res]
         self.trace3("RHS expressions evaluated in %f seconds."%(time()-T0))
-        for i in range(len(names)):
-            self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
+        if self._debug > self.DEBUG2:
+            for i in range(len(names)):
+                self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
         args=dict(zip(names,res))
         # reset coefficients may be set at previous calls:
         for n in self.__COEFFICIENTS:
@@ -975,7 +977,7 @@ class NonlinearPDE(object):
         """
         ev=symb.Evaluator()
         names=[]
-        for name in expressions:
+        for name in sorted(expressions):
             if not name in self.__COEFFICIENTS:
                 ev.addExpression(expressions[name])
                 names.append(name)
@@ -987,8 +989,9 @@ class NonlinearPDE(object):
         res=ev.evaluate()
         if len(names)==1: res=[res]
         self.trace3("Matrix expressions evaluated in %f seconds."%(time()-T0))
-        for i in range(len(names)):
-            self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
+        if self._debug > self.DEBUG2:
+            for i in range(len(names)):
+                self.trace3("util.Lsup(%s)=%s"%(names[i],util.Lsup(res[i])))
         self._lpde.setValue(**dict(zip(names,res)))
 
     def _updateLinearPDE(self, expressions, subs, **constants):
diff --git a/escriptcore/py_src/pdetools.py b/escriptcore/py_src/pdetools.py
index 8f0c8ff..aba1137 100644
--- a/escriptcore/py_src/pdetools.py
+++ b/escriptcore/py_src/pdetools.py
@@ -354,9 +354,9 @@ class Locator(object):
        if iterative:
            self.__id=[]
            for p in x:
-              self.__id.append(util.length(xxx-p[:self.__function_space.getDim()]).minGlobalDataPoint())
+              self.__id.append(util.length(xxx-p[:self.__function_space.getDim()]).internal_minGlobalDataPoint())
        else:
-           self.__id=util.length(xxx-x[:self.__function_space.getDim()]).minGlobalDataPoint()
+           self.__id=util.length(xxx-x[:self.__function_space.getDim()]).internal_minGlobalDataPoint()
 
      def __str__(self):
        """
@@ -460,7 +460,7 @@ def getInfLocator(arg):
     if not isinstance(arg, escore.Data):
        raise TypeError("getInfLocator: Unknown argument type.")
     a_inf=util.inf(arg)
-    loc=util.length(arg-a_inf).minGlobalDataPoint()     # This gives us the location but not coords
+    loc=util.length(arg-a_inf).internal_minGlobalDataPoint()     # This gives us the location but not coords
     x=arg.getFunctionSpace().getX()
     x_min=x.getTupleForGlobalDataPoint(*loc)
     return Locator(arg.getFunctionSpace(),x_min)
@@ -470,9 +470,9 @@ def getSupLocator(arg):
     Return a Locator for a point with the sup value over all arg.
     """
     if not isinstance(arg, escore.Data):
-       raise TypeError("getInfLocator: Unknown argument type.")
+       raise TypeError("getSupLocator: Unknown argument type.")
     a_inf=util.sup(arg)
-    loc=util.length(arg-a_inf).minGlobalDataPoint()     # This gives us the location but not coords
+    loc=util.length(arg-a_inf).internal_minGlobalDataPoint()     # This gives us the location but not coords
     x=arg.getFunctionSpace().getX()
     x_min=x.getTupleForGlobalDataPoint(*loc)
     return Locator(arg.getFunctionSpace(),x_min)
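The Data method used by Locator and the locator helpers is now internal_minGlobalDataPoint() / internal_maxGlobalDataPoint(); the public Locator interface is unchanged. A typical use matching the functions above (assuming the usual esys.escript.pdetools import path):

    from esys.escript.pdetools import getSupLocator
    from esys.escript import Function
    from esys.dudley import Rectangle

    domain = Rectangle(10, 10)
    x = Function(domain).getX()
    arg = x[0]*(1.-x[0])                 # peaks at x0 = 0.5
    loc = getSupLocator(arg)             # Locator at the data point where arg is largest
    print(loc.getX(), loc(arg))          # its coordinates and the value of arg there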
diff --git a/escriptcore/py_src/util.py b/escriptcore/py_src/util.py
index 01b8262..b29162d 100644
--- a/escriptcore/py_src/util.py
+++ b/escriptcore/py_src/util.py
@@ -41,10 +41,13 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 
 
 import math
-import numpy
+import cmath
 import os
 import warnings
+import numpy
 warnings.simplefilter('default', category=DeprecationWarning)
+# suppress the following which comes from sympy with python 3.5
+warnings.filterwarnings('ignore', category=DeprecationWarning, message='inspect.getargspec.*')
 
 from . import escriptcpp as escore
 from .escriptcpp import C_GeneralTensorProduct, Data
@@ -430,7 +433,7 @@ def Lsup(arg):
         return sup(abs(arg))
     elif isinstance(arg,escore.Data):
         return arg._Lsup()
-    elif isinstance(arg,float):
+    elif isinstance(arg,float) or isinstance(arg, complex):
         return abs(arg)
     elif isinstance(arg,int):
         return abs(float(arg))
@@ -455,6 +458,8 @@ def sup(arg):
         return arg
     elif isinstance(arg,int):
         return float(arg)
+    elif isinstance(arg,complex):
+        raise TypeError("sup:  Operation not supported for complex.")
     else:
         raise TypeError("sup: Unknown argument type.")
 
@@ -476,6 +481,8 @@ def inf(arg):
         return arg
     elif isinstance(arg,int):
         return float(arg)
+    elif isinstance(arg,complex):
+        raise TypeError("inf:  Operation not supported for complex.")
     else:
       raise TypeError("inf: Unknown argument type.")
 
@@ -499,9 +506,7 @@ def getRank(arg):
         return arg.ndim
     elif isinstance(arg,escore.Data):
         return arg.getRank()
-    elif isinstance(arg,float):
-        return 0
-    elif isinstance(arg,int):
+    elif isinstance(arg,float) or isinstance(arg,int) or isinstance(arg,complex):
         return 0
     elif isinstance(arg,sym.Symbol):
         return arg.getRank()
@@ -526,9 +531,7 @@ def getShape(arg):
         return numpy.array(arg).shape
     elif isinstance(arg,escore.Data):
         return arg.getShape()
-    elif isinstance(arg,float):
-        return ()
-    elif isinstance(arg,int):
+    elif isinstance(arg,float) or isinstance(arg,int) or isinstance(arg,complex):
         return ()
     elif isinstance(arg,sym.Symbol):
         return arg.getShape()
@@ -613,9 +616,7 @@ def testForZero(arg):
        return not Lsup(arg)>0.
     elif isinstance(arg,escore.Data):
        return False
-    elif isinstance(arg,float):
-       return not Lsup(arg)>0.
-    elif isinstance(arg,int):
+    elif isinstance(arg,float) or isinstance(arg,int) or isinstance(arg,complex):
        return not Lsup(arg)>0.
     else:
        return False
@@ -643,6 +644,8 @@ def matchType(arg0=0.,arg1=0.):
           arg1=numpy.array(arg1,dtype=numpy.float64)
        elif isinstance(arg1,int):
           arg1=numpy.array(float(arg1),dtype=numpy.float64)
+       elif isinstance(arg1,complex):
+          arg1=numpy.array(arg1, dtype=numpy.complex)
        elif isinstance(arg1,sym.Symbol):
           pass
        else:
@@ -652,7 +655,7 @@ def matchType(arg0=0.,arg1=0.):
           arg1=escore.Data(arg1,arg0.getFunctionSpace())
        elif isinstance(arg1,escore.Data):
           pass
-       elif isinstance(arg1,float):
+       elif isinstance(arg1,float) or isinstance(arg1,complex):
           arg1=escore.Data(arg1,(),arg0.getFunctionSpace())
        elif isinstance(arg1,int):
           arg1=escore.Data(float(arg1),(),arg0.getFunctionSpace())
@@ -665,6 +668,8 @@ def matchType(arg0=0.,arg1=0.):
           pass
        elif isinstance(arg1,escore.Data):
           pass
+       elif isinstance(arg1,complex):  
+          pass
        elif isinstance(arg1,float):
           pass
        elif isinstance(arg1,int):
@@ -673,6 +678,23 @@ def matchType(arg0=0.,arg1=0.):
           pass
        else:
           raise TypeError("function: Unknown type of second argument.")
+    elif isinstance(arg0,complex):
+       if isinstance(arg1,numpy.ndarray):
+          arg0=numpy.array(arg0,dtype=numpy.complex128)
+       elif isinstance(arg1,escore.Data):
+          arg0=escore.Data(arg0,arg1.getFunctionSpace())
+       elif isinstance(arg1,float):
+          arg0=numpy.array(arg0,dtype=numpy.complex)
+          arg1=numpy.array(arg1,dtype=numpy.complex)
+       elif isinstance(arg1,int):
+          arg0=numpy.array(arg0,dtype=numpy.complex)
+          arg1=numpy.array(float(arg1),dtype=numpy.complex)
+       elif isinstance(arg1,sym.Symbol):
+          pass
+       elif isinstance(arg1,complex):
+          pass
+       else:
+          raise TypeError("function: Unknown type of second argument.") 
     elif isinstance(arg0,float):
        if isinstance(arg1,numpy.ndarray):
           arg0=numpy.array(arg0,dtype=numpy.float64)
@@ -684,6 +706,9 @@ def matchType(arg0=0.,arg1=0.):
        elif isinstance(arg1,int):
           arg0=numpy.array(arg0,dtype=numpy.float64)
           arg1=numpy.array(float(arg1),dtype=numpy.float64)
+       elif isinstance(arg1,complex):
+          arg0=numpy.array(complex(arg0),dtype=numpy.complex)
+          arg1=numpy.array(complex(arg1),dtype=numpy.complex)    
        elif isinstance(arg1,sym.Symbol):
           pass
        else:
@@ -696,6 +721,9 @@ def matchType(arg0=0.,arg1=0.):
        elif isinstance(arg1,float):
           arg0=numpy.array(float(arg0),dtype=numpy.float64)
           arg1=numpy.array(arg1,dtype=numpy.float64)
+       elif isinstance(arg1,complex):
+          arg0=numpy.array(complex(arg0),dtype=numpy.complex)
+          arg1=numpy.array(complex(arg1),dtype=numpy.complex)          
        elif isinstance(arg1,int):
           arg0=numpy.array(float(arg0),dtype=numpy.float64)
           arg1=numpy.array(float(arg1),dtype=numpy.float64)
@@ -744,10 +772,10 @@ def log10(arg):
       return numpy.log10(arg)
    elif isinstance(arg,escore.Data):
       return arg._log10()
-   elif isinstance(arg,float):
+   elif isinstance(arg,complex):
+      return cmath.log10(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.log10(arg)
-   elif isinstance(arg,int):
-      return math.log10(float(arg))
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.log10)
    else:
@@ -781,6 +809,8 @@ def wherePositive(arg):
         return 0.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.wherePositive)
+   elif isinstance(arg,complex):
+      raise TypeError("wherePositive: operation not supported for complex");
    else:
       raise TypeError("wherePositive: Unknown argument type.")
 
@@ -812,6 +842,8 @@ def whereNegative(arg):
         return 0.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.whereNegative)
+   elif isinstance(arg,complex):
+      raise TypeError("whereNegative: operation not supported for complex");
    else:
       raise TypeError("whereNegative: Unknown argument type.")
 
@@ -843,6 +875,8 @@ def whereNonNegative(arg):
         return 1.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.whereNonNegative)
+   elif isinstance(arg,complex):
+      raise TypeError("whereNonNegative: operation not supported for complex");
    else:
       raise TypeError("whereNonNegative: Unknown argument type.")
 
@@ -874,6 +908,8 @@ def whereNonPositive(arg):
         return 1.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.whereNonPositive)
+   elif isinstance(arg,complex):
+      raise TypeError("whereNonPositive: operation not supported for complex");
    else:
       raise TypeError("whereNonPositive: Unknown argument type.")
 
@@ -902,16 +938,11 @@ def whereZero(arg,tol=None,rtol=math.sqrt(EPSILON)):
       return out
    elif isinstance(arg,escore.Data):
       return arg._whereZero(tol)
-   elif isinstance(arg,float):
+   elif isinstance(arg,float) or isinstance(arg,complex) or isinstance(arg, int):
       if abs(arg)<=tol:
         return 1.
       else:
         return 0.
-   elif isinstance(arg,int):
-      if abs(float(arg))<=tol:
-        return 1.
-      else:
-        return 0.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.whereZero)
    else:
@@ -940,16 +971,11 @@ def whereNonZero(arg,tol=0.):
       return out
    elif isinstance(arg,escore.Data):
       return arg._whereNonZero(tol)
-   elif isinstance(arg,float):
+   elif isinstance(arg,float) or isinstance(arg,complex) or isinstance(arg, int):
       if abs(arg)>tol:
         return 1.
       else:
         return 0.
-   elif isinstance(arg,int):
-      if abs(float(arg))>tol:
-        return 1.
-      else:
-        return 0.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.whereNonZero)
    else:
@@ -1001,9 +1027,9 @@ def sin(arg):
       return numpy.sin(arg)
    elif isinstance(arg,escore.Data):
       return arg._sin()
-   elif isinstance(arg,float):
-      return math.sin(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.sin(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.sin(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.sin)
@@ -1024,9 +1050,9 @@ def cos(arg):
       return numpy.cos(arg)
    elif isinstance(arg,escore.Data):
       return arg._cos()
-   elif isinstance(arg,float):
-      return math.cos(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.cos(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.cos(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.cos)
@@ -1047,9 +1073,9 @@ def tan(arg):
       return numpy.tan(arg)
    elif isinstance(arg,escore.Data):
       return arg._tan()
-   elif isinstance(arg,float):
-      return math.tan(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.tan(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.tan(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.tan)
@@ -1070,9 +1096,9 @@ def asin(arg):
       return numpy.arcsin(arg)
    elif isinstance(arg,escore.Data):
       return arg._asin()
-   elif isinstance(arg,float):
-      return math.asin(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.asin(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.asin(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.asin)
@@ -1093,9 +1119,9 @@ def acos(arg):
       return numpy.arccos(arg)
    elif isinstance(arg,escore.Data):
       return arg._acos()
-   elif isinstance(arg,float):
-      return math.acos(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.acos(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.acos(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.acos)
@@ -1116,9 +1142,9 @@ def atan(arg):
       return numpy.arctan(arg)
    elif isinstance(arg,escore.Data):
       return arg._atan()
-   elif isinstance(arg,float):
-      return math.atan(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.atan(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.atan(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.atan)
@@ -1147,9 +1173,9 @@ def sinh(arg):
       return numpy.sinh(arg)
    elif isinstance(arg,escore.Data):
       return arg._sinh()
-   elif isinstance(arg,float):
-      return math.sinh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.sinh(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.sinh(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.sinh)
@@ -1170,9 +1196,9 @@ def cosh(arg):
       return numpy.cosh(arg)
    elif isinstance(arg,escore.Data):
       return arg._cosh()
-   elif isinstance(arg,float):
-      return math.cosh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.cosh(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.cosh(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.cosh)
@@ -1193,9 +1219,9 @@ def tanh(arg):
       return numpy.tanh(arg)
    elif isinstance(arg,escore.Data):
       return arg._tanh()
-   elif isinstance(arg,float):
-      return math.tanh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.tanh(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.tanh(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.tanh)
@@ -1216,9 +1242,9 @@ def asinh(arg):
       return numpy.arcsinh(arg)
    elif isinstance(arg,escore.Data):
       return arg._asinh()
-   elif isinstance(arg,float):
-      return numpy.arcsinh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return numpy.arcsinh(complex(arg))
+   elif isinstance(arg,float) or isinstance(arg,int):
       return numpy.arcsinh(float(arg))
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.asinh)
@@ -1239,9 +1265,9 @@ def acosh(arg):
       return numpy.arccosh(arg)
    elif isinstance(arg,escore.Data):
       return arg._acosh()
-   elif isinstance(arg,float):
-      return numpy.arccosh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return numpy.arccosh(complex(arg))
+   elif isinstance(arg,float) or isinstance(arg,int):
       return numpy.arccosh(float(arg))
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.acosh)
@@ -1262,9 +1288,9 @@ def atanh(arg):
       return numpy.arctanh(arg)
    elif isinstance(arg,escore.Data):
       return arg._atanh()
-   elif isinstance(arg,float):
-      return numpy.arctanh(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return numpy.arctanh(complex(arg))
+   elif isinstance(arg,float) or isinstance(arg,int):
       return numpy.arctanh(float(arg))
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.atanh)
@@ -1285,9 +1311,9 @@ def exp(arg):
       return numpy.exp(arg)
    elif isinstance(arg,escore.Data):
       return arg._exp()
-   elif isinstance(arg,float):
-      return math.exp(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.exp(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.exp(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.exp)
@@ -1308,9 +1334,9 @@ def sqrt(arg):
       return numpy.sqrt(arg)
    elif isinstance(arg,escore.Data):
       return arg._sqrt()
-   elif isinstance(arg,float):
-      return math.sqrt(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.sqrt(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.sqrt(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.sqrt)
@@ -1331,9 +1357,9 @@ def log(arg):
       return numpy.log(arg)
    elif isinstance(arg,escore.Data):
       return arg._log()
-   elif isinstance(arg,float):
-      return math.log(arg)
-   elif isinstance(arg,int):
+   elif isinstance(arg,complex):
+      return cmath.log(arg)
+   elif isinstance(arg,float) or isinstance(arg,int):
       return math.log(arg)
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.log)
@@ -1354,20 +1380,13 @@ def sign(arg):
       return wherePositive(arg)-whereNegative(arg)
    elif isinstance(arg,escore.Data):
       return arg._sign()
-   elif isinstance(arg,float):
+   elif isinstance(arg,float) or isinstance(arg,complex) or isinstance(arg,int):
       if arg>0:
         return 1.
       elif arg<0:
         return -1.
       else:
         return 0.
-   elif isinstance(arg,int):
-      if float(arg)>0:
-        return 1.
-      elif float(arg)<0:
-        return -1.
-      else:
-        return 0.
    elif isinstance(arg,sym.Symbol):
       return arg.applyfunc(sym.symfn.sign)
    else:
@@ -1486,6 +1505,8 @@ def trace(arg,axis_offset=0):
       if not s[axis_offset] == s[axis_offset+1]:
         raise ValueError("dimensions of component %d and %d must match."%(axis_offset,axis_offset+1))
       return arg.trace(axis_offset)
+   elif isinstance(arg,complex):
+      raise TypeError("illegal argument type complex.")
    elif isinstance(arg,float):
       raise TypeError("illegal argument type float.")
    elif isinstance(arg,int):
@@ -1519,6 +1540,10 @@ def transpose(arg,axis_offset=None):
       if axis_offset<0 or axis_offset>r:
         raise ValueError("axis_offset must be between 0 and %s"%r)
       return arg._transpose(axis_offset)
+   elif isinstance(arg,complex):
+      if not ( axis_offset==0 or axis_offset is None):
+        raise ValueError("axis_offset must be 0 for complex argument")
+      return arg
    elif isinstance(arg,float):
       if not ( axis_offset==0 or axis_offset is None):
         raise ValueError("axis_offset must be 0 for float argument")
@@ -1560,6 +1585,8 @@ def swap_axes(arg,axis0=0,axis1=1):
       return arg._swap_axes(axis0,axis1)
    elif isinstance(arg,sym.Symbol):
       return arg.swap_axes(axis0,axis1)
+   elif isinstance(arg,complex):
+      raise TypeError("complex argument is not supported.")
    elif isinstance(arg,float):
       raise TypeError("float argument is not supported.")
    elif isinstance(arg,int):
@@ -1608,6 +1635,8 @@ def symmetric(arg):
         else:
             raise ValueError("symmetric: rank 2 or 4 is required.")
         return (arg+transpose(arg))/2
+    elif isinstance(arg,complex):
+      return arg
     elif isinstance(arg,float):
       return arg
     elif isinstance(arg,int):
@@ -1617,52 +1646,161 @@ def symmetric(arg):
 
 def nonsymmetric(arg):
     """
-    Returns the non-symmetric part of the square matrix ``arg``. That is,
+    Deprecated alias for antisymmetric
+    """
+    return antisymmetric(arg)
+
+def antisymmetric(arg):
+    """
+    Returns the anti-symmetric part of the square matrix ``arg``. That is,
     *(arg-transpose(arg))/2*.
 
     :param arg: input matrix. Must have rank 2 or 4 and be square.
     :type arg: ``numpy.ndarray``, `escript.Data`, `Symbol`
-    :return: non-symmetric part of ``arg``
+    :return: anti-symmetric part of ``arg``
     :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
     """
     if isinstance(arg,numpy.ndarray):
       if arg.ndim==2:
         if not (arg.shape[0]==arg.shape[1]):
-           raise ValueError("nonsymmetric: argument must be square.")
+           raise ValueError("antisymmetric: argument must be square.")
       elif arg.ndim==4:
         if not (arg.shape[0]==arg.shape[2] and arg.shape[1]==arg.shape[3]):
-           raise ValueError("nonsymmetric: argument must be square.")
+           raise ValueError("antisymmetric: argument must be square.")
       else:
-        raise ValueError("nonsymmetric: rank 2 or 4 is required.")
+        raise ValueError("antisymmetric: rank 2 or 4 is required.")
       return (arg-transpose(arg))/2
     elif isinstance(arg,escore.Data):
       if arg.getRank()==2:
         if not (arg.getShape()[0]==arg.getShape()[1]):
            raise ValueError("argument must be square.")
-        return arg._nonsymmetric()
+        return arg._antisymmetric()
       elif arg.getRank()==4:
         if not (arg.getShape()[0]==arg.getShape()[2] and arg.getShape()[1]==arg.getShape()[3]):
            raise ValueError("argument must be square.")
-        return arg._nonsymmetric()
+        return arg._antisymmetric()
       else:
         raise ValueError("rank 2 or 4 is required.")
     elif isinstance(arg, sym.Symbol):
         if arg.getRank()==2:
             if arg.getShape()[0]!=arg.getShape()[1]:
-                raise ValueError("nonsymmetric: argument must be square.")
+                raise ValueError("antisymmetric: argument must be square.")
         elif arg.getRank()==4:
             if arg.getShape()[0]!=arg.getShape()[2] or arg.getShape()[1]!=arg.getShape()[3]:
-                raise ValueError("nonsymmetric: argument must be square.")
+                raise ValueError("antisymmetric: argument must be square.")
         else:
-            raise ValueError("nonsymmetric: rank 2 or 4 is required.")
+            raise ValueError("antisymmetric: rank 2 or 4 is required.")
         return (arg-transpose(arg))/2
+    elif isinstance(arg,complex):
+        return complex(0)
     elif isinstance(arg,float):
-        return arg
+        return float(0)
     elif isinstance(arg,int):
-        return float(arg)
+        return float(0)
+    else:
+        raise TypeError("antisymmetric: Unknown argument type.")
+
+def hermitian(arg):
+    """
+    Returns the hermitian part of the square matrix ``arg``. That is,
+    *(arg+adjoint(arg))/2*.
+
+    :param arg: input matrix. Must have rank 2 or 4 and be square.
+    :type arg: ``numpy.ndarray``, `escript.Data`, `Symbol`
+    :return: hermitian part of ``arg``
+    :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
+    """
+    if isinstance(arg,numpy.ndarray):
+      if arg.ndim==2:
+        if not (arg.shape[0]==arg.shape[1]):
+           raise ValueError("argument must be square.")
+      elif arg.ndim==4:
+        if not (arg.shape[0]==arg.shape[2] and arg.shape[1]==arg.shape[3]):
+           raise ValueError("argument must be square.")
+      else:
+        raise ValueError("rank 2 or 4 is required.")
+      return (arg+arg.transpose().conj())/2
+    elif isinstance(arg,escore.Data):
+      if arg.getRank()==2:
+        if not (arg.getShape()[0]==arg.getShape()[1]):
+           raise ValueError("argument must be square.")
+        return arg._hermitian()
+      elif arg.getRank()==4:
+        if not (arg.getShape()[0]==arg.getShape()[2] and arg.getShape()[1]==arg.getShape()[3]):
+           raise ValueError("argument must be square.")
+        return arg._hermitian()
+      else:
+        raise ValueError("rank 2 or 4 is required.")
+    elif isinstance(arg, sym.Symbol):
+        if arg.getRank()==2:
+            if arg.getShape()[0]!=arg.getShape()[1]:
+                raise ValueError("hermitian: argument must be square.")
+        elif arg.getRank()==4:
+            if arg.getShape()[0]!=arg.getShape()[2] or arg.getShape()[1]!=arg.getShape()[3]:
+                raise ValueError("hermitian: argument must be square.")
+        else:
+            raise ValueError("hermitian: rank 2 or 4 is required.")
+        return (arg+adjoint(arg))/2
+    elif isinstance(arg,complex):
+      return complex(arg.real)
+    elif isinstance(arg,float):
+      return arg
+    elif isinstance(arg,int):
+      return float(arg)
     else:
-        raise TypeError("nonsymmetric: Unknown argument type.")
+      raise TypeError("hermitian: Unknown argument type.")
 
+def antihermitian(arg):
+    """
+    Returns the anti-hermitian part of the square matrix ``arg``. That is,
+    *(arg-adjoint(arg))/2*.
+
+    :param arg: input matrix. Must have rank 2 or 4 and be square.
+    :type arg: ``numpy.ndarray``, `escript.Data`, `Symbol`
+    :return: anti-hermitian part of ``arg``
+    :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
+    """
+    if isinstance(arg,numpy.ndarray):
+      if arg.ndim==2:
+        if not (arg.shape[0]==arg.shape[1]):
+           raise ValueError("antihermitian: argument must be square.")
+      elif arg.ndim==4:
+        if not (arg.shape[0]==arg.shape[2] and arg.shape[1]==arg.shape[3]):
+           raise ValueError("antihermitian: argument must be square.")
+      else:
+        raise ValueError("antihermitian: rank 2 or 4 is required.")
+      return (arg-arg.transpose().conj())/2
+    elif isinstance(arg,escore.Data):
+      if arg.getRank()==2:
+        if not (arg.getShape()[0]==arg.getShape()[1]):
+           raise ValueError("argument must be square.")
+        return arg._antihermitian()
+      elif arg.getRank()==4:
+        if not (arg.getShape()[0]==arg.getShape()[2] and arg.getShape()[1]==arg.getShape()[3]):
+           raise ValueError("argument must be square.")
+        return arg._antihermitian()
+      else:
+        raise ValueError("rank 2 or 4 is required.")
+    elif isinstance(arg, sym.Symbol):
+        if arg.getRank()==2:
+            if arg.getShape()[0]!=arg.getShape()[1]:
+                raise ValueError("antihermitian: argument must be square.")
+        elif arg.getRank()==4:
+            if arg.getShape()[0]!=arg.getShape()[2] or arg.getShape()[1]!=arg.getShape()[3]:
+                raise ValueError("antihermitian: argument must be square.")
+        else:
+            raise ValueError("antihermitian: rank 2 or 4 is required.")
+        return (arg-adjoint(arg))/2
+    elif isinstance(arg,complex):
+        return complex(arg.imag*1j)
+    elif isinstance(arg,float):
+        return float(0)
+    elif isinstance(arg,int):
+        return float(0)
+    else:
+        raise TypeError("antihermitian: Unknown argument type.")        
+        
+        
 def inverse(arg):
     """
     Returns the inverse of the square matrix ``arg``.
@@ -1680,6 +1818,8 @@ def inverse(arg):
       return numpy.linalg.tensorinv(arg,ind=1)
     elif isinstance(arg,escore.Data):
       return escript_inverse(arg)
+    elif isinstance(arg,complex):
+      return 1./arg
     elif isinstance(arg,float):
       return 1./arg
     elif isinstance(arg,int):
@@ -1762,6 +1902,8 @@ def eigenvalues(arg):
       return out
     elif isinstance(arg,escore.Data):
       return arg._eigenvalues()
+    elif isinstance(arg,complex):
+      return arg
     elif isinstance(arg,float):
       return arg
     elif isinstance(arg,int):
@@ -1790,6 +1932,8 @@ def eigenvalues_and_eigenvectors(arg):
       raise TypeError("eigenvalues_and_eigenvectors does not support numpy.ndarray arguments")
     elif isinstance(arg,escore.Data):
       return arg._eigenvalues_and_eigenvectors()
+    elif isinstance(arg,complex):
+      return (numpy.array([[arg]],numpy.complex_),numpy.ones((1,1),numpy.complex_))
     elif isinstance(arg,float):
       return (numpy.array([[arg]],numpy.float_),numpy.ones((1,1),numpy.float_))
     elif isinstance(arg,int):
@@ -2094,7 +2238,8 @@ def generalTensorProduct(arg0,arg1,axis_offset=0):
              point
     :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
     """
-    if isinstance(arg0,float) and isinstance(arg1,float): return arg1*arg0
+    if (isinstance(arg0,float) or isinstance(arg0,complex)) and (isinstance(arg1,float) or isinstance(arg1,complex)):
+         return arg1*arg0
     arg0,arg1=matchType(arg0,arg1)
     # at this stage arg0 and arg1 are both numpy.ndarray or escript.Data,
     # or one is a Symbol and the other either of the allowed types
@@ -2121,7 +2266,10 @@ def generalTensorProduct(arg0,arg1,axis_offset=0):
        for i in sh1[:axis_offset]: d01*=i
        arg0_c.resize((d0,d01))
        arg1_c.resize((d01,d1))
-       out=numpy.zeros((d0,d1),numpy.float64)
+       if arg0_c.dtype!=numpy.float64:
+           out=numpy.zeros((d0,d1),arg0_c.dtype)
+       else:
+           out=numpy.zeros((d0,d1),numpy.float64)
        for i0 in range(d0):
           for i1 in range(d1):
              out[i0,i1]=numpy.sum(arg0_c[i0,:]*arg1_c[:,i1])
@@ -2241,7 +2389,7 @@ def generalTransposedTensorProduct(arg0,arg1,axis_offset=0):
              each data point
     :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
     """
-    if isinstance(arg0,float) and isinstance(arg1,float): return arg1*arg0
+    if (isinstance(arg0,float) and isinstance(arg1,float)) or (isinstance(arg0,complex) and isinstance(arg1,complex)): return arg1*arg0
     arg0,arg1=matchType(arg0,arg1)
     # at this stage arg0 and arg1 are both numpy.ndarray or escript.Data,
     # or one is a Symbol and the other either of the allowed types
@@ -2377,7 +2525,9 @@ def generalTensorTransposedProduct(arg0,arg1,axis_offset=0):
              each data point
     :rtype: ``numpy.ndarray``, `escript.Data`, `Symbol` depending on the input
     """
-    if isinstance(arg0,float) and isinstance(arg1,float): return arg1*arg0
+    if ((isinstance(arg0,float) or isinstance(arg0,complex)) and 
+        (isinstance(arg1,float) or isinstance(arg1,complex))):
+            return arg1*arg0
     arg0,arg1=matchType(arg0,arg1)
     # at this stage arg0 and arg1 are both numpy.ndarray or escript.Data,
     # or one is a Symbol and the other either of the allowed types
@@ -2387,7 +2537,7 @@ def generalTensorTransposedProduct(arg0,arg1,axis_offset=0):
        r1=getRank(arg1)
        if not sh0[arg0.getRank()-axis_offset:]==sh1[r1-axis_offset:]:
           raise ValueError("dimensions of last %s components in left argument don't match the first %s components in the right argument."%(axis_offset,axis_offset))
-       if isinstance(arg1,float):
+       if isinstance(arg1,float) or isinstance(arg1,complex):
           return arg0*arg1
        elif isinstance(arg1,numpy.ndarray) or isinstance(arg1, sym.Symbol):
           return arg0.tensorTransposedProduct(arg1, axis_offset)
@@ -2690,36 +2840,38 @@ def mkDir(*pathname):
     :type pathname: ``str`` or ``sequence of strings``
     :note: The method is MPI safe.
     """
-    errno=0
-    p_fail=None
-    if getMPIRankWorld()==0:
+    errno = 0
+    p_fail = None
+    ex = None
+    if getMPIRankWorld() == 0:
       for p in pathname:
        if os.path.exists(p):
           if not os.path.isdir(p):
-                errno=2
-                p_fail=p
+                errno = 2
+                p_fail = p
        else:
           try:
               os.makedirs(p)
           except Exception as e:
-              errno=1
-              p_fail=p
+              ex = e
+              errno = 1
+              p_fail = p
     
-    errno=getMPIWorldMax(errno)
-    if errno>0:
+    errno = getMPIWorldMax(errno)
+    if errno > 0:
          if errno==2:
             if p_fail is None:
                raise IOError("Unable to create directory.")
             else:
                raise IOError("Unable to create directory %s. It already exists and is not a directory."%p_fail)
-         elif e is None:
+         elif ex is None:
             if p_fail is None:
                raise IOError("Unable to create directory.")
             else:
                raise IOError("Unable to create directory %s."%p_fail)
          else:
-            if hasattr(e,"message"):
-               raise IOError(e.message)
+            if len(str(ex)) > 0:
+               raise IOError(str(ex))
             else:
                if p_fail is None:
                   raise IOError("Unable to create directory.")
diff --git a/escriptcore/src/AbstractContinuousDomain.cpp b/escriptcore/src/AbstractContinuousDomain.cpp
index a02c252..2d52e2c 100644
--- a/escriptcore/src/AbstractContinuousDomain.cpp
+++ b/escriptcore/src/AbstractContinuousDomain.cpp
@@ -14,8 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
 #include "AbstractContinuousDomain.h"
 #include "Data.h"
 
@@ -188,16 +186,16 @@ ATP_ptr AbstractContinuousDomain::newTransportProblem(
   return ATP_ptr();
 }
 
-dim_t AbstractContinuousDomain::getNumDataPointsGlobal() const
+DataTypes::dim_t AbstractContinuousDomain::getNumDataPointsGlobal() const
 {
   throwStandardException("AbstractContinuousDomain::getNumDataPointsGlobal");
   return 1;
 }
 
-std::pair<int,dim_t> AbstractContinuousDomain::getDataShape(int functionSpaceCode) const
+std::pair<int,DataTypes::dim_t> AbstractContinuousDomain::getDataShape(int functionSpaceCode) const
 {
   throwStandardException("AbstractContinuousDomain::getDataShape");
-  return std::pair<int,dim_t>(0,0);
+  return std::pair<int,DataTypes::dim_t>(0,0);
 }
 
 void AbstractContinuousDomain::setNewX(const escript::Data& arg)
@@ -213,6 +211,5 @@ void AbstractContinuousDomain::Print_Mesh_Info(const bool full) const
 }
 
 
-
-
 }  // end of namespace
+
diff --git a/escriptcore/src/AbstractContinuousDomain.h b/escriptcore/src/AbstractContinuousDomain.h
index 7a5fc9b..ba2b7cb 100644
--- a/escriptcore/src/AbstractContinuousDomain.h
+++ b/escriptcore/src/AbstractContinuousDomain.h
@@ -15,18 +15,17 @@
 *****************************************************************************/
 
 
-#if !defined escript_AbstractContinuousDomain_20040528_H
-#define escript_AbstractContinuousDomain_20040528_H
+#ifndef __ESCRIPT_ABSTRACTCONTINUOUSDOMAIN_H__
+#define __ESCRIPT_ABSTRACTCONTINUOUSDOMAIN_H__
 
 #include "system_dep.h"
 #include "AbstractDomain.h"
+#include "AbstractSystemMatrix.h"
+#include "AbstractTransportProblem.h"
 
 #include <string>
 #include <vector>
 
-#include "AbstractSystemMatrix.h"
-#include "AbstractTransportProblem.h"
-
 namespace escript {
 
 //
@@ -272,14 +271,14 @@ class ESCRIPT_DLL_API AbstractContinuousDomain : public AbstractDomain
      \brief
      Return the number of data points summed across all MPI processes
   */
-  virtual dim_t getNumDataPointsGlobal() const;
+  virtual DataTypes::dim_t getNumDataPointsGlobal() const;
 
   /**
      \brief
      Return the number of data points per sample, and the number of samples as a pair.
      \param functionSpaceCode Input -
   */
-  virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
+  virtual std::pair<int,DataTypes::dim_t> getDataShape(int functionSpaceCode) const;
 
   /**
      \brief
@@ -292,14 +291,9 @@ class ESCRIPT_DLL_API AbstractContinuousDomain : public AbstractDomain
      \param full
   */
   virtual void Print_Mesh_Info(const bool full=false) const;
-
-
- protected:
-
- private:
-
 };
 
 } // end of namespace
 
-#endif
+#endif // __ESCRIPT_ABSTRACTCONTINUOUSDOMAIN_H__
+
diff --git a/escriptcore/src/AbstractDomain.cpp b/escriptcore/src/AbstractDomain.cpp
index 6e8662b..6c1515d 100644
--- a/escriptcore/src/AbstractDomain.cpp
+++ b/escriptcore/src/AbstractDomain.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "AbstractDomain.h" 
 #include "DomainException.h"
 
diff --git a/escriptcore/src/AbstractDomain.h b/escriptcore/src/AbstractDomain.h
index da30292..cc30b72 100644
--- a/escriptcore/src/AbstractDomain.h
+++ b/escriptcore/src/AbstractDomain.h
@@ -19,9 +19,9 @@
 #define __ESCRIPT_ABSTRACTDOMAIN_H__
 
 #include "system_dep.h"
-#include "Pointers.h"
 #include "DataTypes.h"
-#include <esysUtils/Esys_MPI.h>
+#include "EsysMPI.h"
+#include "Pointers.h"
 
 #include <boost/python/tuple.hpp>
 
@@ -65,6 +65,12 @@ public:
 
     /**
      \brief
+     returns a shared pointer to the MPI information wrapper for this domain
+    */
+    virtual JMPI getMPI() const = 0;
+
+    /**
+     \brief
      return the number of processors used for this domain
     */
     virtual int getMPISize() const = 0;
@@ -161,7 +167,7 @@ public:
         \return pair, first - number of data points per sample,
                 second - number of samples
     */
-    virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const = 0;
+    virtual std::pair<int,DataTypes::dim_t> getDataShape(int functionSpaceCode) const = 0;
 
     /**
        \brief
@@ -169,7 +175,7 @@ public:
        \param functionSpaceType Input - The function space type.
        \param sampleNo Input - The sample number.
     */
-    virtual int getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const = 0;
+    virtual int getTagFromSampleNo(int functionSpaceType, DataTypes::index_t sampleNo) const = 0;
 
     /**
        \brief
@@ -205,7 +211,7 @@ public:
        \param functionSpaceType Input - The function space type.
     */
     ESCRIPT_DLL_API
-    virtual const dim_t* borrowSampleReferenceIDs(int functionSpaceType) const = 0;
+    virtual const DataTypes::dim_t* borrowSampleReferenceIDs(int functionSpaceType) const = 0;
 
     /**
        \brief
@@ -326,7 +332,7 @@ public:
     \brief True if this rank owns the sample(id)
     Must be implemented by the Domain adapter
     */
-    virtual bool ownSample(int fs_code, index_t id) const = 0;
+    virtual bool ownSample(int fs_code, DataTypes::index_t id) const = 0;
 
     /**
        \brief
diff --git a/escriptcore/src/AbstractReducer.cpp b/escriptcore/src/AbstractReducer.cpp
index fd45e8d..054ed9f 100644
--- a/escriptcore/src/AbstractReducer.cpp
+++ b/escriptcore/src/AbstractReducer.cpp
@@ -13,21 +13,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include <sstream>
-#include <limits>
-#include <boost/python/extract.hpp>
-#include <boost/scoped_array.hpp>
-
 #include "AbstractReducer.h"
 #include "SplitWorldException.h"
 
-using namespace boost::python;
-using namespace escript;
-
+namespace escript {
 
 const int AbstractReducer::PARAMTAG=120567;
 
@@ -56,3 +45,5 @@ bool AbstractReducer::canClash()
     return false;
 }
 
+} // namespace escript
+
diff --git a/escriptcore/src/AbstractReducer.h b/escriptcore/src/AbstractReducer.h
index 4652cc7..3214537 100644
--- a/escriptcore/src/AbstractReducer.h
+++ b/escriptcore/src/AbstractReducer.h
@@ -13,11 +13,12 @@
 *
 *****************************************************************************/
 
-#ifndef __ESCRIPT_REDUCER_H__
-#define __ESCRIPT_REDUCER_H__
+#ifndef __ESCRIPT_ABSTRACTREDUCER_H__
+#define __ESCRIPT_ABSTRACTREDUCER_H__
+
+#include <escript/Data.h>
+#include <escript/EsysMPI.h>
 
-#include "esysUtils/Esys_MPI.h"
-#include "escript/Data.h"
 #include <boost/shared_ptr.hpp>
 
 namespace escript
@@ -28,86 +29,85 @@ namespace reducerstatus
 {
   
 // Because these may be used in loops, the values must form a contiguous block (except ERROR)  
-const unsigned char NONE=0;  	// I have no value for this var and no interest in it
-const unsigned char INTERESTED=1;	// I am interested in this variable but I have no value for it
-const unsigned char OLD=2;	// I have a copy from elsewhere but no new values to contribute
-const unsigned char OLDINTERESTED=3;	// interested but only have a cached copy (no new values)
-const unsigned char NEW=4;	// I have a new value for this variable
-const unsigned char ERROR='!';	// Something bad happened  
+const unsigned char NONE=0;     // I have no value for this var and no interest in it
+const unsigned char INTERESTED=1;   // I am interested in this variable but I have no value for it
+const unsigned char OLD=2;  // I have a copy from elsewhere but no new values to contribute
+const unsigned char OLDINTERESTED=3;    // interested but only have a cached copy (no new values)
+const unsigned char NEW=4;  // I have a new value for this variable
+const unsigned char ERROR='!';  // Something bad happened  
 }
   
 // There is currently no way to get a completely generic result out of this
 class AbstractReducer
 {
 public:
-    virtual ~AbstractReducer(){};
-	// Is the value compatible with this reduction function?
-	// does not guarantee the value is compatible with
-	// other values added so far
+    virtual ~AbstractReducer() {}
+    // Is the value compatible with this reduction function?
+    // does not guarantee the value is compatible with
+    // other values added so far
     virtual bool valueCompatible(boost::python::object v)=0;
-	// merge the parameter with the answer we already have
+    // merge the parameter with the answer we already have
     virtual bool reduceLocalValue(boost::python::object v, std::string& errstring)=0;
-	// clear previous result ready for a new set of reductions
+    // clear previous result ready for a new set of reductions
     virtual void reset()=0;
     
     virtual std::string description()=0;
     
-	// converse with other subworlds to ensure subtype information matches
-	// The main problem case here would be Data on different function spaces
-	// same communicator requirements for reduceRemoteValues
-	// Must give the same answer when called on any process in the subworlds
-	// Must only be called on 
-    virtual bool checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring)=0;
-	// Some reducers need to know what domain they are operating in
-    virtual void setDomain(Domain_ptr dom){} 
+    // converse with other subworlds to ensure subtype information matches
+    // The main problem case here would be Data on different function spaces
+    // same communicator requirements for reduceRemoteValues
+    // Must give the same answer when called on any process in the subworlds
+    // Must only be called on participating SubWorlds
+    virtual bool checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring)=0;
+    // Some reducers need to know what domain they are operating in
+    virtual void setDomain(Domain_ptr dom) {} 
     
 
 #ifdef ESYS_MPI  
-	// send from proc 0 in the communicator to all others
-	// second param is true if we have rank o
+    // send from proc 0 in the communicator to all others
+    // second param is true if we have rank 0
     virtual bool groupSend(MPI_Comm& com, bool imsending)=0;
     
-	// reduction with some procs submitting identity values
+    // reduction with some procs submitting identity values
     virtual bool groupReduce(MPI_Comm& com, char mystate)=0;  
 #endif  
     
-	// call to merge with values on other subworlds
-	// It does not take a value argument because local values should have 
-	// already been added with reduceLocal
-	// Must only be called on participating SubWorlds
-	// the mpi_info holds a communicator linking corresponding processes
-	// in every participating subworld
+    // call to merge with values on other subworlds
+    // It does not take a value argument because local values should have 
+    // already been added with reduceLocalValue()
+    // Must only be called on participating SubWorlds
+    // the mpi_info holds a communicator linking corresponding processes
+    // in every participating subworld
     virtual bool reduceRemoteValues(MPI_Comm& comm)=0;
     
-	// true if at least one localValue has been added
-	// used to check if this subworld should participate in remote merges
+    // true if at least one localValue has been added
+    // used to check if this subworld should participate in remote merges
     bool hasValue();
     
-	// true if reductions could fail for some reason other than MPI failure
-	// for example SET type variables 
+    // true if reductions could fail for some reason other than MPI failure
+    // for example SET type variables 
     virtual bool canClash();
     
-	// Get a value for this variable from another process
-	// This is not a reduction and will replace any existing value
-    virtual bool recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)=0;
+    // Get a value for this variable from another process
+    // This is not a reduction and will replace any existing value
+    virtual bool recvFrom(int localid, int source, JMPI& mpiinfo)=0;
 
-	// Send a value to this variable to another process
-	// This is not a reduction and will replace any existing value    
-    virtual bool sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo)=0;
+    // Send a value to this variable to another process
+    // This is not a reduction and will replace any existing value    
+    virtual bool sendTo(int localid, int target, JMPI& mpiinfo)=0;
     
     virtual double getDouble();
    
     virtual boost::python::object getPyObj()=0; 
     
-	// notify the reducer that a new runJobs() call 
-	// is being executed
+    // notify the reducer that a new runJobs() call is being executed
     virtual void newRunJobs();
 
     virtual void clear();
 
     virtual void copyValueFrom(boost::shared_ptr<AbstractReducer>& src)=0;
-protected:
 
+protected:
     bool valueadded;
     bool had_an_export_this_round;
     static const int PARAMTAG;    
@@ -118,5 +118,5 @@ typedef boost::shared_ptr<AbstractReducer> Reducer_ptr;
 
 }
 
-#endif // __ESCRIPT_REDUCER_H__
+#endif // __ESCRIPT_ABSTRACTREDUCER_H__
 
diff --git a/escriptcore/src/AbstractSystemMatrix.cpp b/escriptcore/src/AbstractSystemMatrix.cpp
index 1456e6f..174caad 100644
--- a/escriptcore/src/AbstractSystemMatrix.cpp
+++ b/escriptcore/src/AbstractSystemMatrix.cpp
@@ -14,11 +14,9 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
 #include "AbstractSystemMatrix.h" 
-#include "DataException.h"
 #include "Data.h"
+#include "DataException.h"
 #include "DataTypes.h"
 
 namespace escript {
@@ -40,6 +38,24 @@ AbstractSystemMatrix::AbstractSystemMatrix(int row_blocksize,
 
 }
 
+ASM_ptr AbstractSystemMatrix::getPtr()
+{
+    try {
+        return shared_from_this();
+    } catch (boost::bad_weak_ptr p) {
+        return ASM_ptr(this);
+    }
+}
+
+const_ASM_ptr AbstractSystemMatrix::getPtr() const 
+{  
+    try {
+        return shared_from_this();
+    } catch (boost::bad_weak_ptr p) {
+        return const_ASM_ptr(this);
+    }
+}
+
 Data operator*(const AbstractSystemMatrix& left, const Data& right)
 {
     return left.vectorMultiply(right);
@@ -55,7 +71,10 @@ Data AbstractSystemMatrix::vectorMultiply(const Data& right) const
     if (getRowBlockSize() > 1)
         shape.push_back(getRowBlockSize());
 
-    Data out(0., shape, getRowFunctionSpace(), true);
+    Data out = right.isComplex() ?
+        Data(DataTypes::cplx_t(0), shape, getRowFunctionSpace(), true) :
+        Data(0., shape, getRowFunctionSpace(), true);
+
     Data in(right, getColumnFunctionSpace());
     ypAx(out, in);
     return out;
@@ -78,7 +97,9 @@ Data AbstractSystemMatrix::solve(const Data& in,
     DataTypes::ShapeType shape;
     if (getRowBlockSize() > 1)
         shape.push_back(getColumnBlockSize());
-    Data out(0., shape, getColumnFunctionSpace(), true);
+    Data out = in.isComplex() ?
+        Data(DataTypes::cplx_t(0), shape, getColumnFunctionSpace(), true) :
+        Data(0., shape, getColumnFunctionSpace(), true);
     setToSolution(out, *const_cast<Data*>(&in), options);
     return out;
 }
@@ -105,7 +126,7 @@ void AbstractSystemMatrix::saveHB(const std::string& fileName) const
     throw SystemMatrixException("Harwell-Boeing interface not available.");
 }
 
-void AbstractSystemMatrix::resetValues()
+void AbstractSystemMatrix::resetValues(bool preserveSolverData)
 {
     throw SystemMatrixException("resetValues() is not implemented.");
 }
diff --git a/escriptcore/src/AbstractSystemMatrix.h b/escriptcore/src/AbstractSystemMatrix.h
index 264ce38..9f90a4b 100644
--- a/escriptcore/src/AbstractSystemMatrix.h
+++ b/escriptcore/src/AbstractSystemMatrix.h
@@ -14,27 +14,32 @@
 *
 *****************************************************************************/
 
-
 #ifndef __ESCRIPT_ABSTRACTSYSTEMMATRIX_H__
 #define __ESCRIPT_ABSTRACTSYSTEMMATRIX_H__
 
 #include "system_dep.h"
 #include "FunctionSpace.h"
+#include "Pointers.h"
 #include "SystemMatrixException.h"
-#include <boost/python/object.hpp>
 
+#include <boost/python/object.hpp>
 
 namespace escript {
 
 //
 // Forward declaration
+class AbstractSystemMatrix;
 class Data;
 
+typedef POINTER_WRAPPER_CLASS(AbstractSystemMatrix) ASM_ptr;
+typedef POINTER_WRAPPER_CLASS(const AbstractSystemMatrix) const_ASM_ptr;
+
+
 /**
    \brief
    Base class for escript system matrices.
 */
-class ESCRIPT_DLL_API AbstractSystemMatrix
+class ESCRIPT_DLL_API AbstractSystemMatrix: public REFCOUNT_BASE_CLASS(AbstractSystemMatrix)
 {
 public:
 
@@ -56,6 +61,18 @@ public:
     virtual ~AbstractSystemMatrix() {}
 
     /**
+        \brief Returns smart pointer which is managing this object.
+        If one does not exist yet it creates one.
+    */
+    ASM_ptr getPtr();
+
+    /**
+        \brief Returns smart pointer which is managing this object.
+        If one does not exist yet it creates one.
+    */
+    const_ASM_ptr getPtr() const; 
+
+    /**
         \brief
         returns the matrix-vector product this*right
     */
@@ -140,7 +157,7 @@ public:
     /**
         \brief resets the matrix entries
     */
-    virtual void resetValues();
+    virtual void resetValues(bool preserveSolverData = false);
 
 private:
 
@@ -167,8 +184,6 @@ private:
 ESCRIPT_DLL_API
 Data operator*(const AbstractSystemMatrix& left, const Data& right);
 
-typedef boost::shared_ptr<AbstractSystemMatrix> ASM_ptr;
-
 } // end of namespace
 
 #endif // __ESCRIPT_ABSTRACTSYSTEMMATRIX_H__
diff --git a/escriptcore/src/AbstractTransportProblem.cpp b/escriptcore/src/AbstractTransportProblem.cpp
index 82116f5..9526fe9 100644
--- a/escriptcore/src/AbstractTransportProblem.cpp
+++ b/escriptcore/src/AbstractTransportProblem.cpp
@@ -14,110 +14,110 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-#include "AbstractTransportProblem.h" 
-#include "TransportProblemException.h"
-#include "DataTypes.h"
+#include "AbstractTransportProblem.h"
 #include "Data.h"
-#include <iostream>
+#include "DataTypes.h"
+#include "TransportProblemException.h"
 
+namespace bp = boost::python;
 
 namespace escript {
 
-AbstractTransportProblem::AbstractTransportProblem() {
+AbstractTransportProblem::AbstractTransportProblem()
+{
     m_empty=1;
 }
 
-AbstractTransportProblem::AbstractTransportProblem(const int blocksize,
+AbstractTransportProblem::AbstractTransportProblem(int blocksize,
                                                    const FunctionSpace& functionspace)
-:m_functionspace(functionspace)
+    : m_empty(0),
+      m_blocksize(blocksize),
+      m_functionspace(functionspace)
 {
-  if (blocksize<=0) 
-     throw TransportProblemException("Error - negative block size of transport problem.");
-
-   m_empty=0;
-   m_blocksize=blocksize;
-//    m_functionspace=functionspace;
+    ESYS_ASSERT(blocksize>0, "non-positive block size given");
 }
 
 AbstractTransportProblem::~AbstractTransportProblem() {
 }
 
-int AbstractTransportProblem::isEmpty() const {
+int AbstractTransportProblem::isEmpty() const
+{
    return m_empty;
 }
 
 
-Data AbstractTransportProblem::solve(Data& u0, Data& source, const double dt, boost::python::object& options) const
+Data AbstractTransportProblem::solve(Data& u0, Data& source, double dt,
+                                     bp::object& options)
 {
-     if (isEmpty())
-          throw TransportProblemException("Error - transport problem is empty.");
-     if (dt<=0.)
-          throw TransportProblemException("Error - dt needs to be positive.");
-     if (source.getFunctionSpace()!=getFunctionSpace())
-          throw TransportProblemException("Error - function space of transport problem and function space of source do not match.");
-     if (u0.getFunctionSpace()!=getFunctionSpace())
-          throw TransportProblemException("Error - function space of transport problem and function space of initial value do not match.");
-     if (source.getDataPointSize()!=getBlockSize())
-          throw TransportProblemException("Error - block size of transport problem and source do not match.");
-     if (u0.getDataPointSize()!=getBlockSize())
-          throw TransportProblemException("Error - block size of transport problem and initial value do not match.");
-
-     DataTypes::ShapeType shape;
-     if (getBlockSize()>1) shape.push_back(getBlockSize());
-     Data out=Data(0.,shape,getFunctionSpace(),true);
-     setToSolution(out, u0, source, dt, options);
-     return out;
+    if (isEmpty())
+        throw TransportProblemException("Error - transport problem is empty.");
+    if (dt<=0.)
+        throw ValueError("dt needs to be positive.");
+    if (source.getFunctionSpace()!=getFunctionSpace())
+        throw ValueError("Function space of transport problem and function space of source do not match.");
+    if (u0.getFunctionSpace()!=getFunctionSpace())
+        throw ValueError("Function space of transport problem and function space of initial value do not match.");
+    if (source.getDataPointSize()!=getBlockSize())
+        throw ValueError("Block size of transport problem and source do not match.");
+    if (u0.getDataPointSize()!=getBlockSize())
+        throw ValueError("Block size of transport problem and initial value do not match.");
+
+    DataTypes::ShapeType shape;
+    if (getBlockSize()>1) shape.push_back(getBlockSize());
+    Data out=Data(0.,shape,getFunctionSpace(),true);
+    setToSolution(out, u0, source, dt, options);
+    return out;
 }
 
-void AbstractTransportProblem::insertConstraint(Data& source, Data& q, Data& r) const
+void AbstractTransportProblem::insertConstraint(Data& source, Data& q, Data& r)
 {
-     source.expand();
-     if (isEmpty())
-          throw TransportProblemException("Error - transport problem is empty.");
-     if (q.isEmpty()) {
-          return;
-     }
-     if (((getBlockSize()==1) && (q.getDataPointRank()>0)) || (q.getDataPointRank()>1))
-          throw TransportProblemException("Error - illegal rank of constraint location.");
-     if (q.getDataPointSize()!=getBlockSize())
-          throw TransportProblemException("Error - block size of transport problem and constraint location don't match.");
-     Data q2=Data(q,getFunctionSpace());
-
-     if (r.isEmpty()) {
-          Data r2=Data(0.,q.getDataPointShape(),getFunctionSpace());
-          copyConstraint(source,q2,r2);
-     } else {
+    source.expand();
+    if (isEmpty())
+        throw TransportProblemException("insertConstraint(): Transport problem is empty.");
+    if (q.isEmpty()) {
+        return;
+    }
+    if (((getBlockSize()==1) && (q.getDataPointRank()>0)) || (q.getDataPointRank()>1))
+        throw ValueError("insertConstraint(): illegal rank of constraint location.");
+    if (q.getDataPointSize()!=getBlockSize())
+        throw ValueError("insertConstraint(): Block size of transport problem and constraint location don't match.");
+    Data q2=Data(q,getFunctionSpace());
+
+    if (r.isEmpty()) {
+        Data r2=Data(0.,q.getDataPointShape(),getFunctionSpace(), false);
+        copyConstraint(source,q2,r2);
+    } else {
         if (((getBlockSize()==1) && (r.getDataPointRank()>0)) || (r.getDataPointRank()>1))
-             throw TransportProblemException("Error - illegal rank of constraint value.");
+            throw ValueError("Illegal rank of constraint value.");
         if (r.getDataPointSize()!=getBlockSize())
-             throw TransportProblemException("Error - block size of transport problem and constraint value don't match.");
+            throw ValueError("Block size of transport problem and constraint value don't match.");
         Data r2=Data(r,getFunctionSpace());
         copyConstraint(source,q2,r2);
-     }
+    }
 }
 
-void AbstractTransportProblem::copyConstraint(Data& source, Data& q, Data& r) const
+void AbstractTransportProblem::copyConstraint(Data& source, Data& q, Data& r)
 {
-    throw TransportProblemException("Error - copyConstraint is not available");
+    throw NotImplementedError("copyConstraint is not available");
 }
 
-void AbstractTransportProblem::setToSolution(Data& out, Data &u0, Data& source,const double dt, boost::python::object& options) const
+void AbstractTransportProblem::setToSolution(Data& out, Data &u0, Data& source,
+                                             double dt, bp::object& options)
 {
-    throw TransportProblemException("Error - setToSolution is not available");
+    throw NotImplementedError("setToSolution is not available");
 }
 void AbstractTransportProblem::resetTransport() const
 {
-    throw TransportProblemException("Error - resetProblem is not implemented.");
+    throw NotImplementedError("resetProblem is not implemented.");
 }
 double AbstractTransportProblem::getSafeTimeStepSize() const
 {
-    throw TransportProblemException("Error - getSafeTimeStepSize is not implemented.");
+    throw NotImplementedError("getSafeTimeStepSize is not implemented.");
 }
 double AbstractTransportProblem::getUnlimitedTimeStepSize() const
 {
-    throw TransportProblemException("Error - getUnlimitedTimeStepSize is not implemented.");
+    throw NotImplementedError("getUnlimitedTimeStepSize is not implemented.");
 }
 
 }  // end of namespace
+
diff --git a/escriptcore/src/AbstractTransportProblem.h b/escriptcore/src/AbstractTransportProblem.h
index f42225a..30faecc 100644
--- a/escriptcore/src/AbstractTransportProblem.h
+++ b/escriptcore/src/AbstractTransportProblem.h
@@ -14,16 +14,14 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_ABSTRACTTRANSPORTPROBLEM_H__
+#define __ESCRIPT_ABSTRACTTRANSPORTPROBLEM_H__
 
-#if !defined  escript_AbstractTransportProblem_H
-#define escript_AbstractTransportProblem_H
 #include "system_dep.h"
-
 #include "FunctionSpace.h"
 #include "TransportProblemException.h"
-#include <boost/python/object.hpp>
-
 
+#include <boost/python/object.hpp>
 
 namespace escript {
 
@@ -42,9 +40,9 @@ class Data;
    For templates describe any conditions that the parameters used in the
    template must satisfy
 */
-class AbstractTransportProblem {
-
- public:
+class AbstractTransportProblem
+{
+public:
 
   /**
      \brief
@@ -105,7 +103,7 @@ class AbstractTransportProblem {
      returns the solution u for a time step dt>0 with initial value u0 at time t=0
   */
   ESCRIPT_DLL_API
-  Data solve(Data& u0, Data& source, const double dt, boost::python::object& options) const;
+  Data solve(Data& u0, Data& source, const double dt, boost::python::object& options);
 
 
   /**
@@ -119,7 +117,7 @@ class AbstractTransportProblem {
      inserts constraint u_{,t}=r where q>0  into the problem
   */
   ESCRIPT_DLL_API
-  void insertConstraint(Data& source, Data& q, Data& r) const;
+  void insertConstraint(Data& source, Data& q, Data& r);
   /*
    *      \brief returns a safe time step size.
    */
@@ -141,16 +139,17 @@ class AbstractTransportProblem {
      sets solution out by time step dt.
   */
   ESCRIPT_DLL_API
-  virtual void setToSolution(Data& out, Data& u0, Data& source, const double dt, boost::python::object& options) const;
+  virtual void setToSolution(Data& out, Data& u0, Data& source, double dt,
+                             boost::python::object& options);
 
   /**
      \brief
-     copy constraint u_{,t}=r where q>0  into the problem 
-     it can be assumed that q and r are not empty and have  
+     copy constraint u_{,t}=r where q>0  into the problem
+     it can be assumed that q and r are not empty and have
      appropriate shape and function space.
   */
   ESCRIPT_DLL_API
-  virtual void copyConstraint(Data& source, Data& q, Data& r) const;
+  virtual void copyConstraint(Data& source, Data& q, Data& r);
 
   int m_empty;
   int m_blocksize;
@@ -158,8 +157,9 @@ class AbstractTransportProblem {
 
 };
 
-
 typedef boost::shared_ptr<AbstractTransportProblem> ATP_ptr;
 
 } // end of namespace
-#endif
+
+#endif // __ESCRIPT_ABSTRACTTRANSPORTPROBLEM_H__
+
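The header keeps solve() and insertConstraint() as concrete entry points while
setToSolution() and copyConstraint() remain virtual hooks for concrete problem classes.
Below is a minimal sketch of that override pattern; Data and Options are stand-in types,
not the real escript/boost.python signatures, and the validation done by the real entry
points is omitted.

    #include <iostream>
    #include <stdexcept>

    struct Data {};      // stand-in for escript::Data
    struct Options {};   // stand-in for boost::python::object

    // Mirrors the shape of the interface above: solve() is a concrete entry
    // point, setToSolution() is the virtual hook a concrete problem overrides.
    class TransportBase {
    public:
        Data solve(Data& u0, Data& source, double dt, Options& opts) {
            Data out;
            setToSolution(out, u0, source, dt, opts);
            return out;
        }

        virtual void setToSolution(Data&, Data&, Data&, double, Options&) {
            throw std::logic_error("setToSolution is not available");
        }

        virtual ~TransportBase() {}
    };

    class MyTransport : public TransportBase {
    public:
        void setToSolution(Data& out, Data& u0, Data&, double, Options&) override {
            out = u0;   // a real solver would advance u0 by dt here
        }
    };

    int main() {
        MyTransport problem;
        Data u0, source;
        Options opts;
        problem.solve(u0, source, 0.1, opts);
        std::cout << "one transport step done\n";
    }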
diff --git a/escriptcore/src/ArrayOps.cpp b/escriptcore/src/ArrayOps.cpp
new file mode 100644
index 0000000..cbf3fc1
--- /dev/null
+++ b/escriptcore/src/ArrayOps.cpp
@@ -0,0 +1,70 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+#include "ArrayOps.h"
+
+using namespace escript;
+
+namespace escript
+{
+
+bool supports_cplx(escript::ES_optype operation)
+{
+    switch (operation)
+    {
+    case NEG:
+    case SIN: 
+    case COS: 
+    case TAN: 
+    case ASIN: 
+    case ACOS: 
+    case ATAN: 
+    case SINH: 
+    case COSH: 
+    case TANH: return true;
+    case ERF: return false;
+    case ASINH: 
+    case ACOSH: 
+    case ATANH: 
+    case LOG10: 
+    case LOG: return true;
+    case SIGN: return false;
+    case ABS: 
+    case EXP: 
+    case SQRT: return true;
+    case EZ:
+    case NEZ:return true;
+    case GZ:
+    case GEZ:
+    case LZ:
+    case LEZ: return false;   
+    case CONJ: return true;
+    case REAL: return true;
+    case IMAG: return true;
+    case RECIP: return true;
+    default:
+      return false;	// let's be conservative
+  }  
+}
+
+bool always_real(escript::ES_optype operation)
+{
+    return ((operation==REAL) || (operation==IMAG) || (operation==EZ) || (operation==NEZ) || (operation==ABS));
+}
+
+
+}
\ No newline at end of file
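supports_cplx() and always_real() are simple predicates over the operation code. As an
illustration of how a caller might combine them to decide whether a unary result needs
complex storage, here is a stand-alone sketch; the enum and predicate bodies are stand-ins,
not the escript definitions above.

    #include <iostream>

    // Stand-ins for escript::ES_optype and the two predicates; the real
    // definitions cover the full operation list in ArrayOps.cpp.
    enum ES_optype_sketch { SIN, REAL, GZ };

    bool supports_cplx_sketch(ES_optype_sketch op) { return op != GZ; }
    bool always_real_sketch(ES_optype_sketch op)   { return op == REAL; }

    // One plausible use: decide whether the result of a unary operation needs
    // complex storage, given the complexity of its argument.
    bool result_is_complex(bool input_is_complex, ES_optype_sketch op)
    {
        if (!input_is_complex) return false;        // real in, real out
        if (always_real_sketch(op)) return false;   // Re()/Im()/abs() collapse to real
        return supports_cplx_sketch(op);            // otherwise stay complex if supported
    }

    int main() {
        std::cout << result_is_complex(true, SIN) << " "    // 1: sin of a complex value stays complex
                  << result_is_complex(true, REAL) << "\n"; // 0: Re() is always real
    }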
diff --git a/escriptcore/src/ArrayOps.h b/escriptcore/src/ArrayOps.h
new file mode 100644
index 0000000..2dd7640
--- /dev/null
+++ b/escriptcore/src/ArrayOps.h
@@ -0,0 +1,892 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+#ifndef __ESCRIPT_LOCALOPS_H__
+#define __ESCRIPT_LOCALOPS_H__
+
+#include "DataTypes.h"
+#include "DataException.h"
+#include <iostream>
+#include <cmath>
+#include <complex>
+
+#ifdef ESYS_USE_BOOST_ACOS
+#include <boost/math/complex/acos.hpp>	// std::acos for complex on OSX (elcapitan) is wrong
+#endif
+
+#ifndef M_PI
+#   define M_PI           3.14159265358979323846  /* pi */
+#endif
+
+
+/**
+\file ArrayOps.h
+\brief Describes binary operations performed on double*.
+
+For operations on DataAbstract see BinaryOp.h.
+For operations on DataVector see DataMaths.h.
+*/
+
+#include "ES_optype.h"
+
+namespace escript {
+
+
+
+bool always_real(escript::ES_optype operation);
+
+
+/**
+   \brief
+   Return the maximum value of the two given values.
+*/
+struct FMax 
+{
+  inline DataTypes::real_t operator()(DataTypes::real_t x, DataTypes::real_t y) const
+  {
+    return std::max(x,y);
+  }
+  typedef DataTypes::real_t first_argument_type;
+  typedef DataTypes::real_t second_argument_type;
+  typedef DataTypes::real_t result_type;
+};
+
+/**
+   \brief
+   Return the minimum value of the two given values.
+*/
+struct FMin
+{
+  inline DataTypes::real_t operator()(DataTypes::real_t x, DataTypes::real_t y) const
+  {
+    return std::min(x,y);
+  }
+  typedef DataTypes::real_t first_argument_type;
+  typedef DataTypes::real_t second_argument_type;
+  typedef DataTypes::real_t result_type;  
+};
+
+/**
+   \brief
+   Return the absolute maximum value of the two given values.
+*/
+template<typename T>
+struct AbsMax 
+{
+  inline DataTypes::real_t operator()(T x, T y) const
+  {
+    return std::max(std::abs(x),std::abs(y));
+  }
+  typedef T first_argument_type;
+  typedef T second_argument_type;
+  typedef DataTypes::real_t result_type;
+};
+
+
+inline
+DataTypes::real_t
+fsign(DataTypes::real_t x)
+{
+  if (x == 0) {
+    return 0;
+  } else {
+    return x/fabs(x);
+  }
+}
+
+/**
+\brief Acts as a wrapper for isnan.
+\warning If the compiler does not support FP_NAN this function will always return false.
+*/
+inline
+bool nancheck(DataTypes::real_t d)
+{
+                // Q: so why not just test d!=d?
+                // A: Because it doesn't always work [I've checked].
+                // One theory is that the optimizer skips the test.
+    return std::isnan(d);       // isNan should be a function in C++ land
+}
+
+/**
+\brief returns a NaN.
+\warning Should probably only be used where you know you can test for NaNs.
+*/
+inline
+DataTypes::real_t makeNaN()
+{
+#ifdef nan
+    return nan("");
+#else
+    return sqrt(-1.);
+#endif
+
+}
+
+
+/**
+   \brief
+   solves a 1x1 eigenvalue A*V=ev*V problem
+
+   \param A00 Input - A_00
+   \param ev0 Output - eigenvalue
+*/
+inline
+void eigenvalues1(const DataTypes::real_t A00,DataTypes::real_t* ev0) {
+   *ev0=A00;
+}
+
+inline
+void eigenvalues1(const DataTypes::cplx_t A00,DataTypes::cplx_t* ev0) {
+   *ev0=A00;
+}
+
+
+/**
+   \brief
+   solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
+
+   \param A00 Input - A_00
+   \param A01 Input - A_01
+   \param A11 Input - A_11
+   \param ev0 Output - smallest eigenvalue
+   \param ev1 Output - largest eigenvalue
+*/
+template <class T>
+inline
+void eigenvalues2(const T A00,const T A01,const T A11,
+                 T* ev0, T* ev1) {
+      const T trA=(A00+A11)/2.;
+      const T A_00=A00-trA;
+      const T A_11=A11-trA;
+      const T s=sqrt(A01*A01-A_00*A_11);
+      *ev0=trA-s;
+      *ev1=trA+s;
+}
+/**
+   \brief
+   solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
+
+   \param A00 Input - A_00
+   \param A01 Input - A_01
+   \param A02 Input - A_02
+   \param A11 Input - A_11
+   \param A12 Input - A_12
+   \param A22 Input - A_22
+   \param ev0 Output - smallest eigenvalue
+   \param ev1 Output - eigenvalue
+   \param ev2 Output - largest eigenvalue
+*/
+inline
+void eigenvalues3(const DataTypes::real_t A00, const DataTypes::real_t A01, const DataTypes::real_t A02,
+                                   const DataTypes::real_t A11, const DataTypes::real_t A12,
+                                                     const DataTypes::real_t A22,
+                 DataTypes::real_t* ev0, DataTypes::real_t* ev1,DataTypes::real_t* ev2) {
+
+      const DataTypes::real_t trA=(A00+A11+A22)/3.;
+      const DataTypes::real_t A_00=A00-trA;
+      const DataTypes::real_t A_11=A11-trA;
+      const DataTypes::real_t A_22=A22-trA;
+      const DataTypes::real_t A01_2=A01*A01;
+      const DataTypes::real_t A02_2=A02*A02;
+      const DataTypes::real_t A12_2=A12*A12;
+      const DataTypes::real_t p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
+      if (p<=0.) {
+         *ev2=trA;
+         *ev1=trA;
+         *ev0=trA;
+
+      } else {
+         const DataTypes::real_t q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
+         const DataTypes::real_t sq_p=sqrt(p/3.);
+         DataTypes::real_t z=-q/(2*pow(sq_p,3));
+         if (z<-1.) {
+            z=-1.;
+         } else if (z>1.) {
+            z=1.;
+         }
+         const DataTypes::real_t alpha_3=acos(z)/3.;
+         *ev2=trA+2.*sq_p*cos(alpha_3);
+         *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
+         *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
+      }
+}
+/**
+   \brief
+   solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
+
+   \param A00 Input - A_00
+   \param ev0 Output - eigenvalue
+   \param V00 Output - eigenvector
+   \param tol Input - tolerance used to identify equal eigenvalues
+*/
+inline
+void  eigenvalues_and_eigenvectors1(const DataTypes::real_t A00,DataTypes::real_t* ev0,DataTypes::real_t* V00,const DataTypes::real_t tol)
+{
+      eigenvalues1(A00,ev0);
+      *V00=1.;
+      return;
+}
+/**
+   \brief
+   returns a non-zero vector in the kernel of [[A00,A01],[A10,A11]] assuming that the kernel dimension is at least 1.
+
+   \param A00 Input - matrix component
+   \param A10 Input - matrix component
+   \param A01 Input - matrix component
+   \param A11 Input - matrix component
+   \param V0 Output - vector component
+   \param V1 Output - vector component
+*/
+inline
+void  vectorInKernel2(const DataTypes::real_t A00,const DataTypes::real_t A10,const DataTypes::real_t A01,const DataTypes::real_t A11,
+                      DataTypes::real_t* V0, DataTypes::real_t*V1)
+{
+      DataTypes::real_t absA00=fabs(A00);
+      DataTypes::real_t absA10=fabs(A10);
+      DataTypes::real_t absA01=fabs(A01);
+      DataTypes::real_t absA11=fabs(A11);
+      DataTypes::real_t m=absA11>absA10 ? absA11 : absA10;
+      if (absA00>m || absA01>m) {
+         *V0=-A01;
+         *V1=A00;
+      } else {
+         if (m<=0) {
+           *V0=1.;
+           *V1=0.;
+         } else {
+           *V0=A11;
+           *V1=-A10;
+         }
+     }
+}
+/**
+   \brief
+   returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
+   assuming that the kernel dimension is at least 1 and A00 is non-zero.
+
+   \param A00 Input - matrix component
+   \param A10 Input - matrix component
+   \param A20 Input - matrix component
+   \param A01 Input - matrix component
+   \param A11 Input - matrix component
+   \param A21 Input - matrix component
+   \param A02 Input - matrix component
+   \param A12 Input - matrix component
+   \param A22 Input - matrix component
+   \param V0 Output - vector component
+   \param V1 Output - vector component
+   \param V2 Output - vector component
+*/
+inline
+void  vectorInKernel3__nonZeroA00(const DataTypes::real_t A00,const DataTypes::real_t A10,const DataTypes::real_t A20,
+                                const DataTypes::real_t A01,const DataTypes::real_t A11,const DataTypes::real_t A21,
+                                const DataTypes::real_t A02,const DataTypes::real_t A12,const DataTypes::real_t A22,
+                                DataTypes::real_t* V0,DataTypes::real_t* V1,DataTypes::real_t* V2)
+{
+    DataTypes::real_t TEMP0,TEMP1;
+    const DataTypes::real_t I00=1./A00;
+    const DataTypes::real_t IA10=I00*A10;
+    const DataTypes::real_t IA20=I00*A20;
+    vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
+                    A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
+    *V0=-(A10*TEMP0+A20*TEMP1);
+    *V1=A00*TEMP0;
+    *V2=A00*TEMP1;
+}
+
+/**
+   \brief
+   solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvalues are
+   ordered by increasing value and eigenvectors are normalized such that the
+   length is one and the first non-zero component is positive.
+
+   \param A00 Input - A_00
+   \param A01 Input - A_01
+   \param A11 Input - A_11
+   \param ev0 Output - smallest eigenvalue
+   \param ev1 Output - eigenvalue
+   \param V00 Output - eigenvector component corresponding to ev0
+   \param V10 Output - eigenvector component corresponding to ev0
+   \param V01 Output - eigenvector component corresponding to ev1
+   \param V11 Output - eigenvector component corresponding to ev1
+   \param tol Input - tolerance used to identify equal eigenvalues
+*/
+inline
+void  eigenvalues_and_eigenvectors2(const DataTypes::real_t A00,const DataTypes::real_t A01,const DataTypes::real_t A11,
+                                    DataTypes::real_t* ev0, DataTypes::real_t* ev1,
+                                    DataTypes::real_t* V00, DataTypes::real_t* V10, DataTypes::real_t* V01, DataTypes::real_t* V11,
+                                    const DataTypes::real_t tol)
+{
+     DataTypes::real_t TEMP0,TEMP1;
+     eigenvalues2(A00,A01,A11,ev0,ev1);
+     const DataTypes::real_t absev0=fabs(*ev0);
+     const DataTypes::real_t absev1=fabs(*ev1);
+     DataTypes::real_t max_ev=absev0>absev1 ? absev0 : absev1;
+     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
+        *V00=1.;
+        *V10=0.;
+        *V01=0.;
+        *V11=1.;
+     } else {
+        vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
+        const DataTypes::real_t scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
+        if (TEMP0<0.) {
+            *V00=-TEMP0*scale;
+            *V10=-TEMP1*scale;
+            if (TEMP1<0.) {
+               *V01=  *V10;
+               *V11=-(*V00);
+            } else {
+               *V01=-(*V10);
+               *V11= (*V00);
+            }
+        } else if (TEMP0>0.) {
+            *V00=TEMP0*scale;
+            *V10=TEMP1*scale;
+            if (TEMP1<0.) {
+               *V01=-(*V10);
+               *V11= (*V00);
+            } else {
+               *V01= (*V10);
+               *V11=-(*V00);
+            }
+        } else {
+           *V00=0.;
+           *V10=1;
+           *V11=0.;
+           *V01=1.;
+       }
+   }
+}
+/**
+   \brief
+   normalizes a 3-d vector such that the length is one and the first non-zero component is positive.
+
+   \param V0 - vector component
+   \param V1 - vector component
+   \param V2 - vector component
+*/
+inline
+void  normalizeVector3(DataTypes::real_t* V0,DataTypes::real_t* V1,DataTypes::real_t* V2)
+{
+    DataTypes::real_t s;
+    if (*V0>0) {
+        s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
+        *V0*=s;
+        *V1*=s;
+        *V2*=s;
+    } else if (*V0<0)  {
+        s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
+        *V0*=s;
+        *V1*=s;
+        *V2*=s;
+    } else {
+        if (*V1>0) {
+            s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
+            *V1*=s;
+            *V2*=s;
+        } else if (*V1<0)  {
+            s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
+            *V1*=s;
+            *V2*=s;
+        } else {
+            *V2=1.;
+        }
+    }
+}
+/**
+   \brief
+   solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A. Eigenvalues are
+   ordered by increasing value and eigenvectors are normalized (via
+   normalizeVector3) such that the length is one and the first non-zero
+   component is positive.
+
+   \param A00 Input - A_00
+   \param A01 Input - A_01
+   \param A02 Input - A_02
+   \param A11 Input - A_11
+   \param A12 Input - A_12
+   \param A22 Input - A_22
+   \param ev0 Output - smallest eigenvalue
+   \param ev1 Output - middle eigenvalue
+   \param ev2 Output - largest eigenvalue
+   \param V00 Output - eigenvector component corresponding to ev0
+   \param V10 Output - eigenvector component corresponding to ev0
+   \param V20 Output - eigenvector component corresponding to ev0
+   \param V01 Output - eigenvector component corresponding to ev1
+   \param V11 Output - eigenvector component corresponding to ev1
+   \param V21 Output - eigenvector component corresponding to ev1
+   \param V02 Output - eigenvector component corresponding to ev2
+   \param V12 Output - eigenvector component corresponding to ev2
+   \param V22 Output - eigenvector component corresponding to ev2
+   \param tol Input - tolerance used to identify equal eigenvalues
+*/
+inline
+void  eigenvalues_and_eigenvectors3(const DataTypes::real_t A00, const DataTypes::real_t A01, const DataTypes::real_t A02,
+                                    const DataTypes::real_t A11, const DataTypes::real_t A12, const DataTypes::real_t A22,
+                                    DataTypes::real_t* ev0, DataTypes::real_t* ev1, DataTypes::real_t* ev2,
+                                    DataTypes::real_t* V00, DataTypes::real_t* V10, DataTypes::real_t* V20,
+                                    DataTypes::real_t* V01, DataTypes::real_t* V11, DataTypes::real_t* V21,
+                                    DataTypes::real_t* V02, DataTypes::real_t* V12, DataTypes::real_t* V22,
+                                    const DataTypes::real_t tol)
+{
+      const DataTypes::real_t absA01=fabs(A01);
+      const DataTypes::real_t absA02=fabs(A02);
+      const DataTypes::real_t m=absA01>absA02 ? absA01 : absA02;
+      if (m<=0) {
+        DataTypes::real_t TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
+        eigenvalues_and_eigenvectors2(A11,A12,A22,
+                                      &TEMP_EV0,&TEMP_EV1,
+                                      &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
+        if (A00<=TEMP_EV0) {
+            *V00=1.;
+            *V10=0.;
+            *V20=0.;
+            *V01=0.;
+            *V11=TEMP_V00;
+            *V21=TEMP_V10;
+            *V02=0.;
+            *V12=TEMP_V01;
+            *V22=TEMP_V11;
+            *ev0=A00;
+            *ev1=TEMP_EV0;
+            *ev2=TEMP_EV1;
+        } else if (A00>TEMP_EV1) {
+            *V02=1.;
+            *V12=0.;
+            *V22=0.;
+            *V00=0.;
+            *V10=TEMP_V00;
+            *V20=TEMP_V10;
+            *V01=0.;
+            *V11=TEMP_V01;
+            *V21=TEMP_V11;
+            *ev0=TEMP_EV0;
+            *ev1=TEMP_EV1;
+            *ev2=A00;
+        } else {
+            *V01=1.;
+            *V11=0.;
+            *V21=0.;
+            *V00=0.;
+            *V10=TEMP_V00;
+            *V20=TEMP_V10;
+            *V02=0.;
+            *V12=TEMP_V01;
+            *V22=TEMP_V11;
+            *ev0=TEMP_EV0;
+            *ev1=A00;
+            *ev2=TEMP_EV1;
+        }
+      } else {
+         eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
+         const DataTypes::real_t absev0=fabs(*ev0);
+         const DataTypes::real_t absev1=fabs(*ev1);
+         const DataTypes::real_t absev2=fabs(*ev2);
+         DataTypes::real_t max_ev=absev0>absev1 ? absev0 : absev1;
+         max_ev=max_ev>absev2 ? max_ev : absev2;
+         const DataTypes::real_t d_01=fabs((*ev0)-(*ev1));
+         const DataTypes::real_t d_12=fabs((*ev1)-(*ev2));
+         const DataTypes::real_t max_d=d_01>d_12 ? d_01 : d_12;
+         if (max_d<=tol*max_ev) {
+             *V00=1.;
+             *V10=0;
+             *V20=0;
+             *V01=0;
+             *V11=1.;
+             *V21=0;
+             *V02=0;
+             *V12=0;
+             *V22=1.;
+         } else {
+            const DataTypes::real_t S00=A00-(*ev0);
+            const DataTypes::real_t absS00=fabs(S00);
+            if (absS00>m) {
+                vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
+            } else if (absA02<m) {
+                vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
+            } else {
+                vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
+            }
+            normalizeVector3(V00,V10,V20);
+            const DataTypes::real_t T00=A00-(*ev2);
+            const DataTypes::real_t absT00=fabs(T00);
+            if (absT00>m) {
+                 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
+            } else if (absA02<m) {
+                 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
+            } else {
+                 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
+            }
+            const DataTypes::real_t dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
+            *V02-=dot*(*V00);
+            *V12-=dot*(*V10);
+            *V22-=dot*(*V20);
+            normalizeVector3(V02,V12,V22);
+            *V01=(*V10)*(*V22)-(*V12)*(*V20);
+            *V11=(*V20)*(*V02)-(*V00)*(*V22);
+            *V21=(*V00)*(*V12)-(*V02)*(*V10);
+            normalizeVector3(V01,V11,V21);
+         }
+   }
+}
+
+// General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
+// SM is the product of the last axis_offset entries in arg_0.getShape().
+template <class LEFT, class RIGHT, class RES>
+inline
+void matrix_matrix_product(const int SL, const int SM, const int SR, const LEFT* A, const RIGHT* B, RES* C, int transpose)
+{
+  if (transpose == 0) {
+    for (int i=0; i<SL; i++) {
+      for (int j=0; j<SR; j++) {
+        RES sum = 0.0;
+        for (int l=0; l<SM; l++) {
+          sum += A[i+SL*l] * B[l+SM*j];
+        }
+        C[i+SL*j] = sum;
+      }
+    }
+  }
+  else if (transpose == 1) {
+    for (int i=0; i<SL; i++) {
+      for (int j=0; j<SR; j++) {
+        RES sum = 0.0;
+        for (int l=0; l<SM; l++) {
+          sum += A[i*SM+l] * B[l+SM*j];
+        }
+        C[i+SL*j] = sum;
+      }
+    }
+  }
+  else if (transpose == 2) {
+    for (int i=0; i<SL; i++) {
+      for (int j=0; j<SR; j++) {
+        RES sum = 0.0;
+        for (int l=0; l<SM; l++) {
+          sum += A[i+SL*l] * B[l*SR+j];
+        }
+        C[i+SL*j] = sum;
+      }
+    }
+  }
+}
+
+#if defined (_WIN32) && !defined(__INTEL_COMPILER)
+#else
+
+inline
+DataTypes::real_t calc_erf(DataTypes::real_t x)
+{
+    return ::erf(x);
+}
+
+inline
+DataTypes::cplx_t calc_erf(DataTypes::cplx_t x)
+{
+    return makeNaN();
+}
+
+#endif
+
+inline DataTypes::real_t calc_sign(DataTypes::real_t x)
+{
+    return escript::fsign(x);
+}
+
+inline DataTypes::cplx_t calc_sign(DataTypes::cplx_t x)
+{
+    return makeNaN();
+}
+
+inline 
+DataTypes::real_t calc_acos(DataTypes::real_t x)
+{
+    return acos(x);
+}
+
+inline 
+DataTypes::cplx_t calc_acos(DataTypes::cplx_t x)
+{
+#ifdef ESYS_USE_BOOST_ACOS
+              return boost::math::acos(x);
+#else
+              return acos(x);
+#endif  
+}
+
+
+inline escript::DataTypes::real_t fabs(const escript::DataTypes::cplx_t c)
+{
+    return abs(c);
+}
+
+
+
+inline DataTypes::real_t calc_gtzero(const DataTypes::real_t& x) {return x>0;}
+inline DataTypes::cplx_t calc_gtzero(const DataTypes::cplx_t& x) {return makeNaN();}
+
+
+inline DataTypes::real_t calc_gezero(const DataTypes::real_t& x) {return x>=0;}
+inline DataTypes::cplx_t calc_gezero(const DataTypes::cplx_t& x) {return makeNaN();}
+
+
+inline DataTypes::real_t calc_ltzero(const DataTypes::real_t& x) {return x<0;}
+inline DataTypes::cplx_t calc_ltzero(const DataTypes::cplx_t& x) {return makeNaN();}
+
+inline DataTypes::real_t calc_lezero(const DataTypes::real_t& x) {return x<=0;}
+inline DataTypes::cplx_t calc_lezero(const DataTypes::cplx_t& x) {return makeNaN();}
+
+template <typename IN>
+inline DataTypes::real_t abs_f(IN i)
+{
+    return fabs(i);
+}
+
+template <>
+inline DataTypes::real_t abs_f(DataTypes::cplx_t i)
+{
+    return abs(i);
+}
+
+
+
+
+// deals with unary operations which return real, regardless of
+// their input type
+template <class IN>
+inline void tensor_unary_array_operation_real(const size_t size,
+                             const IN *arg1,
+                             DataTypes::real_t * argRes,
+                             escript::ES_optype operation,
+                             DataTypes::real_t tol=0)
+{
+   switch (operation)
+   {
+     case REAL: 
+          for (int i = 0; i < size; ++i) {
+              argRes[i] = std::real(arg1[i]);
+          }
+          break;          
+     case IMAG: 
+          for (int i = 0; i < size; ++i) {
+              argRes[i] = std::imag(arg1[i]);
+          }
+          break;  
+    case EZ:   
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = (fabs(arg1[i])<=tol);
+          }
+          break;
+    case NEZ: 
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = (fabs(arg1[i])>tol);
+          }
+          break;
+    case ABS: 
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = abs_f(arg1[i]);
+          }
+          break;     	  
+     default:
+          throw DataException("Unsupported unary operation");      
+   }  
+}
+
+
+
+template <typename OUT, typename IN>
+inline OUT conjugate(const IN i)
+{
+    return conj(i);
+}
+
+// This should never actually be called
+template <>
+inline DataTypes::real_t conjugate(const DataTypes::real_t r)
+{
+    return r;
+}
+
+// No openmp because it's called by Lazy
+// In most cases, IN and OUT will be the same
+// but not ruling out putting Re() and Im()
+// through this
+template <class IN, typename OUT>
+inline void tensor_unary_array_operation(const size_t size,
+                             const IN *arg1,
+                             OUT * argRes,
+                             escript::ES_optype operation,
+                             DataTypes::real_t tol=0)
+{
+  switch (operation)
+  {
+    case NEG:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = -arg1[i];
+          }
+          break;
+    case SIN: 
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = sin(arg1[i]);
+          }
+          break;
+    case COS:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = cos(arg1[i]);
+          }
+          break;
+    case TAN:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = tan(arg1[i]);
+          }
+          break;
+    case ASIN: 
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = asin(arg1[i]);
+          }
+          break;
+    case ACOS:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i]=calc_acos(arg1[i]);
+          }
+          break;
+    case ATAN:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = atan(arg1[i]);
+          }
+          break;
+    case ABS:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = std::abs(arg1[i]);
+          }
+          break;      
+    case SINH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = sinh(arg1[i]);
+          }
+          break;
+    case COSH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = cosh(arg1[i]);
+          }
+          break;
+    case TANH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = tanh(arg1[i]);
+          }
+          break;
+    case ERF: 
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_erf(arg1[i]);
+          }
+          break;
+    case ASINH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = asinh(arg1[i]);
+          }
+          break;
+    case ACOSH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = acosh(arg1[i]);
+          }
+          break;
+    case ATANH:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = atanh(arg1[i]);
+          }
+          break;
+    case LOG10:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = log10(arg1[i]);
+          }
+          break;
+    case LOG:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = log(arg1[i]);
+          }
+          break;      
+    case SIGN:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_sign(arg1[i]);
+          }
+          break;      
+    case EXP:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = exp(arg1[i]);
+          }
+          break;      
+    case SQRT:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = sqrt(arg1[i]);
+          }
+          break;      
+    case GZ:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_gtzero(arg1[i]);
+          }
+          break;      
+    case GEZ:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_gezero(arg1[i]);
+          }
+          break;            
+    case LZ:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_ltzero(arg1[i]);
+          }
+          break;            
+    case LEZ:
+	  for (size_t i = 0; i < size; ++i) {
+              argRes[i] = calc_lezero(arg1[i]);
+          }
+          break;            
+    case CONJ: 
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = conjugate<OUT,IN>(arg1[i]);
+          }
+          break; 
+    case RECIP: 
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = 1.0/arg1[i];
+          }
+          break; 
+    case EZ:
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = fabs(arg1[i])<=tol;
+          }	  
+	  break;
+    case NEZ:
+          for (size_t i = 0; i < size; ++i) {
+              argRes[i] = fabs(arg1[i])>tol;
+          }	  
+	  break;      
+      
+    default:
+      std::string s="Unsupported unary operation ";
+      s+=operation;
+      throw DataException(s);
+  }
+  return;
+}
+
+bool supports_cplx(escript::ES_optype operation);
+
+
+} // end of namespace
+
+#endif // __ESCRIPT_LOCALOPS_H__
+
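As a quick numerical check of the closed form used by eigenvalues2() above: for a symmetric
2x2 matrix the two roots are trA +- sqrt(A01^2 + ((A00-A11)/2)^2), which is algebraically the
same as the sqrt(A01*A01 - A_00*A_11) expression in the template. A stand-alone version for
plain doubles (an editorial sketch, not part of the patch):

    #include <cmath>
    #include <cstdio>

    // Closed form for the symmetric 2x2 case, written out for plain doubles.
    void eigenvalues2_sketch(double a00, double a01, double a11,
                             double* ev0, double* ev1)
    {
        const double trA = (a00 + a11) / 2.0;
        const double d   = (a00 - a11) / 2.0;
        const double s   = std::sqrt(a01 * a01 + d * d);
        *ev0 = trA - s;   // smallest eigenvalue
        *ev1 = trA + s;   // largest eigenvalue
    }

    int main() {
        double ev0, ev1;
        eigenvalues2_sketch(2.0, 1.0, 2.0, &ev0, &ev1);   // [[2,1],[1,2]] -> 1 and 3
        std::printf("%g %g\n", ev0, ev1);
    }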
diff --git a/escriptcore/src/Assert.h b/escriptcore/src/Assert.h
new file mode 100644
index 0000000..e5664ab
--- /dev/null
+++ b/escriptcore/src/Assert.h
@@ -0,0 +1,83 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_ASSERT_H__
+#define __ESCRIPT_ASSERT_H__
+
+/**
+   \brief
+   EsysAssert is a MACRO that will throw an exception if the boolean
+   condition specified is false.
+
+   Description:
+   EsysAssert is conditionally compiled into code only when DOASSERT is
+   defined.  When DOASSERT is not defined, the EsysAssert statement is
+   entirely removed from code.
+*/
+
+#if DOASSERT
+
+//
+// DOASSERT is defined, evaluate assertions and abort on failure.
+//
+
+#include <escript/EsysException.h>
+#include <iostream>
+#include <sstream>
+
+#if ESYS_MPI
+
+#include <mpi.h>
+
+#define ESYS_ASSERT(assert_test, assert_msg)\
+    do {\
+        const bool result = (assert_test);\
+        if (!result) {\
+            std::ostringstream message;\
+            message << assert_msg << "\n\n"\
+            << __FILE__ << ":" << __LINE__ << ": " << #assert_test << "\n";\
+            std::cerr << message.str();\
+            MPI_Abort(MPI_COMM_WORLD, 455347);\
+        }\
+    } while (0)
+
+#else
+
+#define ESYS_ASSERT(assert_test, assert_msg)\
+    do {\
+        const bool result = (assert_test);\
+        if (!result) {\
+            std::ostringstream message;\
+            message << assert_msg << "\n\n"\
+            << __FILE__ << ":" << __LINE__ << ": " << #assert_test << "\n";\
+            throw escript::AssertException(message.str());\
+        }\
+    } while (0)
+
+#endif // ESYS_MPI
+
+#else // !DOASSERT
+
+//
+// DOASSERT is not defined, replace ESYS_ASSERT macro with no-op
+//
+
+#define ESYS_ASSERT(a,b)
+
+#endif
+
+#endif // __ESCRIPT_ASSERT_H__
+
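The non-MPI branch of ESYS_ASSERT simply builds a message and throws. A stripped-down,
stand-alone version of that branch, with std::runtime_error standing in for
escript::AssertException and the DOASSERT/MPI plumbing omitted:

    #include <iostream>
    #include <sstream>
    #include <stdexcept>

    // Sketch of the non-MPI ESYS_ASSERT branch; std::runtime_error is a
    // stand-in for escript::AssertException.
    #define ESYS_ASSERT_SKETCH(assert_test, assert_msg)\
        do {\
            if (!(assert_test)) {\
                std::ostringstream message;\
                message << assert_msg << "\n\n"\
                        << __FILE__ << ":" << __LINE__ << ": " << #assert_test << "\n";\
                throw std::runtime_error(message.str());\
            }\
        } while (0)

    int main() {
        try {
            int blocksize = 0;
            ESYS_ASSERT_SKETCH(blocksize > 0, "block size must be positive");
        } catch (const std::exception& e) {
            std::cerr << e.what();
        }
    }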
diff --git a/escriptcore/src/BinaryDataReadyOps.cpp b/escriptcore/src/BinaryDataReadyOps.cpp
new file mode 100644
index 0000000..8d36f6c
--- /dev/null
+++ b/escriptcore/src/BinaryDataReadyOps.cpp
@@ -0,0 +1,873 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+
+
+#include "BinaryDataReadyOps.h"
+#include "DataTagged.h"
+
+#include <sstream>
+using namespace escript;
+using namespace std;
+
+namespace escript
+{
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperCCC(DataConstant& res, const DataConstant& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=1*DataTypes::noValues(res.getShape());	// since we are constant, by definition
+	  // there is only one datapoint
+  
+  
+  if (right.getRank()==0) 
+  {		
+    escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      left.getTypedVectorRO(dummyl), 0,
+			      &right.getTypedVectorRO(dummyr)[0], true,
+			      operation, true);
+  }
+  else if (left.getRank()==0)
+  {
+    escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      &left.getTypedVectorRO(dummyl)[0], true,		// left is const so it only has one sample of one data point (and from the if we know that sample is rank0)
+			      right.getTypedVectorRO(dummyr), 0,
+			      operation, true);
+  }
+  else
+  {
+    escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      left.getTypedVectorRO(dummyl), 0, false,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+  }
+}
+
+
+void binaryOpDataCCC(DataConstant& result, const DataConstant& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperCCC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperCCC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperCCC<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperCCC<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperTCT(DataTagged& res, const DataConstant& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=DataTypes::noValues(res.getShape());	// there is only one datapoint per sample
+
+  // self update is not a possibility here because res and left are different types
+  if (res.getTagCount()!=0)    // the result must start with no tags
+  {
+      throw DataException("Programming error: result must have no tags for binaryOpDataReadyTCT");
+  }	
+
+  if (res.getTagCount()==0)
+  {
+      const DataTagged::DataMapType& lookup_1=right.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_1.begin();i!=lookup_1.end();i++)
+      {
+	res.addTag(i->first);
+      }
+  }
+  // so now we know that both tagged objects have the same tags (perhaps not in the same order though)
+  if (right.getRank()==0) 	// scalar op on the right
+  {		
+      // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, 
+				1, valcount,
+			      left.getTypedVectorRO(dummyl), 0,
+			      &right.getTypedVectorRO(dummyr)[0], 0,
+			      operation, false);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type rightoffset=right.getOffsetForTag(i->first);
+	  escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), resoffset, 
+				    1, valcount,
+				  left.getTypedVectorRO(dummyl), 0,
+				  &right.getTypedVectorRO(dummyr)[rightoffset], 0,
+				  operation, false);	  
+      }        
+  }
+  else if (left.getRank()==0)	// scalar op on the left
+  {
+      escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, 
+				1, valcount,
+			      &left.getTypedVectorRO(dummyl)[0], 0,
+			      right.getTypedVectorRO(dummyr), 0,
+			      operation, false);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type rightoffset=right.getOffsetForTag(i->first);	  
+	  escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), resoffset, 
+				    1, valcount, 
+				  &left.getTypedVectorRO(dummyl)[0], 0,
+				  right.getTypedVectorRO(dummyr), rightoffset,
+				  operation, false);	  
+      }        
+  }
+  else
+  {
+      // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      left.getTypedVectorRO(dummyl), 0, true,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+      const DataTagged::DataMapType& lookup_1=right.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_1.begin();i!=lookup_1.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=right.getOffsetForTag(i->first);
+	  escript::binaryOpVector(res.getTypedVectorRW(resdummy), resoffset, 1, valcount, 
+				  left.getTypedVectorRO(dummyl), 0, true,
+				  right.getTypedVectorRO(dummyr), i->second, false,
+				  operation);	  
+      }      
+      
+  }
+}
+
+
+void binaryOpDataTCT(DataTagged& result, const DataConstant& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTCT<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperTCT<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTCT<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperTCT<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperECE(DataExpanded& res, const DataConstant& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=res.getNumDPPSample()*DataTypes::noValues(res.getShape());
+  
+  // if both sides are rank0, then that should be handled normally rather than with a special case
+  // hence we check for that possibility first
+
+  if (right.getRank()==left.getRank())		// both zero or both equal and non-zero 
+  {
+    escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 
+			      res.getNumSamples()*res.getNumDPPSample(),DataTypes::noValues(res.getShape()) , 
+			      left.getTypedVectorRO(dummyl), 0, true,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+  } else if (right.getRank()==0) 
+  {
+      // This is a tricky one. There are lots of individual scalars on the RHS, each of which needs
+      // to be combined with the values that make up the single constant on the left. We handle this
+      // by pretending the samples are smaller but that there are more of them.
+    escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, 
+					 res.getNumSamples()*res.getNumDPPSample(), DataTypes::noValues(res.getShape()), 
+			      left.getTypedVectorRO(dummyl), 0,
+			      &right.getTypedVectorRO(dummyr)[0], false,
+			      operation,
+			      true);
+  }
+  else  // if (left.getRank()==0)
+  {
+    escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, right.getNumSamples(), valcount, 
+			      &left.getTypedVectorRO(dummyl)[0], true,
+			      right.getTypedVectorRO(dummyr), false,
+			      operation,
+			      false);
+  }
+}
+
+
+void binaryOpDataECE(DataExpanded& result, const DataConstant& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperECE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperECE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperECE<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperECE<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperEET(DataExpanded& res, const DataExpanded& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=res.getNumDPPSample()*DataTypes::noValues(res.getShape());
+
+    escript::binaryOpVectorTagged(res.getTypedVectorRW(resdummy), 
+			      res.getNumSamples(),res.getNumDPPSample(), DataTypes::noValues(res.getShape()), 
+			      left.getTypedVectorRO(dummyl), left.getRank()==0,
+			      right.getTypedVectorRO(dummyr), right.getRank()==0,
+			      false,	// right object is the tagged one
+			      right,	// source of tags
+			      operation);  
+  
+}
+
+
+void binaryOpDataEET(DataExpanded& result, const DataExpanded& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEET<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperEET<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEET<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperEET<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperETE(DataExpanded& res, const DataTagged& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=res.getNumDPPSample()*DataTypes::noValues(res.getShape());
+
+    escript::binaryOpVectorTagged(res.getTypedVectorRW(resdummy), 
+			      res.getNumSamples(),res.getNumDPPSample(), DataTypes::noValues(res.getShape()), 
+			      left.getTypedVectorRO(dummyl), left.getRank()==0,
+			      right.getTypedVectorRO(dummyr), right.getRank()==0,
+			      true,	// left object is the tagged one
+			      left,	// source of tags
+			      operation);  
+  
+}
+
+
+void binaryOpDataETE(DataExpanded& result, const DataTagged& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperETE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperETE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperETE<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperETE<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+
+
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperTTC(DataTagged& res, const DataTagged& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=DataTypes::noValues(res.getShape());	// there is only one datapoint per sample
+
+  // We need to consider two possibilities:
+  //   1) we are dealing with a new result object (which won't have tags)
+  //   2) we are storing the result back into the same object (eg +=)
+  // for case 1, we need to add tags in the correct order and then calculate on a per tag basis
+  // for case 2, we just need to calculate tags
+
+  
+  // first let's exclude anything but our two cases
+  if ((&res!=&left) &&		// self update 
+    (res.getTagCount()!=0))		// no tags
+  {
+      throw DataException("binaryOpDataReadyTTC expects a=(a op b) or c=(a op b)");
+  }	
+
+  if (res.getTagCount()==0)
+  {
+      const DataTagged::DataMapType& lookup_1=left.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_1.begin();i!=lookup_1.end();i++)
+      {
+	res.addTag(i->first);
+      }
+  }
+  // so now we know that both tagged objects have the same tags (perhaps not in the same order though)
+  if (right.getRank()==0) 	// scalar op on the right
+  {
+    
+        // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, 
+				1, valcount,
+			      left.getTypedVectorRO(dummyl), 0,
+			      &right.getTypedVectorRO(dummyr)[0], 0,
+			      operation, false);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);
+	  escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), resoffset, 
+				    1, valcount,
+				  left.getTypedVectorRO(dummyl), leftoffset,
+				  &right.getTypedVectorRO(dummyr)[0], 0,
+				  operation, false);     
+      }  
+  }
+  else if (left.getRank()==0)	// scalar op on the left
+  {
+      escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, 
+				1, valcount,
+			      &left.getTypedVectorRO(dummyl)[0], 0,
+			      right.getTypedVectorRO(dummyr), 0,
+			      operation, false);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);	  
+	  escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), resoffset, 
+				    1, valcount, 
+				  &left.getTypedVectorRO(dummyl)[leftoffset], 0,
+				  right.getTypedVectorRO(dummyr), 0,
+				  operation, false);	  
+      }       
+  }
+  else
+  {
+      // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      left.getTypedVectorRO(dummyl), 0, true,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);	  
+	  escript::binaryOpVector(res.getTypedVectorRW(resdummy), resoffset, 1, valcount, 
+				  left.getTypedVectorRO(dummyl), leftoffset, true,
+				  right.getTypedVectorRO(dummyr), 0, false,
+				  operation);	  
+      }      
+  }  
+}
+
+
+void binaryOpDataTTC(DataTagged& result, const DataTagged& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTTC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperTTC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTTC<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperTTC<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperTTT(DataTagged& res, const DataTagged& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=DataTypes::noValues(res.getShape());	// there is only one datapoint per sample
+
+  // We need to consider two possibilities:
+  //   1) we are dealing with a new result object (which won't have tags)
+  //   2) we are storing the result back into the same object (eg +=)
+  // for case 1, we need to add tags in the correct order and then calculate on a per tag basis
+  // for case 2, we just need to calculate tags
+
+  
+  // first let's exclude anything but our two cases
+  if ((&res!=&left) &&		// self update 
+    (res.getTagCount()!=0))		// no tags
+  {
+      throw DataException("binaryOpDataReadyTTT expects a=(a op b) or c=(a op b)");
+  }	
+
+  // add tags from both sides
+  if (res.getTagCount()==0)
+  {
+      const DataTagged::DataMapType& lookup_1=left.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_1.begin();i!=lookup_1.end();i++)
+      {
+	res.addTag(i->first);
+      }
+
+      const DataTagged::DataMapType& lookup_r=right.getTagLookup();
+      for (i=lookup_r.begin();i!=lookup_r.end();i++)
+      {
+	res.addTag(i->first);
+      }
+  }
+  else	// result already has tags in it
+  {	// add tags from right, any duplicates are silently ignored by addTag
+      const DataTagged::DataMapType& lookup_r=right.getTagLookup();
+      for (auto i=lookup_r.begin();i!=lookup_r.end();i++)
+      {
+	res.addTag(i->first);
+      }      
+  }
+  // so now we know that both tagged objects have the same tags (perhaps not in the same order though)
+  if (right.getRank()==0) 	// scalar op on the right
+  {		// we'll reuse this code by pretending samples are 1 value long
+      // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 
+				valcount, 1,	// (arguments reversed from normal) 
+			      left.getTypedVectorRO(dummyl), 0, false,
+			      right.getTypedVectorRO(dummyr), 0, true,
+			      operation);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type rightoffset=right.getOffsetForTag(i->first);	
+	  escript::binaryOpVector(res.getTypedVectorRW(resdummy), resoffset, 
+				    valcount, 1, // (arguments reversed from normal) 
+				  left.getTypedVectorRO(dummyl), leftoffset, false,
+				  right.getTypedVectorRO(dummyr), rightoffset, true,
+				  operation);	  
+      }        
+  }
+  else if (left.getRank()==0)	// scalar op on the left
+  {
+      escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 
+				valcount, 1,	// (arguments reversed from normal) 
+			      left.getTypedVectorRO(dummyl), 0, true,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=i->second;
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type rightoffset=right.getOffsetForTag(i->first);	  
+	  escript::binaryOpVector(res.getTypedVectorRW(resdummy), resoffset, 
+				    valcount, 1, // (arguments reversed from normal) 
+				  left.getTypedVectorRO(dummyl), leftoffset, true,
+				  right.getTypedVectorRO(dummyr), rightoffset, false,
+				  operation);	  
+      }        
+  }
+  else
+  {
+      // This will process the default value (which we know is stored in location 0)
+      escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 1, valcount, 
+			      left.getTypedVectorRO(dummyl), 0, false,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+      const DataTagged::DataMapType& lookup_re=res.getTagLookup();
+      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+      for (i=lookup_re.begin();i!=lookup_re.end();i++)
+      {
+	  DataTypes::RealVectorType::size_type resoffset=res.getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type leftoffset=left.getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type rightoffset=right.getOffsetForTag(i->first);
+	  escript::binaryOpVector(res.getTypedVectorRW(resdummy), resoffset, 1, valcount, 
+				  left.getTypedVectorRO(dummyl), leftoffset, false,
+				  right.getTypedVectorRO(dummyr), rightoffset, false,
+				  operation);	  
+      }      
+  }    
+}
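
The tag handling at the top of this helper is the subtle part: for a fresh result the union of both operands' tag sets has to be built up front, while for a self-update (res aliasing left) only the right-hand side can contribute new tags. Below is a minimal standalone sketch of that merge, with std::map<int,size_t> standing in for DataTagged's tag-to-offset lookup; TagMap and mergeTags are illustrative names, not escript's.

    // Sketch only: merging tag sets the way the helper above does.
    // insert() silently ignores tags that are already present, as addTag does.
    #include <cstddef>
    #include <map>
    #include <utility>

    typedef std::map<int, std::size_t> TagMap;   // tag -> offset into storage

    void mergeTags(TagMap& res, const TagMap& left, const TagMap& right)
    {
        if (res.empty())    // case 1: fresh result, take tags from both sides
        {
            for (TagMap::const_iterator i = left.begin(); i != left.end(); ++i)
                res.insert(std::make_pair(i->first, res.size()));
            for (TagMap::const_iterator i = right.begin(); i != right.end(); ++i)
                res.insert(std::make_pair(i->first, res.size()));
        }
        else                // case 2: self-update (res aliases left), only right can add tags
        {
            for (TagMap::const_iterator i = right.begin(); i != right.end(); ++i)
                res.insert(std::make_pair(i->first, res.size()));
        }
    }
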
+
+
+void binaryOpDataTTT(DataTagged& result, const DataTagged& left, const DataTagged& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTTT<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperTTT<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperTTT<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);		
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperTTT<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperEEC(DataExpanded& res, const DataExpanded& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=res.getNumDPPSample()*DataTypes::noValues(res.getShape());
+
+  if (left.hasNoSamples() || right.hasNoSamples())
+  {
+      return;
+  }
+  
+  if (right.getRank()==0) 
+  {	
+    escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, res.getNumSamples(), valcount,
+			      left.getTypedVectorRO(dummyl), 0,
+			      &right.getTypedVectorRO(dummyr)[0], true,
+			      operation,
+			      false);
+  }
+  else if (left.getRank()==0)
+  {
+    escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, 	// "shrink" the samples to make this work
+					res.getNumSamples()*res.getNumDPPSample(),DataTypes::noValues(res.getShape()) , 
+			      &left.getTypedVectorRO(dummyl)[0], 0,
+			      right.getTypedVectorRO(dummyr), false,
+			      operation,
+			      true);
+  } 
+  else //(right.getRank()==left.getRank())
+  {
+    escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, 
+			      res.getNumSamples()*res.getNumDPPSample(),DataTypes::noValues(res.getShape()) ,
+			      left.getTypedVectorRO(dummyl), 0, false,
+			      right.getTypedVectorRO(dummyr), 0, true,
+			      operation);
+  }
+  
+}
+
+
+void binaryOpDataEEC(DataExpanded& result, const DataExpanded& left, const DataConstant& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEEC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperEEC<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEEC<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperEEC<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+
+template <class ResSCALAR, class LSCALAR, class RSCALAR>
+inline void binaryOpDataReadyHelperEEE(DataExpanded& res, const DataExpanded& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  ResSCALAR resdummy=0;
+  LSCALAR dummyl=0;
+  RSCALAR dummyr=0;
+  DataTypes::RealVectorType::size_type valcount=res.getNumDPPSample()*DataTypes::noValues(res.getShape());
+  
+  if (left.hasNoSamples() || right.hasNoSamples())
+  {
+      return;
+  }
+  
+  if (left.getRank()==right.getRank())
+  {
+    escript::binaryOpVector(res.getTypedVectorRW(resdummy), 0, res.getNumSamples(), valcount,
+			      left.getTypedVectorRO(dummyl), 0, false,
+			      right.getTypedVectorRO(dummyr), 0, false,
+			      operation);
+  }
+  else if (right.getRank()==0) 
+  {
+      // The binaryOpVector* loops work in terms of samples, but here the scalar
+      // argument has to advance once per datapoint.
+      // To get around this, we pretend the samples are smaller (each containing a single datapoint),
+      // which should lead to the same OpenMP thread division.
+    escript::binaryOpVectorRightScalar(res.getTypedVectorRW(resdummy), 0, 
+					 res.getNumSamples()*res.getNumDPPSample(), DataTypes::noValues(res.getShape()), 
+			      left.getTypedVectorRO(dummyl), 0, 
+			      &right.getTypedVectorRO(dummyr)[0], false,
+			      operation, false);
+  }
+  else // if (left.getRank()==0)
+  {
+    escript::binaryOpVectorLeftScalar(res.getTypedVectorRW(resdummy), 0, 
+					res.getNumSamples()*res.getNumDPPSample(), DataTypes::noValues(res.getShape()),
+			      &left.getTypedVectorRO(dummyl)[0], false,
+			      right.getTypedVectorRO(dummyr), 0,
+			      operation, false);
+  }
+}
+
+
+void binaryOpDataEEE(DataExpanded& result, const DataExpanded& left, const DataExpanded& right, 
+		     escript::ES_optype operation)
+{
+  bool cplxresult=left.isComplex() || right.isComplex();
+  if (result.isComplex()!=cplxresult)
+  {
+      ostringstream oss;
+      oss << "Programming error: result has unexpected complexity ";
+      oss << result.isComplex() << "==" << left.isComplex() << "||";
+      oss << right.isComplex();
+      throw DataException(oss.str());
+  }
+  
+  if (left.isComplex())
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEEE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::cplx_t>(result, left, right, operation);
+      }
+      else
+      {
+	  binaryOpDataReadyHelperEEE<DataTypes::cplx_t, DataTypes::cplx_t, DataTypes::real_t>(result, left, right, operation);	
+      }    
+  }
+  else	// left is real
+  {
+      if (right.isComplex())
+      {
+	  binaryOpDataReadyHelperEEE<DataTypes::cplx_t, DataTypes::real_t, DataTypes::cplx_t>(result, left, right, operation);	
+      }
+      else	// right is real
+      {
+	  binaryOpDataReadyHelperEEE<DataTypes::real_t, DataTypes::real_t, DataTypes::real_t>(result, left, right, operation);	
+      }        
+  }    
+}
+
+}
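
Every binaryOpData* wrapper in this file applies the same promotion rule: the result must be complex exactly when at least one operand is complex, and the templated helper is instantiated with the matching <result, left, right> scalar triple, i.e. (real,real) -> <real_t,real_t,real_t>, (real,cplx) -> <cplx_t,real_t,cplx_t>, (cplx,real) -> <cplx_t,cplx_t,real_t>, (cplx,cplx) -> <cplx_t,cplx_t,cplx_t>. A small self-contained sketch of the guard enforcing this rule (the function name is a stand-in):

    #include <stdexcept>

    // Mirrors the "result has unexpected complexity" check used by the wrappers above.
    void checkResultComplexity(bool resIsComplex, bool leftIsComplex, bool rightIsComplex)
    {
        if (resIsComplex != (leftIsComplex || rightIsComplex))
            throw std::logic_error("Programming error: result has unexpected complexity");
    }
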
diff --git a/escriptcore/src/BinaryDataReadyOps.h b/escriptcore/src/BinaryDataReadyOps.h
new file mode 100644
index 0000000..6cfa63a
--- /dev/null
+++ b/escriptcore/src/BinaryDataReadyOps.h
@@ -0,0 +1,59 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_BINARYOP_H__
+#define __ESCRIPT_BINARYOP_H__
+
+#include "system_dep.h"
+#include "DataTypes.h"
+#include "DataConstant.h"
+#include "DataExpanded.h"
+#include "DataVectorOps.h"
+#include "DataTagged.h"
+
+/**
+\file BinaryDataReadyOps.h 
+\brief Describes binary operations performed on instances of DataAbstract.
+
+For operations on DataVector see DataMaths.h.
+For operations on double* see LocalOps.h.
+*/
+
+namespace escript {
+
+void binaryOpDataCCC(DataConstant& result, const DataConstant& left, const DataConstant& right, 
+		     escript::ES_optype operation);
+void binaryOpDataTCT(DataTagged& result, const DataConstant& left, const DataTagged& right, 
+		     escript::ES_optype operation);
+void binaryOpDataTTC(DataTagged& result, const DataTagged& left, const DataConstant& right, 
+		     escript::ES_optype operation);
+void binaryOpDataTTT(DataTagged& result, const DataTagged& left, const DataTagged& right, 
+		     escript::ES_optype operation);
+void binaryOpDataEEC(DataExpanded& result, const DataExpanded& left, const DataConstant& right, 
+		     escript::ES_optype operation);
+void binaryOpDataECE(DataExpanded& result, const DataConstant& left, const DataExpanded& right, 
+		     escript::ES_optype operation);
+void binaryOpDataEEE(DataExpanded& result, const DataExpanded& left, const DataExpanded& right, 
+		     escript::ES_optype operation);
+void binaryOpDataETE(DataExpanded& result, const DataTagged& left, const DataExpanded& right, 
+		     escript::ES_optype operation);
+void binaryOpDataEET(DataExpanded& result, const DataExpanded& left, const DataTagged& right, 
+ 		     escript::ES_optype operation);
+
+} // end of namespace
+
+#endif // __ESCRIPT_BINARYOP_H__
+
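The suffix letters encode the operand kinds (result, left, right: C = Constant, T = Tagged, E = Expanded), so a caller selects the entry point from the dynamic types of its operands. A standalone sketch of that style of dispatch, using stand-in classes and op codes rather than escript's:

    #include <iostream>

    struct Ready            { virtual ~Ready() {} };
    struct Constant : Ready {};
    struct Tagged   : Ready {};

    enum OpCode { ADD, MULT };

    void opTTC(Tagged&, const Tagged&, const Constant&, OpCode) { std::cout << "TTC\n"; }
    void opTTT(Tagged&, const Tagged&, const Tagged&,   OpCode) { std::cout << "TTT\n"; }

    // result and left are tagged; the right operand's dynamic type picks the routine
    void dispatch(Tagged& res, const Tagged& left, const Ready& right, OpCode op)
    {
        if (const Constant* rc = dynamic_cast<const Constant*>(&right))
            opTTC(res, left, *rc, op);
        else if (const Tagged* rt = dynamic_cast<const Tagged*>(&right))
            opTTT(res, left, *rt, op);
    }

    int main()
    {
        Tagged res, left, t;
        Constant c;
        dispatch(res, left, c, ADD);    // prints TTC
        dispatch(res, left, t, MULT);   // prints TTT
        return 0;
    }
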
diff --git a/escriptcore/src/BinaryOp.h b/escriptcore/src/BinaryOp.h
deleted file mode 100644
index c5ff9ff..0000000
--- a/escriptcore/src/BinaryOp.h
+++ /dev/null
@@ -1,206 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  escript_BinaryOp_20040315_H
-#define escript_BinaryOp_20040315_H
-#include "system_dep.h"
-
-#include "DataTypes.h"
-#include "DataConstant.h"
-#include "DataTagged.h"
-#include "DataExpanded.h"
-#include "DataMaths.h"
-
-/**
-\file BinaryOp.h 
-\brief Describes binary operations performed on instances of DataAbstract.
-
-For operations on DataVector see DataMaths.h.
-For operations on double* see LocalOps.h.
-*/
-
-namespace escript {
-/**
-   \brief
-   Perform the given binary operation.
-   \param left Input/Output - The left hand side.
-   \param right Input - The right hand side.
-   \param operation Input - The operation to perform.
-*/
-template <class BinaryFunction>
-inline void binaryOp(DataTagged& left, const DataConstant& right, 
-		     BinaryFunction operation)
-{
-//  binaryOp(left,right.getPointDataView(),operation);
-  //
-  // perform the operation on each tagged value
-  const DataTagged::DataMapType& lookup=left.getTagLookup();
-  DataTagged::DataMapType::const_iterator i;
-  DataTagged::DataMapType::const_iterator lookupEnd=lookup.end();
-  DataTypes::ValueType& leftVec=left.getVectorRW();
-  const DataTypes::ShapeType& leftShape=left.getShape();
-  const DataTypes::ShapeType& rightShape=right.getShape();
-  double rvalue=right.getVectorRO()[0];		// for rank==0
-  const DataTypes::ValueType& rightVec=right.getVectorRO();   // for rank>0
-  if (right.getRank()==0) {
-    for (i=lookup.begin();i!=lookupEnd;i++) {
-      DataMaths::binaryOp(leftVec,leftShape,i->second,rvalue,operation);
-    }
-  } else {
-    for (i=lookup.begin();i!=lookupEnd;i++) {
-      DataMaths::binaryOp(leftVec, leftShape, i->second,rightVec,rightShape,0,operation);
-    }
-  }
-  //
-  // finally perform the operation on the default value
-  if (right.getRank()==0) {
-    DataMaths::binaryOp(leftVec,leftShape,left.getDefaultOffset(),rvalue,operation);
-  } else {
-    DataMaths::binaryOp(leftVec,leftShape,left.getDefaultOffset(),rightVec,rightShape,0,operation);
-  }
-}
-
-/**
-   \brief apply the binary op to each value in left and the single value right.
-
-   The value in right will be assumed to begin at offset 0
-*/
-template <class BinaryFunction>
-inline void binaryOp(DataTagged& left, const DataTypes::ValueType& right, 
-		     const DataTypes::ShapeType& shape,
-		     BinaryFunction operation)
-{
-  //
-  // perform the operation on each tagged value
-  const DataTagged::DataMapType& lookup=left.getTagLookup();
-  DataTagged::DataMapType::const_iterator i;
-  DataTagged::DataMapType::const_iterator lookupEnd=lookup.end();
-  DataTypes::ValueType& lvec=left.getVectorRW();
-  const DataTypes::ShapeType& lshape=left.getShape();
-  if (DataTypes::getRank(shape)==0) {
-    for (i=lookup.begin();i!=lookupEnd;i++) {
-      DataMaths::binaryOp(lvec, lshape,i->second,right[0],operation);
-    }
-  } else {
-    for (i=lookup.begin();i!=lookupEnd;i++) {
-      DataMaths::binaryOp(lvec, lshape, i->second,right,shape,0,operation);
-    }
-  }
-  //
-  // finally perform the operation on the default value
-  if (DataTypes::getRank(shape)==0) {
-    DataMaths::binaryOp(lvec,lshape,left.getDefaultOffset(),right[0],operation);
-  } else {
-    DataMaths::binaryOp(lvec,lshape,left.getDefaultOffset(),right, shape,0,operation);
-  }
-}
-
-
-
-
-template <class BinaryFunction>
-inline void binaryOp(DataTagged& left, const DataTagged& right, 
-		     BinaryFunction operation)
-{
-  using namespace DataMaths;
-
-  int right_rank=right.getRank();
-  //
-  // Add the right hand tag keys which can't currently be found on the left
-  const DataTagged::DataMapType& rightLookup=right.getTagLookup();
-  DataTagged::DataMapType::const_iterator i;
-  DataTagged::DataMapType::const_iterator rightLookupEnd=rightLookup.end();
-  for (i=rightLookup.begin();i!=rightLookupEnd;i++) {
-    //
-    // If the left does not already have a value assigned to this tag,
-    // add the right hand tag to the left hand tag list and assign
-    // the left's default value.
-    if (!left.isCurrentTag(i->first)) {
-      left.addTag(i->first);
-    }
-  }
-  DataTypes::ValueType& leftVec=left.getVectorRW();
-  const DataTypes::ShapeType& leftShape=left.getShape();
-  //
-  // Perform the operation.
-  const DataTagged::DataMapType& leftLookup=left.getTagLookup();
-  DataTagged::DataMapType::const_iterator leftLookupEnd=leftLookup.end();
-  for (i=leftLookup.begin();i!=leftLookupEnd;i++) {
-    if (right_rank==0) {
-       binaryOp(leftVec,leftShape,i->second, right.getDataByTagRO(i->first,0),operation);
-
-    } else {	// rank>0
-       binaryOp(leftVec,leftShape,left.getOffsetForTag(i->first),right.getVectorRO(), right.getShape(), right.getOffsetForTag(i->first), operation);
-    }
-  }
-  //
-  // finally perform the operation on the default value
-  if (right_rank==0) {
-     binaryOp(leftVec,leftShape, left.getDefaultOffset(), right.getVectorRO()[0],operation);
-  } else {
-     binaryOp(leftVec,leftShape, left.getDefaultOffset(), right.getVectorRO(), right.getShape(), right.getDefaultOffset(), operation);
-  }
-}
-
-template <class BinaryFunction>
-inline void binaryOp(DataConstant& left, const DataConstant& right, 
-		     BinaryFunction operation)
-{
-  if (right.getRank()==0) {
-    double r=right.getVectorRO()[0];
-    DataMaths::binaryOp(left.getVectorRW(), left.getShape(),0, r,operation);
-  } else {
-    DataMaths::binaryOp(left.getVectorRW(), left.getShape(),0, right.getVectorRO(),right.getShape(),0,operation);
-  }
-
-}
-
-
-
-template <class BinaryFunction>
-inline void binaryOp(DataExpanded& left, const DataReady& right, 
-		     BinaryFunction operation)
-{
-  int i,j;
-  DataTypes::ValueType::size_type numDPPSample=left.getNumDPPSample();
-  DataTypes::ValueType::size_type numSamples=left.getNumSamples();
-  if (right.getRank()==0) {
-
-    const DataTypes::ShapeType& leftShape=left.getShape();
-    DataTypes::ValueType& leftVec=left.getVectorRW();
-    //
-    // This will call the double version of binaryOp
-    #pragma omp parallel for private(i,j) schedule(static)
-    for (i=0;i<numSamples;i++) {
-      for (j=0;j<numDPPSample;j++) {
-	DataMaths::binaryOp(leftVec,leftShape,left.getPointOffset(i,j), right.getVectorRO()[right.getPointOffset(i,j)]  ,operation);
-      }
-    }
-  } else {
-    #pragma omp parallel for private(i,j) schedule(static)
-    for (i=0;i<numSamples;i++) {
-      for (j=0;j<numDPPSample;j++) {
-	DataMaths::binaryOp(left.getVectorRW(),left.getShape(),left.getPointOffset(i,j), right.getVectorRO(), right.getShape(),right.getPointOffset(i,j), operation);
-      }
-    }
-  }
-}
-
-
-} // end of namespace
-
-#endif
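
The removed header selected the operation through a compile-time BinaryFunction functor; its replacement above routes through a runtime ES_optype code, so one entry point can be instantiated for both real and complex scalars. A self-contained sketch contrasting the two styles (OpCode and the apply* helpers are illustrative only):

    #include <cstddef>
    #include <complex>
    #include <vector>

    // Old style: the operation is a compile-time functor parameter.
    template <class BinaryFunction>
    void applyFunctor(std::vector<double>& a, const std::vector<double>& b, BinaryFunction op)
    {
        for (std::size_t i = 0; i < a.size(); ++i)
            a[i] = op(a[i], b[i]);
    }

    // New style: a runtime op code selects the operation, so the same template
    // serves real and complex scalars alike.
    enum OpCode { ADD, MULT };

    template <class Scalar>
    void applyOpCode(std::vector<Scalar>& a, const std::vector<Scalar>& b, OpCode op)
    {
        for (std::size_t i = 0; i < a.size(); ++i)
            a[i] = (op == ADD) ? a[i] + b[i] : a[i] * b[i];
    }
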
diff --git a/escriptcore/src/Data.cpp b/escriptcore/src/Data.cpp
index 9edcd2b..28bda2c 100644
--- a/escriptcore/src/Data.cpp
+++ b/escriptcore/src/Data.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Data.h"
 
 #include "AbstractContinuousDomain.h"
@@ -28,19 +25,18 @@
 #include "EscriptParams.h"
 #include "FunctionSpaceException.h"
 #include "FunctionSpaceFactory.h"
-#include "UnaryFuncs.h"
+#include "BinaryDataReadyOps.h"
 
 #ifdef IKNOWWHATIMDOING
 #include "Dodgy.h"
 #endif
 
-#include "esysUtils/blocktimer.h"
-
 #include <algorithm>
 #include <fstream>
 #include <functional>
 #include <sstream>      // so we can throw messages about ranks
 #include <vector>
+#include <iostream>
 
 #include <boost/python/dict.hpp>
 #include <boost/python/extract.hpp>
@@ -49,13 +45,20 @@
 
 namespace bp = boost::python;
 using namespace escript;
+using namespace escript::DataTypes;
 using namespace std;
+using DataTypes::real_t;
+using DataTypes::cplx_t;
+
+
+#define THROWONCOMPLEX if (m_data->isComplex()){throw DataException("Operation does not support complex objects");}
+#define THROWONCOMPLEXA(Z) if (Z.isComplex()){throw DataException("Operation does not support complex objects");}
 
 // ensure the current object is not a DataLazy
 // The idea was that we could add an optional warning whenever a resolve is forced
 // #define forceResolve() if (isLazy()) {#resolve();}
 
-#define AUTOLAZYON escriptParams.getAUTOLAZY()
+#define AUTOLAZYON escriptParams.getAutoLazy()
 #define MAKELAZYOP(X) do {\
   if (isLazy() || (AUTOLAZYON && m_data->isExpanded())) \
   {\
@@ -101,12 +104,17 @@ using namespace std;
 #define MAKELAZYBIN2(L,R,X) do {\
   if (L.isLazy() || R.isLazy() || (AUTOLAZYON && (L.isExpanded() || R.isExpanded()))) \
   {\
+  if (L.isComplex() || R.isComplex()) \
+  {\
+      throw DataException("Lazy operations on complex not supported yet");\
+  }\
         DataLazy* c=new DataLazy(L.borrowDataPtr(),R.borrowDataPtr(),X);\
         return Data(c);\
   }\
 }while(0)
 
-#define CHECK_DO_CRES escriptParams.getRESOLVE_COLLECTIVE()
+#define CHECK_DO_CRES escriptParams.getResolveCollective()
+
 
 namespace
 {
@@ -239,7 +247,7 @@ pointToTuple( const DataTypes::ShapeType& shape,ARR v)
 }  // anonymous namespace
 
 Data::Data()
-    : m_shared(false), m_lazy(false)
+    :  m_lazy(false)
 {
     //
     // Default data is type DataEmpty
@@ -248,35 +256,153 @@ Data::Data()
     m_protected=false;
 }
 
-Data::Data(double value,
-           const boost::python::tuple& shape,
+
+
+Data::Data(real_t value,
+           const DataTypes::ShapeType& dataPointShape,
            const FunctionSpace& what,
            bool expanded)
-    : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
-    DataTypes::ShapeType dataPointShape;
-    for (int i = 0; i < shape.attr("__len__")(); ++i) {
-        dataPointShape.push_back(bp::extract<const int>(shape[i]));
-    }
-
-    int len = DataTypes::noValues(dataPointShape);
-    DataVector temp_data(len,value,len);
-    initialise(temp_data, dataPointShape, what, expanded);
+    initialise(value, dataPointShape, what, expanded);
     m_protected=false;
 }
 
-Data::Data(double value,
+Data::Data(cplx_t value,
            const DataTypes::ShapeType& dataPointShape,
            const FunctionSpace& what,
            bool expanded)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     initialise(value, dataPointShape, what, expanded);
     m_protected=false;
 }
 
+// Ordering is: shape, functionspace, expanded
+Data::Data(boost::python::object value,
+       boost::python::object par2,
+       boost::python::object par3,
+       boost::python::object par4)
+{
+    if (value.is_none())
+    {
+	throw DataException("Data constructor from python - first argument must not be None.");
+    }
+      // now to enforce contiguous Nones
+    if ((par2.is_none() && (!par3.is_none() || !par4.is_none())) ||
+        (par3.is_none() && !par4.is_none()))
+    {
+	throw DataException("Data constructor from python - arguments must be omitted from the right.");
+    }
+  
+    // what is the first arg
+    boost::python::extract<DataTypes::cplx_t> exc(value);
+    boost::python::extract<DataTypes::real_t> exr(value);
+    boost::python::extract<Data> exd(value);
+    if (exc.check() || exr.check())
+    {
+	//(value, shape, functionspace, expanded), but shape could be missing in which case => ()
+        DataTypes::ShapeType dataPointShape;	// default to scalar
+	if (!par2.is_none())
+	{
+	    boost::python::extract<boost::python::tuple> ex2t(par2);
+	    boost::python::extract<FunctionSpace> ex2fs(par2);
+	    if (ex2t.check())
+	    {
+		for (int i = 0; i < par2.attr("__len__")(); ++i) {
+		    dataPointShape.push_back(bp::extract<const int>(par2[i]));
+		}	    
+	    }
+	    else if (ex2fs.check())
+	    {
+		// shape will default to ()
+	        // shift the remaining parameters up so the later code sees them as if a shape had been given
+	        par4=par3;
+		par3=par2;
+	    }
+	    else
+	    {
+		throw DataException("Data constructor from python - expected a tuple or None as second argument.");
+	    }
+	}
+	boost::python::extract<FunctionSpace> ex3fs(par3);	
+	if (!par3.is_none())
+	{
+	    if (!ex3fs.check())
+	    {
+		throw DataException("Data constructor from python - expected a FunctionSpace or None as third argument.");
+	    }
+	}
+	bool expa=false;
+	if (!par4.is_none())
+	{
+	    boost::python::extract<bool> ex4b(par4);
+	    if (!ex4b.check())
+	    {
+		throw DataException("Data constructor from python - expected a boolean or None as fourth argument.");
+	    }
+	    expa=ex4b();
+	}
+	if (exr.check())
+	{
+	    int len = DataTypes::noValues(dataPointShape);
+	    RealVectorType temp_data(len,exr(),len);
+	    initialise(temp_data, dataPointShape, par3.is_none()?FunctionSpace():ex3fs(), expa);
+	    m_protected=false;	  	  
+	}
+	else
+	{
+	    int len = DataTypes::noValues(dataPointShape);
+	    CplxVectorType temp_data(len,exc(),len);
+	    initialise(temp_data, dataPointShape, par3.is_none()?FunctionSpace():ex3fs(), expa);
+	    m_protected=false;	  
+	}
+    }
+    else if (exd.check())	// Construct from (Data, [FunctionSpace])
+    {
+	boost::python::extract<FunctionSpace> ex2fs(par2);	      
+	if (!par2.is_none())
+	{
+	    if (!ex2fs.check())
+	    {
+		throw DataException("Data constructor from python - expected a FunctionSpace or None as second argument.");
+	    }
+	}        
+        init_from_data_and_fs(exd(), par2.is_none()?FunctionSpace():ex2fs());
+    }
+    else	//non-data, non-scalar first argument
+    {   //               2         3
+	//(value, functionspace, expanded)      
+        if (!par4.is_none())
+	{
+	    throw DataException("Data constructor from python - unexpected fourth argument.");
+	}
+	bool expa=false;
+	if (!par3.is_none())
+	{
+	    boost::python::extract<bool> ex3b(par3);
+	    if (!ex3b.check())
+	    {
+		throw DataException("Data constructor from python - expected a boolean or None as third argument.");
+	    }
+	    expa=ex3b();
+	}
+	boost::python::extract<FunctionSpace> ex2fs(par2);	 
+	if (!par2.is_none())
+	{
+	    if (!ex2fs.check())
+	    {
+		throw DataException("Data constructor from python - expected a FunctionSpace or None as second argument.");
+	    }
+	}
+	WrappedArray w(value);
+	initialise(w,par2.is_none()?FunctionSpace():ex2fs(),expa);
+	m_protected=false;
+    }
+}
+
 Data::Data(const Data& inData)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     set_m_data(inData.m_data);
     m_protected=inData.isProtected();
@@ -285,7 +411,7 @@ Data::Data(const Data& inData)
 
 Data::Data(const Data& inData,
            const DataTypes::RegionType& region)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     DataAbstract_ptr dat=inData.m_data;
     if (inData.isLazy())
@@ -304,9 +430,8 @@ Data::Data(const Data& inData,
 
 }
 
-Data::Data(const Data& inData,
+void Data::init_from_data_and_fs(const Data& inData,
            const FunctionSpace& functionspace)
-        : m_shared(false), m_lazy(false)
 {
     if (inData.isEmpty())
     {
@@ -346,45 +471,44 @@ Data::Data(const Data& inData,
     m_protected=false;
 }
 
+
+Data::Data(const Data& inData,
+           const FunctionSpace& functionspace)
+        :  m_lazy(false)
+{
+    init_from_data_and_fs(inData, functionspace);  
+}
+
 Data::Data(DataAbstract* underlyingdata)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     set_m_data(underlyingdata->getPtr());
     m_protected=false;
 }
 
 Data::Data(DataAbstract_ptr underlyingdata)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     set_m_data(underlyingdata);
     m_protected=false;
 }
 
-Data::Data(const DataTypes::ValueType& value,
+Data::Data(const DataTypes::RealVectorType& value,
            const DataTypes::ShapeType& shape,
            const FunctionSpace& what,
            bool expanded)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     initialise(value,shape,what,expanded);
     m_protected=false;
 }
 
 
-Data::Data(const boost::python::object& value,
-           const FunctionSpace& what,
-           bool expanded)
-        : m_shared(false), m_lazy(false)
-{
-    WrappedArray w(value);
-    initialise(w,what,expanded);
-    m_protected=false;
-}
 
 
 Data::Data(const WrappedArray& w, const FunctionSpace& what,
            bool expanded)
-           :m_shared(false), m_lazy(false)
+           : m_lazy(false)
 {
     initialise(w,what,expanded);  
     m_protected=false; 
@@ -393,7 +517,7 @@ Data::Data(const WrappedArray& w, const FunctionSpace& what,
 
 Data::Data(const boost::python::object& value,
            const Data& other)
-        : m_shared(false), m_lazy(false)
+        :  m_lazy(false)
 {
     WrappedArray w(value);
 
@@ -403,12 +527,12 @@ Data::Data(const boost::python::object& value,
 
         // get the space for the data vector
         int len1 = DataTypes::noValues(tempShape);
-        DataVector temp_data(len1, 0.0, len1);
+        RealVectorType temp_data(len1, 0.0, len1);
         temp_data.copyFromArray(w,1);
 
         int len = DataTypes::noValues(other.getDataPointShape());
 
-        DataVector temp2_data(len, temp_data[0], len);
+        RealVectorType temp2_data(len, temp_data[0], len);
         DataConstant* t=new DataConstant(other.getFunctionSpace(),other.getDataPointShape(),temp2_data);
         set_m_data(DataAbstract_ptr(t));
 
@@ -432,15 +556,9 @@ Data::~Data()
 // This method should be atomic
 void Data::set_m_data(DataAbstract_ptr p)
 {
-    if (m_data.get()!=0)  // release old ownership
-    {
-        m_data->removeOwner(this);
-    }
     if (p.get()!=0)
     {
         m_data=p;
-        m_data->addOwner(this);
-        m_shared=m_data->isShared();
         m_lazy=m_data->isLazy();
     }
 }
@@ -465,7 +583,27 @@ void Data::initialise(const WrappedArray& value,
 
 
 void
-Data::initialise(const DataTypes::ValueType& value,
+Data::initialise(const DataTypes::RealVectorType& value,
+                 const DataTypes::ShapeType& shape,
+                 const FunctionSpace& what,
+                 bool expanded)
+{
+    //
+    // Construct a Data object of the appropriate type.
+    // Construct the object first as there seems to be a bug which causes
+    // undefined behaviour if an exception is thrown during construction
+    // within the shared_ptr constructor.
+    if (expanded) {
+        DataAbstract* temp=new DataExpanded(what, shape, value);
+        set_m_data(temp->getPtr());
+    } else {
+        DataAbstract* temp=new DataConstant(what, shape, value);
+        set_m_data(temp->getPtr());
+    }
+}
+
+void
+Data::initialise(const DataTypes::CplxVectorType& value,
                  const DataTypes::ShapeType& shape,
                  const FunctionSpace& what,
                  bool expanded)
@@ -484,8 +622,32 @@ Data::initialise(const DataTypes::ValueType& value,
     }
 }
 
+
+void
+Data::initialise(const real_t value,
+                 const DataTypes::ShapeType& shape,
+                 const FunctionSpace& what,
+                 bool expanded)
+{
+    //
+    // Construct a Data object of the appropriate type.
+    // Construct the object first as there seems to be a bug which causes
+    // undefined behaviour if an exception is thrown during construction
+    // within the shared_ptr constructor.
+    if (expanded) {
+        DataAbstract* temp=new DataExpanded(what, shape, value);
+        DataAbstract_ptr p(temp);
+        set_m_data(p);
+    } else {
+        DataAbstract* temp=new DataConstant(what, shape, value);
+        DataAbstract_ptr p(temp);
+        set_m_data(p);
+    }
+}
+
+
 void
-Data::initialise(const double value,
+Data::initialise(const cplx_t value,
                  const DataTypes::ShapeType& shape,
                  const FunctionSpace& what,
                  bool expanded)
@@ -532,7 +694,7 @@ Data::getShapeTuple() const
 // There are ways to fix this involving creating function pointer variables for each form
 // but there doesn't seem to be a need given that the methods have the same name from the python point of view
 Data
-Data::copySelf()
+Data::copySelf() const
 {
     DataAbstract* temp=m_data->deepCopy();
     return Data(temp);
@@ -585,7 +747,7 @@ Data::setToZero()
     }
     if (isLazy())
     {
-        DataTypes::ValueType v(getNoValues(),0);
+        DataTypes::RealVectorType v(getNoValues(),0);
         DataConstant* dc=new DataConstant(getFunctionSpace(),getDataPointShape(),v);
         DataLazy* dl=new DataLazy(dc->getPtr());
         set_m_data(dl->getPtr());
@@ -672,9 +834,9 @@ Data::copyWithMask(const Data& other,
     }
     exclusiveWrite();
     // Now we iterate over the elements
-    DataVector& self=getReady()->getVectorRW();;
-    const DataVector& ovec=other2.getReadyPtr()->getVectorRO();
-    const DataVector& mvec=mask2.getReadyPtr()->getVectorRO();
+    RealVectorType& self=getReady()->getVectorRW();
+    const RealVectorType& ovec=other2.getReadyPtr()->getVectorRO();
+    const RealVectorType& mvec=mask2.getReadyPtr()->getVectorRO();
 
     if ((selfrank>0) && (otherrank==0) &&(maskrank==0))
     {
@@ -730,9 +892,9 @@ Data::copyWithMask(const Data& other,
             for (i=tlookup.begin();i!=tlookup.end();i++)
             {
                 // get the target offset
-                DataTypes::ValueType::size_type toff=tptr->getOffsetForTag(i->first);
-                DataTypes::ValueType::size_type moff=mptr->getOffsetForTag(i->first);
-                DataTypes::ValueType::size_type ooff=optr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type toff=tptr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type moff=mptr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type ooff=optr->getOffsetForTag(i->first);
                 for (int j=0;j<getDataPointSize();++j)
                 {
                     if (mvec[j+moff]>0)
@@ -755,9 +917,9 @@ Data::copyWithMask(const Data& other,
             for (i=tlookup.begin();i!=tlookup.end();i++)
             {
                 // get the target offset
-                DataTypes::ValueType::size_type toff=tptr->getOffsetForTag(i->first);
-                DataTypes::ValueType::size_type moff=mptr->getOffsetForTag(i->first);
-                DataTypes::ValueType::size_type ooff=optr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type toff=tptr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type moff=mptr->getOffsetForTag(i->first);
+                DataTypes::RealVectorType::size_type ooff=optr->getOffsetForTag(i->first);
                 for (int j=0;j<getDataPointSize();++j)
                 {
                     if (mvec[j+moff]>0)
@@ -877,6 +1039,12 @@ Data::isReady() const
 }
 
 
+bool
+Data::isComplex() const
+{
+    return m_data->isComplex();
+}
+
 void
 Data::setProtection()
 {
@@ -961,53 +1129,65 @@ Data
 Data::oneOver() const
 {
     MAKELAZYOP(RECIP);
-    return C_TensorUnaryOperation(*this, bind1st(divides<double>(),1.));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::RECIP);    
 }
 
 Data
 Data::wherePositive() const
 {
+    if (isComplex())
+    {
+        throw DataException("The wherePositive operation is not supported for complex data.");
+    }
     MAKELAZYOP(GZ);
-    return C_TensorUnaryOperation(*this, bind2nd(greater<double>(),0.0));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::GZ);    
 }
 
 Data
 Data::whereNegative() const
 {
+    if (isComplex())
+    {
+        throw DataException("The whereNegative operation is not supported for complex data.");
+    }
     MAKELAZYOP(LZ);
-    return C_TensorUnaryOperation(*this, bind2nd(less<double>(),0.0));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::LZ);    
 }
 
 Data
 Data::whereNonNegative() const
 {
+    if (isComplex())
+    {
+        throw DataException("The whereNonNegative operation is not supported for complex data.");
+    }
     MAKELAZYOP(GEZ);
-    return C_TensorUnaryOperation(*this, bind2nd(greater_equal<double>(),0.0));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::GEZ);    
 }
 
 Data
 Data::whereNonPositive() const
 {
+    if (isComplex())
+    {
+        throw DataException("The whereNonPositive operation is not supported for complex data.");
+    }
     MAKELAZYOP(LEZ);
-    return C_TensorUnaryOperation(*this, bind2nd(less_equal<double>(),0.0));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::LEZ);
 }
 
 Data
-Data::whereZero(double tol) const
+Data::whereZero(real_t tol) const
 {
-//   Data dataAbs=abs();
-//   return C_TensorUnaryOperation(dataAbs, bind2nd(less_equal<double>(),tol));
     MAKELAZYOPOFF(EZ,tol);
-    return C_TensorUnaryOperation(*this, bind2nd(AbsLTE(),tol));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::EZ, tol);
 }
 
 Data
-Data::whereNonZero(double tol) const
+Data::whereNonZero(real_t tol) const
 {
-//   Data dataAbs=abs();
-//   return C_TensorUnaryOperation(dataAbs, bind2nd(greater<double>(),tol));
     MAKELAZYOPOFF(NEZ,tol);
-    return C_TensorUnaryOperation(*this, bind2nd(AbsGT(),tol));
+    return C_TensorUnaryOperation(*this, escript::ES_optype::NEZ, tol);
 }
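
For reference, the pointwise test behind whereZero(tol) and whereNonZero(tol) is unchanged by the switch to op codes: a value counts as zero when its magnitude is within tol. A minimal sketch for a single real value (helper names are illustrative only):

    #include <cmath>

    inline double whereZeroPoint(double x, double tol)
    {
        return std::fabs(x) <= tol ? 1.0 : 0.0;     // EZ: 1 where |x| <= tol
    }

    inline double whereNonZeroPoint(double x, double tol)
    {
        return std::fabs(x) <= tol ? 0.0 : 1.0;     // NEZ: complement of the above
    }
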
 
 Data
@@ -1025,24 +1205,24 @@ Data::probeInterpolation(const FunctionSpace& functionspace) const
 Data
 Data::gradOn(const FunctionSpace& functionspace) const
 {
+    THROWONCOMPLEX
     if (isEmpty())
     {
         throw DataException("Error - operation not permitted on instances of DataEmpty.");
     }
-    double blocktimer_start = blocktimer_time();
     if (functionspace.getDomain()!=getDomain())
         throw DataException("Error - gradient cannot be calculated on different domains.");
     DataTypes::ShapeType grad_shape=getDataPointShape();
     grad_shape.push_back(functionspace.getDim());
     Data out(0.0,grad_shape,functionspace,true);
     getDomain()->setToGradient(out,*this);
-    blocktimer_increment("grad()", blocktimer_start);
     return out;
 }
 
 Data
 Data::grad() const
 {
+    THROWONCOMPLEX
     if (isEmpty())
     {
         throw DataException("Error - operation not permitted on instances of DataEmpty.");
@@ -1057,7 +1237,7 @@ Data::getDataPointSize() const
 }
 
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 Data::getLength() const
 {
     return m_data->getLength();
@@ -1080,67 +1260,132 @@ Data::toListOfTuples(bool scalarastuple)
 
     int npoints=getNumDataPoints();
     expand();                   // This will also resolve if required
-    const DataTypes::ValueType& vec=getReady()->getVectorRO();
     bp::list temp;
     temp.append(bp::object());
     bp::list res(temp*npoints);// pre-size the list by the "[None] * npoints"  trick
-    if (rank==0)
+    if (isComplex())
     {
-        long count;
-        if (scalarastuple)
+        const DataTypes::CplxVectorType& vec=getReady()->getTypedVectorRO(cplx_t(0));
+        if (rank==0)
         {
-            for (count=0;count<npoints;++count)
+            long count;
+            if (scalarastuple)
+            {
+                for (count=0;count<npoints;++count)
+                {
+                    res[count]=bp::make_tuple(vec[count]);
+                }
+            }
+            else
             {
-                res[count]=bp::make_tuple(vec[count]);
+                for (count=0;count<npoints;++count)
+                {
+                    res[count]=vec[count];
+                }
             }
         }
-        else
+        else if (rank==1)
         {
-            for (count=0;count<npoints;++count)
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
             {
-                res[count]=vec[count];
+                res[count]=pointToTuple1(getDataPointShape(), vec, offset);
             }
         }
-    }
-    else if (rank==1)
-    {
-        size_t count;
-        size_t offset=0;
-        for (count=0;count<npoints;++count,offset+=size)
+        else if (rank==2)
         {
-            res[count]=pointToTuple1(getDataPointShape(), vec, offset);
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple2(getDataPointShape(), vec, offset);
+            }
         }
-    }
-    else if (rank==2)
-    {
-        size_t count;
-        size_t offset=0;
-        for (count=0;count<npoints;++count,offset+=size)
+        else if (rank==3)
         {
-            res[count]=pointToTuple2(getDataPointShape(), vec, offset);
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple3(getDataPointShape(), vec, offset);
+            }
         }
-    }
-    else if (rank==3)
-    {
-        size_t count;
-        size_t offset=0;
-        for (count=0;count<npoints;++count,offset+=size)
+        else if (rank==4)
         {
-            res[count]=pointToTuple3(getDataPointShape(), vec, offset);
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple4(getDataPointShape(), vec, offset);
+            }
         }
-    }
-    else if (rank==4)
-    {
-        size_t count;
-        size_t offset=0;
-        for (count=0;count<npoints;++count,offset+=size)
+        else
         {
-            res[count]=pointToTuple4(getDataPointShape(), vec, offset);
-        }
+            throw DataException("Unknown rank in ::toListOfTuples()");
+        }      
     }
     else
     {
-        throw DataException("Unknown rank in ::toListOfTuples()");
+        const DataTypes::RealVectorType& vec=getReady()->getVectorRO();
+        if (rank==0)
+        {
+            long count;
+            if (scalarastuple)
+            {
+                for (count=0;count<npoints;++count)
+                {
+                    res[count]=bp::make_tuple(vec[count]);
+                }
+            }
+            else
+            {
+                for (count=0;count<npoints;++count)
+                {
+                    res[count]=vec[count];
+                }
+            }
+        }
+        else if (rank==1)
+        {
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple1(getDataPointShape(), vec, offset);
+            }
+        }
+        else if (rank==2)
+        {
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple2(getDataPointShape(), vec, offset);
+            }
+        }
+        else if (rank==3)
+        {
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple3(getDataPointShape(), vec, offset);
+            }
+        }
+        else if (rank==4)
+        {
+            size_t count;
+            size_t offset=0;
+            for (count=0;count<npoints;++count,offset+=size)
+            {
+                res[count]=pointToTuple4(getDataPointShape(), vec, offset);
+            }
+        }
+        else
+        {
+            throw DataException("Unknown rank in ::toListOfTuples()");
+        }
     }
     return res;
 }
@@ -1164,8 +1409,16 @@ Data::getValueOfDataPointAsTuple(int dataPointNo)
             throw DataException("Error - Data::getValueOfDataPointAsTuple: invalid dataPointNoInSample.");
         }
         // TODO: global error handling
-        DataTypes::ValueType::size_type offset=getDataOffset(sampleNo, dataPointNoInSample);
-        return pointToTuple(getDataPointShape(),&(getDataAtOffsetRO(offset)));
+        if (isComplex())
+	{
+	    DataTypes::CplxVectorType::size_type offset=getDataOffset(sampleNo, dataPointNoInSample);
+	    return pointToTuple(getDataPointShape(),&(getDataAtOffsetRO(offset, cplx_t(0))));
+	}
+	else
+	{
+	    DataTypes::RealVectorType::size_type offset=getDataOffset(sampleNo, dataPointNoInSample);
+	    return pointToTuple(getDataPointShape(),&(getDataAtOffsetRO(offset, real_t(0))));
+	}
     }
     else
     {
@@ -1187,6 +1440,7 @@ Data::setValueOfDataPointToPyObject(int dataPointNo, const bp::object& py_object
 void
 Data::setTupleForGlobalDataPoint(int id, int proc, bp::object v)
 {
+    THROWONCOMPLEX
 #ifdef ESYS_MPI 
     int error=0;
 #endif
@@ -1194,7 +1448,7 @@ Data::setTupleForGlobalDataPoint(int id, int proc, bp::object v)
     {
         try
         {
-            bp::extract<double> dex(v);
+            bp::extract<real_t> dex(v);
             if (dex.check())
             {
                 setValueOfDataPoint(id, dex());
@@ -1236,6 +1490,12 @@ Data::setValueOfDataPointToArray(int dataPointNo, const bp::object& obj)
     }
 
     WrappedArray w(obj);
+    if (w.isComplex() && (static_cast<unsigned int>(w.getRank())==0))
+    {
+	cplx_t v=w.getEltC();
+	setValueOfDataPointC(dataPointNo, v);
+	return;
+    }
     //
     // check rank
     if (static_cast<unsigned int>(w.getRank())<getDataPointRank())
@@ -1266,7 +1526,28 @@ Data::setValueOfDataPointToArray(int dataPointNo, const bp::object& obj)
 }
 
 void
-Data::setValueOfDataPoint(int dataPointNo, const double value)
+Data::setValueOfDataPoint(int dataPointNo, const real_t value)
+{
+    if (isProtected()) {
+        throw DataException("Error - attempt to update protected Data object.");
+    }
+    //
+    // make sure data is expanded:
+    exclusiveWrite();
+    if (!isExpanded()) {
+        expand();
+    }
+    if (getNumDataPointsPerSample()>0) {
+        int sampleNo = dataPointNo/getNumDataPointsPerSample();
+        int dataPointNoInSample = dataPointNo - sampleNo * getNumDataPointsPerSample();
+        m_data->copyToDataPoint(sampleNo, dataPointNoInSample,value);
+    } else {
+        m_data->copyToDataPoint(-1, 0,value);
+    }
+}
+
+void
+Data::setValueOfDataPointC(int dataPointNo, const cplx_t value)
 {
     if (isProtected()) {
         throw DataException("Error - attempt to update protected Data object.");
@@ -1286,10 +1567,12 @@ Data::setValueOfDataPoint(int dataPointNo, const double value)
     }
 }
 
+
 const
 bp::object
 Data::getValueOfGlobalDataPointAsTuple(int procNo, int dataPointNo)
 {
+    THROWONCOMPLEX
     // This could be lazier than it is now
     forceResolve();
 
@@ -1302,7 +1585,7 @@ Data::getValueOfGlobalDataPointAsTuple(int procNo, int dataPointNo)
     size_t length=DataTypes::noValues(dataPointShape);
 
     // added for the MPI communication
-    double *tmpData = new double[length];
+    real_t *tmpData = new real_t[length];
 
     // updated for the MPI case
     if( get_MPIRank()==procNo ){
@@ -1321,9 +1604,9 @@ Data::getValueOfGlobalDataPointAsTuple(int procNo, int dataPointNo)
                 throw DataException("Error - Data::getValueOfGlobalDataPointAsTuple: invalid dataPointNoInSample.");
             }
             // TODO: global error handling
-            DataTypes::ValueType::size_type offset=getDataOffset(sampleNo, dataPointNoInSample);
+            DataTypes::RealVectorType::size_type offset=getDataOffset(sampleNo, dataPointNoInSample);
 
-            memcpy(tmpData,&(getDataAtOffsetRO(offset)),length*sizeof(double));
+            memcpy(tmpData,&(getDataAtOffsetRO(offset)),length*sizeof(real_t));
         }
     }
 #ifdef ESYS_MPI
@@ -1342,6 +1625,7 @@ Data::getValueOfGlobalDataPointAsTuple(int procNo, int dataPointNo)
 bp::object
 Data::integrateToTuple_const() const
 {
+    THROWONCOMPLEX
     if (isLazy())
     {
         throw DataException("Error - cannot integrate for constant lazy data.");
@@ -1368,8 +1652,8 @@ Data::integrateWorker() const
 
     //
     // calculate the integral values
-    vector<double> integrals(dataPointSize);
-    vector<double> integrals_local(dataPointSize);
+    vector<real_t> integrals(dataPointSize);
+    vector<real_t> integrals_local(dataPointSize);
     const AbstractContinuousDomain* dom=dynamic_cast<const AbstractContinuousDomain*>(getDomain().get());
     if (dom==0)
     {                             
@@ -1378,8 +1662,8 @@ Data::integrateWorker() const
 #ifdef ESYS_MPI
     dom->setToIntegrals(integrals_local,*this);
     // Global sum: use an array instead of a vector because elements of array are guaranteed to be contiguous in memory
-    double *tmp = new double[dataPointSize];
-    double *tmp_local = new double[dataPointSize];
+    real_t *tmp = new real_t[dataPointSize];
+    real_t *tmp_local = new real_t[dataPointSize];
     for (int i=0; i<dataPointSize; i++) { tmp_local[i] = integrals_local[i]; }
     MPI_Allreduce( &tmp_local[0], &tmp[0], dataPointSize, MPI_DOUBLE, MPI_SUM, getDomain()->getMPIComm() );
     for (int i=0; i<dataPointSize; i++) { integrals[i] = tmp[i]; }
@@ -1388,10 +1672,7 @@ Data::integrateWorker() const
     delete[] tmp_local;
 #else
     dom->setToIntegrals(integrals,*this);
-/*  double *tmp = new double[dataPointSize];
-    for (int i=0; i<dataPointSize; i++) { tmp[i]=integrals[i]; }*/
     bp::tuple result=pointToTuple(shape,integrals);
-//   delete tmp;
 #endif
 
     return result;
@@ -1400,18 +1681,21 @@ Data::integrateWorker() const
 Data
 Data::besselFirstKind(int order)
 {
+    THROWONCOMPLEX
     return bessel(order,boost::math::cyl_bessel_j);
 }
 
 Data
 Data::besselSecondKind(int order)
 {
+    THROWONCOMPLEX
     return bessel(order,boost::math::cyl_neumann);
 }
 
 Data
-Data::bessel(int order, double (*besselfunc) (int,double) )
+Data::bessel(int order, real_t (*besselfunc) (int,real_t) )
 {
+    THROWONCOMPLEX
     if (isEmpty())  // do this before we attempt to interpolate
     {
      throw DataException("Error - Operations (bessel) not permitted on instances of DataEmpty.");
@@ -1431,9 +1715,9 @@ Data::bessel(int order, double (*besselfunc) (int,double) )
     Data res;
 
     if (arg_0_Z.isConstant()) {
-        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());      // DataConstant output
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-        double *ptr_2 = &(res.getDataAtOffsetRW(0));
+        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),false);      // DataConstant output
+        const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
+        real_t *ptr_2 = &(res.getDataAtOffsetRW(0));
         for (int i = 0; i < size0; ++i) {
             ptr_2[i] = besselfunc(order, ptr_0[i]);
         }
@@ -1444,13 +1728,13 @@ Data::bessel(int order, double (*besselfunc) (int,double) )
         DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
 
         // Prepare a DataTagged output 2
-        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());   // DataTagged output
+        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(), false);   // DataTagged output
         res.tag();
         DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
 
         // Get the pointers to the actual data
-        const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-        double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
+        const real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0));
+        real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0));
         // Compute a result for the default
         for (int i = 0; i < size0; ++i) {
             ptr_2[i] = besselfunc(order, ptr_0[i]);
@@ -1460,8 +1744,8 @@ Data::bessel(int order, double (*besselfunc) (int,double) )
         DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
         for (i=lookup_0.begin();i!=lookup_0.end();i++) {
             tmp_2->addTag(i->first);
-            const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-            double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+            const real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
+            real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
             for (int i = 0; i < size0; ++i) {
                 ptr_2[i] = besselfunc(order, ptr_0[i]);
             }
@@ -1484,8 +1768,8 @@ Data::bessel(int order, double (*besselfunc) (int,double) )
         //      for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
             int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
             int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-            const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-            double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+            const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+            real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
             for (int i = 0; i < size0*numDataPointsPerSample_0; ++i) {
                 ptr_2[i] = besselfunc(order, ptr_0[i]);
             }
@@ -1500,38 +1784,99 @@ Data::bessel(int order, double (*besselfunc) (int,double) )
 }
 
 Data
-Data::sin() const
+Data::conjugate() const
 {
-    MAKELAZYOP(SIN);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::sin);
+    if (isLazy())
+    {
+        Data temp(*this);
+        temp.resolve();
+        return temp.conjugate();
+    }
+    if (isComplex())
+    {
+        return C_TensorUnaryOperation(*this, escript::ES_optype::CONJ);      
+    }
+    else
+    {
+        return copySelf();
+    }
 }
 
+
 Data
-Data::cos() const
+Data::real() const
 {
-    MAKELAZYOP(COS);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::cos);
+    if (isLazy())
+    {
+        Data temp(*this);
+        temp.resolve();
+        return temp.real();
+    }
+    if (isComplex())
+    {
+        return C_TensorUnaryOperation(*this, escript::ES_optype::REAL);      
+    }
+    else
+    {
+        return copySelf();
+    }
 }
 
 Data
-Data::tan() const
+Data::imag() const
 {
-    MAKELAZYOP(TAN);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::tan);
+    if (isLazy())
+    {
+        Data temp(*this);
+        temp.resolve();
+        return temp.imag();
+    }
+    if (isComplex())
+    {
+        return C_TensorUnaryOperation(*this, escript::ES_optype::IMAG);      
+    }
+    else
+    {
+        return copySelf()*Data(0, m_data->getShape(), getFunctionSpace(),false);      // return an object with same tags etc but all values 0
+                                // This is not efficient, but taking imag() of real-valued data should be rare anyway.
+    }
 }
 
+
+
 Data
-Data::asin() const
+Data::sin() const
 {
-    MAKELAZYOP(ASIN);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::asin);
+    MAKELAZYOP(SIN);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::SIN);
+}
+
+Data
+Data::cos() const
+{
+    MAKELAZYOP(COS);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::COS);
+}
+
+Data
+Data::tan() const
+{
+    MAKELAZYOP(TAN);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::TAN);
+}
+
+Data
+Data::asin() const
+{
+    MAKELAZYOP(ASIN);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ASIN);
 }
 
 Data
 Data::acos() const
 {
     MAKELAZYOP(ACOS);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::acos);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ACOS);
 }
 
 
@@ -1539,28 +1884,28 @@ Data
 Data::atan() const
 {
     MAKELAZYOP(ATAN);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::atan);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ATAN);
 }
 
 Data
 Data::sinh() const
 {
     MAKELAZYOP(SINH);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::sinh);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::SINH);
 }
 
 Data
 Data::cosh() const
 {
     MAKELAZYOP(COSH);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::cosh);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::COSH);
 }
 
 Data
 Data::tanh() const
 {
     MAKELAZYOP(TANH);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::tanh);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::TANH);
 }
 
 
@@ -1571,7 +1916,7 @@ Data::erf() const
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
 #else
     MAKELAZYOP(ERF);
-    return C_TensorUnaryOperation(*this, ::erf);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ERF);
 #endif
 }
 
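Note on the hunk below: it drops the Windows-only asinh/acosh/atanh substitute functions, presumably because the C++11 standard library provides std::asinh and friends (and the enum-coded tensor-op path now owns the per-element maths). A quick standalone check of the identity such a substitute would have used (illustrative only):

    // Illustrative only: std::asinh vs. the log identity asinh(x) = log(x + sqrt(x*x + 1)).
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double x = 0.75;
        std::printf("std::asinh  : %.15f\n", std::asinh(x));
        std::printf("log identity: %.15f\n", std::log(x + std::sqrt(x * x + 1.0)));
        return 0;
    }
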
@@ -1579,73 +1924,62 @@ Data
 Data::asinh() const
 {
     MAKELAZYOP(ASINH);
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-    return C_TensorUnaryOperation(*this, escript::asinh_substitute);
-#else
-    return C_TensorUnaryOperation(*this, ::asinh);
-#endif
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ASINH);
 }
 
 Data
 Data::acosh() const
 {
     MAKELAZYOP(ACOSH);
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-    return C_TensorUnaryOperation(*this, escript::acosh_substitute);
-#else
-    return C_TensorUnaryOperation(*this, ::acosh);
-#endif
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ACOSH);    
 }
 
 Data
 Data::atanh() const
 {
     MAKELAZYOP(ATANH);
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-    return C_TensorUnaryOperation(*this, escript::atanh_substitute);
-#else
-    return C_TensorUnaryOperation(*this, ::atanh);
-#endif
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ATANH); 
 }
 
 Data
 Data::log10() const
 {
     MAKELAZYOP(LOG10);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::log10);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::LOG10);
 }
 
 Data
 Data::log() const
 {
     MAKELAZYOP(LOG);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::log);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::LOG);
 }
 
 Data
 Data::sign() const
 {
     MAKELAZYOP(SIGN);
-    return C_TensorUnaryOperation(*this, escript::fsign);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::SIGN);
 }
 
 Data
 Data::abs() const
 {
     MAKELAZYOP(ABS);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::fabs);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::ABS);
 }
 
 Data
 Data::neg() const
 {
     MAKELAZYOP(NEG);
-    return C_TensorUnaryOperation(*this, negate<double>());
+    return C_TensorUnaryOperation(*this, escript::ES_optype::NEG);
 }
 
 Data
 Data::pos() const
 {
+    THROWONCOMPLEX
     // not doing lazy check here is deliberate.
     // since a deep copy of lazy data should be cheap, I'll just let it happen now
     Data result;
@@ -1658,17 +1992,17 @@ Data
 Data::exp() const
 {
     MAKELAZYOP(EXP);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::exp);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::EXP);
 }
 
 Data
 Data::sqrt() const
 {
     MAKELAZYOP(SQRT);
-    return C_TensorUnaryOperation<double (*)(double)>(*this, ::sqrt);
+    return C_TensorUnaryOperation(*this, escript::ES_optype::SQRT);
 }
 
-double
+real_t
 Data::Lsup_const() const
 {
     if (isLazy())
@@ -1678,7 +2012,7 @@ Data::Lsup_const() const
     return LsupWorker();
 }
 
-double
+real_t
 Data::Lsup() 
 {
     if (isLazy())
@@ -1690,18 +2024,36 @@ Data::Lsup()
         else
         {
 #ifdef ESYS_MPI
-            return lazyAlgWorker<AbsMax>(0,MPI_MAX);
+            if (isComplex())
+            {
+                return lazyAlgWorker<AbsMax<cplx_t> >(0,MPI_MAX);
+            }
+            else
+            {
+                return lazyAlgWorker<AbsMax<cplx_t> >(0,MPI_MAX);             
+            }
 #else
-            return lazyAlgWorker<AbsMax>(0);
+            if (isComplex())
+            {
+                return lazyAlgWorker<AbsMax<real_t> >(0);
+            }
+            else
+            {
+                return lazyAlgWorker<AbsMax<real_t> >(0);
+            }
 #endif
         }
     }
     return LsupWorker();
 }
 
-double
+real_t
 Data::sup_const() const
 {
+    if (isComplex())
+    {
+        throw DataException("Error Cannot compute sup() for complex data.");
+    }  
     if (isLazy())
     {
         throw DataException("Error - cannot compute sup for constant lazy data.");
@@ -1709,9 +2061,13 @@ Data::sup_const() const
     return supWorker();
 }
 
-double
+real_t
 Data::sup() 
 {
+    if (isComplex())
+    {
+        throw DataException("Error Cannot compute sup() for complex data.");
+    }
     if (isLazy())
     {
         if (!actsExpanded() || CHECK_DO_CRES)
@@ -1721,18 +2077,22 @@ Data::sup()
         else
         {
 #ifdef ESYS_MPI
-            return lazyAlgWorker<FMax>(numeric_limits<double>::max()*-1, MPI_MAX);
+            return lazyAlgWorker<FMax>(numeric_limits<real_t>::max()*-1, MPI_MAX);
 #else
-            return lazyAlgWorker<FMax>(numeric_limits<double>::max()*-1);
+            return lazyAlgWorker<FMax>(numeric_limits<real_t>::max()*-1);
 #endif
         }
     }
     return supWorker();
 }
 
-double
+real_t
 Data::inf_const() const
 {
+    if (isComplex())
+    {
+        throw DataException("Error Cannot compute inf() for complex data.");
+    }  
     if (isLazy())
     {
         throw DataException("Error - cannot compute inf for constant lazy data.");
@@ -1740,9 +2100,13 @@ Data::inf_const() const
     return infWorker();
 }
 
-double
+real_t
 Data::inf() 
 {
+    if (isComplex())
+    {
+        throw DataException("Error Cannot compute inf() for complex data.");
+    }  
     if (isLazy())
     {
         if (!actsExpanded() || CHECK_DO_CRES)
@@ -1752,9 +2116,9 @@ Data::inf()
         else
         {
 #ifdef ESYS_MPI
-            return lazyAlgWorker<FMin>(numeric_limits<double>::max(), MPI_MIN);
+            return lazyAlgWorker<FMin>(numeric_limits<real_t>::max(), MPI_MIN);
 #else
-            return lazyAlgWorker<FMin>(numeric_limits<double>::max());
+            return lazyAlgWorker<FMin>(numeric_limits<real_t>::max());
 #endif
         }
     }
@@ -1762,11 +2126,11 @@ Data::inf()
 }
 
 template <class BinaryOp>
-double
+real_t
 #ifdef ESYS_MPI
-Data::lazyAlgWorker(double init, MPI_Op mpiop_type)
+Data::lazyAlgWorker(real_t init, MPI_Op mpiop_type)
 #else
-Data::lazyAlgWorker(double init)
+Data::lazyAlgWorker(real_t init)
 #endif
 {
     if (!isLazy() || !m_data->actsExpanded())
@@ -1774,27 +2138,27 @@ Data::lazyAlgWorker(double init)
         throw DataException("Error - lazyAlgWorker can only be called on lazy(expanded) data.");
     }
     DataLazy* dl=dynamic_cast<DataLazy*>(m_data.get());
-    EsysAssert((dl!=0), "Programming error - casting to DataLazy.");
-    double val=init;
+    ESYS_ASSERT(dl!=NULL, "Programming error - casting to DataLazy.");
+    real_t val=init;
     int i=0;
     const size_t numsamples=getNumSamples();
     const size_t samplesize=getNoValues()*getNumDataPointsPerSample();
     BinaryOp operation;
-    double localValue=0, globalValue;
+    real_t localValue=0, globalValue;
    #pragma omp parallel private(i)
     {
-        double localtot=init;
+        real_t localtot=init;
         #pragma omp for schedule(static) private(i)
         for (i=0;i<numsamples;++i)
         {
             size_t roffset=0;
-            const DataTypes::ValueType* v=dl->resolveSample(i, roffset);
+            const DataTypes::RealVectorType* v=dl->resolveSample(i, roffset);
             // Now we have the sample, run operation on all points
             for (size_t j=0;j<samplesize;++j)
             {
                 localtot=operation(localtot,(*v)[j+roffset]);
             }
-            if (DataMaths::vectorHasNaN(*v,roffset, samplesize))
+            if (escript::vectorHasNaN(*v,roffset, samplesize))
             {
                 #pragma omp critical
                 {
@@ -1834,7 +2198,7 @@ Data::hasNaN()
 }
 
 void
-Data::replaceNaN(double value)
+Data::replaceNaN(real_t value)
 {
     if (isLazy())
     {
@@ -1843,24 +2207,46 @@ Data::replaceNaN(double value)
     getReady()->replaceNaN(value); 
 }
 
+void
+Data::replaceNaN(cplx_t value)
+{
+    if (isLazy())
+    {
+        resolve();
+    }
+    getReady()->replaceNaN(value); 
+}
 
-
+void
+Data::replaceNaNPython(boost::python::object obj)
+{
+    boost::python::extract<DataTypes::real_t> exr(obj);
+    if (exr.check())
+    {
+	replaceNaN(exr());
+    }
+    else
+    {
+	replaceNaN(boost::python::extract<DataTypes::cplx_t>(obj)());
+    }
+}
 
 // Do not call this on Lazy Data use the proper entry point
-double
+real_t
 Data::LsupWorker() const
 {
     bool haveNaN=getReady()->hasNaN();
-    double localValue=0;
+
   
 #ifdef ESYS_MPI
+    int nanchecker=0;    
     if (haveNaN)
     {
-        localValue=1.0;
+        nanchecker=1.0;
     }
-    double globalValue;
-    MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
-    if (globalValue!=0)
+    int globalnan;
+    MPI_Allreduce( &nanchecker, &globalnan, 1, MPI_INT, MPI_MAX, getDomain()->getMPIComm() );
+    if (globalnan!=0)
     {
         return makeNaN();
     }
@@ -1873,30 +2259,48 @@ Data::LsupWorker() const
 
     //
     // set the initial absolute maximum value to zero
+    if (isComplex())
+    {
+        AbsMax<cplx_t> abs_max_func;
+        real_t localValue=0;
+        localValue = reduction(abs_max_func,0);
 
-    AbsMax abs_max_func;
-    localValue = algorithm(abs_max_func,0);
+    #ifdef ESYS_MPI
+        real_t globalValue=0;
+        MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
+        return globalValue;
+    #else
+        return localValue;
+    #endif
+    }
+    else
+    {  
+        AbsMax<real_t> abs_max_func;
+        real_t localValue=0;
+        localValue = reduction(abs_max_func,0);
 
-#ifdef ESYS_MPI
-    MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
-    return globalValue;
-#else
-    return localValue;
-#endif
+    #ifdef ESYS_MPI
+        real_t globalValue=0;   
+        MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
+        return globalValue;
+    #else
+        return localValue;
+    #endif
+    }
 }
 
-double
+real_t
 Data::supWorker() const
 {
     bool haveNaN=getReady()->hasNaN();
-    double localValue=0;
+    real_t localValue=0;
 
 #ifdef ESYS_MPI
     if (haveNaN)
     {
         localValue=1.0;
     }
-    double globalValue;
+    real_t globalValue;
     MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
     if (globalValue!=0)
     {
@@ -1910,9 +2314,16 @@ Data::supWorker() const
 #endif
 
     //
-    // set the initial maximum value to min possible double
+    // set the initial maximum value to min possible real_t
     FMax fmax_func;
-    localValue = algorithm(fmax_func,numeric_limits<double>::infinity()*-1);
+    if (hasNoSamples())
+    {
+	localValue = numeric_limits<real_t>::infinity()*-1;
+    }
+    else
+    {
+	localValue = reduction(fmax_func,numeric_limits<real_t>::infinity()*-1);      
+    }
     #ifdef ESYS_MPI
     MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
     return globalValue;
@@ -1921,18 +2332,18 @@ Data::supWorker() const
 #endif
 }
 
-double
+real_t
 Data::infWorker() const
 {
     bool haveNaN=getReady()->hasNaN();
-    double localValue=0;
+    real_t localValue=0;
 
 #ifdef ESYS_MPI
     if (haveNaN)
     {
         localValue=1.0;
     }
-    double globalValue;
+    real_t globalValue;
     MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MAX, getDomain()->getMPIComm() );
     if (globalValue!=0)
     {
@@ -1945,9 +2356,16 @@ Data::infWorker() const
     }
 #endif
     //
-    // set the initial minimum value to max possible double
+    // set the initial minimum value to max possible real_t
     FMin fmin_func;
-    localValue = algorithm(fmin_func,numeric_limits<double>::infinity());
+    if (hasNoSamples())
+    {
+	localValue = numeric_limits<real_t>::infinity();
+    }
+    else
+    {
+	localValue = reduction(fmin_func,numeric_limits<real_t>::infinity());
+    }
 #ifdef ESYS_MPI
     MPI_Allreduce( &localValue, &globalValue, 1, MPI_DOUBLE, MPI_MIN, getDomain()->getMPIComm() );
     return globalValue;
@@ -1962,26 +2380,29 @@ Data::infWorker() const
 inline Data
 Data::minval_nonlazy() const
 {
+    THROWONCOMPLEX
     //
-    // set the initial minimum value to max possible double
+    // set the initial minimum value to max possible real_t
     FMin fmin_func;
-    return dp_algorithm(fmin_func,numeric_limits<double>::max());
+    return dp_algorithm(fmin_func,numeric_limits<real_t>::max());
 }
 
 
 inline Data
 Data::maxval_nonlazy() const
 {
+    THROWONCOMPLEX
     //
-    // set the initial maximum value to min possible double
+    // set the initial maximum value to min possible real_t
     FMax fmax_func;
-    return dp_algorithm(fmax_func,numeric_limits<double>::max()*-1);
+    return dp_algorithm(fmax_func,numeric_limits<real_t>::max()*-1);
 }
 
 
 Data
 Data::maxval() const
 {
+    THROWONCOMPLEX
     MAKELAZYOP(MAXVAL);
     return maxval_nonlazy();
 }
@@ -1990,6 +2411,7 @@ Data::maxval() const
 Data
 Data::minval() const
 {
+    THROWONCOMPLEX
     MAKELAZYOP(MINVAL);
     return minval_nonlazy();
 }
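Note on the hunks above: ordering-based reductions (sup, inf, minval, maxval) now refuse complex input, either via an explicit DataException or the THROWONCOMPLEX guard, while Lsup() stays meaningful as the maximum absolute value and uses AbsMax over cplx_t. A standalone sketch of that distinction, not escript code:

    // Illustrative only: max|z| is well defined for complex data, sup/inf are not.
    #include <algorithm>
    #include <complex>
    #include <iostream>
    #include <limits>
    #include <stdexcept>
    #include <vector>

    double lsup(const std::vector<std::complex<double>>& v)
    {
        double m = 0.0;
        for (const auto& z : v) m = std::max(m, std::abs(z));
        return m;
    }

    double sup(const std::vector<std::complex<double>>& v, bool isComplex)
    {
        if (isComplex)
            throw std::runtime_error("sup() is undefined for complex data");
        double m = -std::numeric_limits<double>::infinity();
        for (const auto& z : v) m = std::max(m, z.real());
        return m;
    }

    int main()
    {
        std::vector<std::complex<double>> v{{3.0, 4.0}, {1.0, 0.0}};
        std::cout << lsup(v) << '\n';   // 5
        try { sup(v, true); }
        catch (const std::exception& e) { std::cout << e.what() << '\n'; }
    }
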
@@ -2046,7 +2468,7 @@ Data::swapaxes(const int axis0, const int axis1) const
             ev_shape.push_back(s[i]);
         }
     }
-    Data ev(0.,ev_shape,getFunctionSpace());
+    Data ev(0.,ev_shape,getFunctionSpace(), false);
     ev.typeMatchRight(*this);
     m_data->swapaxes(ev.m_data.get(), axis0_tmp, axis1_tmp);
     return ev;
@@ -2069,50 +2491,118 @@ Data::symmetric() const
         throw DataException("Error - Data::symmetric can only be calculated for rank 2 or 4 object.");
     }
     MAKELAZYOP(SYM);
-    Data ev(0.,getDataPointShape(),getFunctionSpace());
+    Data ev(0.,getDataPointShape(),getFunctionSpace(), false);
     ev.typeMatchRight(*this);
     m_data->symmetric(ev.m_data.get());
     return ev;
 }
 
 Data
-Data::nonsymmetric() const
+Data::antisymmetric() const
+{
+    // check input
+    DataTypes::ShapeType s=getDataPointShape();
+    if (getDataPointRank()==2) {
+        if(s[0] != s[1])
+            throw DataException("Error - Data::antisymmetric can only be calculated for rank 2 object with equal first and second dimension.");
+	MAKELAZYOP(NSYM);
+        DataTypes::ShapeType ev_shape;
+        ev_shape.push_back(s[0]);
+        ev_shape.push_back(s[1]);
+        Data ev(0.,ev_shape,getFunctionSpace(),false);
+        ev.typeMatchRight(*this);
+        m_data->antisymmetric(ev.m_data.get());
+        return ev;
+    }
+    else if (getDataPointRank()==4) {
+        if(!(s[0] == s[2] && s[1] == s[3]))
+            throw DataException("Error - Data::antisymmetric can only be calculated for rank 4 object with dim0==dim2 and dim1==dim3.");
+	MAKELAZYOP(NSYM);
+        DataTypes::ShapeType ev_shape;
+        ev_shape.push_back(s[0]);
+        ev_shape.push_back(s[1]);
+        ev_shape.push_back(s[2]);
+        ev_shape.push_back(s[3]);
+        Data ev(0.,ev_shape,getFunctionSpace(),false);
+        ev.typeMatchRight(*this);
+        m_data->antisymmetric(ev.m_data.get());
+        return ev;
+    }
+    else {
+        throw DataException("Error - Data::antisymmetric can only be calculated for rank 2 or 4 object.");
+    }
+}
+
+Data
+Data::hermitian() const
+{
+    if (!isComplex())
+    {
+        return symmetric();
+    }
+    // check input
+    DataTypes::ShapeType s=getDataPointShape();
+    if (getDataPointRank()==2) {
+        if(s[0] != s[1])
+            throw DataException("Error - Data::hermitian can only be calculated for rank 2 object with equal first and second dimension.");
+    }
+    else if (getDataPointRank()==4) {
+        if(!(s[0] == s[2] && s[1] == s[3]))
+            throw DataException("Error - Data::hermitian can only be calculated for rank 4 object with dim0==dim2 and dim1==dim3.");
+    }
+    else {
+        throw DataException("Error - Data::hermitian can only be calculated for rank 2 or 4 object.");
+    }
+    MAKELAZYOP(HER);
+    Data ev(0.,getDataPointShape(),getFunctionSpace(), false);
+    ev.typeMatchRight(*this);
+    m_data->hermitian(ev.m_data.get());
+    return ev;
+}
+
+Data
+Data::antihermitian() const
 {
-    MAKELAZYOP(NSYM);
+    if (!isComplex())
+    {
+        return antisymmetric();
+    }  
     // check input
     DataTypes::ShapeType s=getDataPointShape();
     if (getDataPointRank()==2) {
         if(s[0] != s[1])
-            throw DataException("Error - Data::nonsymmetric can only be calculated for rank 2 object with equal first and second dimension.");
+            throw DataException("Error - Data::antisymmetric can only be calculated for rank 2 object with equal first and second dimension.");
+	MAKELAZYOP(NHER);
         DataTypes::ShapeType ev_shape;
         ev_shape.push_back(s[0]);
         ev_shape.push_back(s[1]);
-        Data ev(0.,ev_shape,getFunctionSpace());
+        Data ev(0.,ev_shape,getFunctionSpace(), false);
         ev.typeMatchRight(*this);
-        m_data->nonsymmetric(ev.m_data.get());
+        m_data->antihermitian(ev.m_data.get());
         return ev;
     }
     else if (getDataPointRank()==4) {
         if(!(s[0] == s[2] && s[1] == s[3]))
-            throw DataException("Error - Data::nonsymmetric can only be calculated for rank 4 object with dim0==dim2 and dim1==dim3.");
+            throw DataException("Error - Data::antisymmetric can only be calculated for rank 4 object with dim0==dim2 and dim1==dim3.");
+	MAKELAZYOP(NHER);
         DataTypes::ShapeType ev_shape;
         ev_shape.push_back(s[0]);
         ev_shape.push_back(s[1]);
         ev_shape.push_back(s[2]);
         ev_shape.push_back(s[3]);
-        Data ev(0.,ev_shape,getFunctionSpace());
+        Data ev(0.,ev_shape,getFunctionSpace(), false);
         ev.typeMatchRight(*this);
-        m_data->nonsymmetric(ev.m_data.get());
+        m_data->antihermitian(ev.m_data.get());
         return ev;
     }
     else {
-        throw DataException("Error - Data::nonsymmetric can only be calculated for rank 2 or 4 object.");
+        throw DataException("Error - Data::antisymmetric can only be calculated for rank 2 or 4 object.");
     }
 }
 
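Note on the block above: it adds hermitian()/antihermitian() alongside the renamed antisymmetric(); for real data they reduce to the symmetric/antisymmetric parts, for complex data the transpose is conjugated. A small standalone check of the underlying decomposition A = ½(A + A^H) + ½(A − A^H) for a 2×2 complex matrix (illustrative only):

    // Illustrative only: hermitian and antihermitian parts of a 2x2 complex matrix.
    #include <array>
    #include <complex>
    #include <iostream>

    using C = std::complex<double>;
    using Mat = std::array<std::array<C, 2>, 2>;

    Mat hermitianPart(const Mat& a)
    {
        Mat h{};
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
                h[i][j] = 0.5 * (a[i][j] + std::conj(a[j][i]));
        return h;
    }

    Mat antihermitianPart(const Mat& a)
    {
        Mat n{};
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
                n[i][j] = 0.5 * (a[i][j] - std::conj(a[j][i]));
        return n;
    }

    int main()
    {
        Mat a{{{C(1, 1), C(2, -3)}, {C(0, 4), C(5, 0)}}};
        Mat h = hermitianPart(a), n = antihermitianPart(a);
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
                std::cout << (h[i][j] + n[i][j]) << (j ? '\n' : ' ');   // reproduces a
    }
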
 Data
 Data::trace(int axis_offset) const
-{     
+{
     MAKELAZYOPOFF(TRACE,axis_offset);
     if ((axis_offset<0) || (axis_offset>getDataPointRank()))
     {
@@ -2121,7 +2611,7 @@ Data::trace(int axis_offset) const
     DataTypes::ShapeType s=getDataPointShape();
     if (getDataPointRank()==2) {
         DataTypes::ShapeType ev_shape;
-        Data ev(0.,ev_shape,getFunctionSpace());
+        Data ev(0.,ev_shape,getFunctionSpace(),false);
         ev.typeMatchRight(*this);
         m_data->trace(ev.m_data.get(), axis_offset);
         return ev;
@@ -2136,7 +2626,7 @@ Data::trace(int axis_offset) const
             int s0=s[0];
             ev_shape.push_back(s0);
         }
-        Data ev(0.,ev_shape,getFunctionSpace());
+        Data ev(0.,ev_shape,getFunctionSpace(),false);
         ev.typeMatchRight(*this);
         m_data->trace(ev.m_data.get(), axis_offset);
         return ev;
@@ -2155,7 +2645,7 @@ Data::trace(int axis_offset) const
             ev_shape.push_back(s[0]);
             ev_shape.push_back(s[1]);
         }
-        Data ev(0.,ev_shape,getFunctionSpace());
+        Data ev(0.,ev_shape,getFunctionSpace(),false);
         ev.typeMatchRight(*this);
         m_data->trace(ev.m_data.get(), axis_offset);
         return ev;
@@ -2167,7 +2657,7 @@ Data::trace(int axis_offset) const
 
 Data
 Data::transpose(int axis_offset) const
-{     
+{   
     MAKELAZYOPOFF(TRANS,axis_offset);
     DataTypes::ShapeType s=getDataPointShape();
     DataTypes::ShapeType ev_shape;
@@ -2183,7 +2673,7 @@ Data::transpose(int axis_offset) const
         int index = (axis_offset+i)%rank;
         ev_shape.push_back(s[index]); // Append to new shape
     }
-    Data ev(0.,ev_shape,getFunctionSpace());
+    Data ev(0.,ev_shape,getFunctionSpace(), false);
     ev.typeMatchRight(*this);
     m_data->transpose(ev.m_data.get(), axis_offset);
     return ev;
@@ -2204,17 +2694,22 @@ Data::eigenvalues() const
         throw DataException("Error - Data::eigenvalues can only be calculated for rank 2 object.");
     if(s[0] != s[1])
         throw DataException("Error - Data::eigenvalues can only be calculated for object with equal first and second dimension.");
+    if (isComplex() && (s[0]>2))
+    {
+        throw DataException("Error - Data::eigenvalues not supported for complex 3x3.");
+    }
     // create return
     DataTypes::ShapeType ev_shape(1,s[0]);
-    Data ev(0.,ev_shape,getFunctionSpace());
+    Data ev(0.,ev_shape,getFunctionSpace(),false);
     ev.typeMatchRight(*this);
     m_data->eigenvalues(ev.m_data.get());
     return ev;
 }
 
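Note on the eigenvalues() hunk above: complex input is accepted only up to 2×2 (a DataException is thrown for complex 3×3); for the 2×2 case the eigenvalues follow directly from the trace and determinant of the characteristic polynomial. A standalone worked example (illustrative only):

    // Illustrative only: eigenvalues of a 2x2 complex matrix from trace and determinant.
    #include <complex>
    #include <iostream>

    int main()
    {
        using C = std::complex<double>;
        // Matrix [[a, b], [c, d]]
        C a(1.0, 1.0), b(2.0, 0.0), c(0.0, -1.0), d(3.0, 0.0);
        C tr   = a + d;
        C det  = a * d - b * c;
        C disc = std::sqrt(tr * tr - 4.0 * det);
        C l1 = 0.5 * (tr + disc);
        C l2 = 0.5 * (tr - disc);
        std::cout << "lambda1 = " << l1 << "\nlambda2 = " << l2 << '\n';
        // Check: l1 + l2 equals the trace, l1 * l2 equals the determinant (up to rounding).
        std::cout << "sum = " << (l1 + l2) << ", product = " << (l1 * l2) << '\n';
    }
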
 const bp::tuple
-Data::eigenvalues_and_eigenvectors(const double tol) const
+Data::eigenvalues_and_eigenvectors(const real_t tol) const
 {
+    THROWONCOMPLEX
     if (isLazy())
     {
         Data temp(*this);       // to get around the fact that you can't resolve a const Data
@@ -2228,10 +2723,10 @@ Data::eigenvalues_and_eigenvectors(const double tol) const
         throw DataException("Error - Data::eigenvalues and eigenvectors can only be calculated for object with equal first and second dimension.");
     // create return
     DataTypes::ShapeType ev_shape(1,s[0]);
-    Data ev(0.,ev_shape,getFunctionSpace());
+    Data ev(0.,ev_shape,getFunctionSpace(), false);
     ev.typeMatchRight(*this);
     DataTypes::ShapeType V_shape(2,s[0]);
-    Data V(0.,V_shape,getFunctionSpace());
+    Data V(0.,V_shape,getFunctionSpace(), false);
     V.typeMatchRight(*this);
     m_data->eigenvalues_and_eigenvectors(ev.m_data.get(),V.m_data.get(),tol);
     return bp::make_tuple(bp::object(ev),bp::object(V));
@@ -2244,6 +2739,7 @@ Data::minGlobalDataPoint() const
     // abort (for unknown reasons) if there are openmp directives with it in the
     // surrounding function
 
+    THROWONCOMPLEX
     int DataPointNo;
     int ProcNo;
     calc_minGlobalDataPoint(ProcNo,DataPointNo);
@@ -2254,6 +2750,7 @@ void
 Data::calc_minGlobalDataPoint(int& ProcNo,
                         int& DataPointNo) const
 {
+    THROWONCOMPLEX
     if (isLazy())
     {
         Data temp(*this);   // to get around the fact that you can't resolve a const Data
@@ -2262,16 +2759,16 @@ Data::calc_minGlobalDataPoint(int& ProcNo,
     }
     int i,j;
     int lowi=0,lowj=0;
-    double min=numeric_limits<double>::max();
+    real_t min=numeric_limits<real_t>::max();
 
     Data temp=minval_nonlazy();   // need to do this to prevent autolazy from reintroducing laziness
 
     int numSamples=temp.getNumSamples();
     int numDPPSample=temp.getNumDataPointsPerSample();
 
-    double local_val, local_min;
+    real_t local_val, local_min;
 #ifdef ESYS_MPI
-    double next[2];
+    real_t next[2];
 #endif
     int local_lowi=0,local_lowj=0;        
 
@@ -2302,7 +2799,7 @@ Data::calc_minGlobalDataPoint(int& ProcNo,
     next[0] = min;
     next[1] = numSamples;
     int lowProc = 0;
-    double *globalMins = new double[get_MPISize()*2+1];
+    real_t *globalMins = new real_t[get_MPISize()*2+1];
     /*int error =*/ MPI_Gather (next, 2, MPI_DOUBLE, globalMins, 2, MPI_DOUBLE, 0, get_MPIComm() );
 
     if ( get_MPIRank()==0 ) {
@@ -2330,6 +2827,7 @@ Data::calc_minGlobalDataPoint(int& ProcNo,
 const bp::tuple
 Data::maxGlobalDataPoint() const
 {
+    THROWONCOMPLEX
     int DataPointNo;
     int ProcNo;
     calc_maxGlobalDataPoint(ProcNo,DataPointNo);
@@ -2346,19 +2844,20 @@ Data::calc_maxGlobalDataPoint(int& ProcNo,
         temp.resolve();
         return temp.calc_maxGlobalDataPoint(ProcNo,DataPointNo);
     }
+    THROWONCOMPLEX
     int i,j;
     int highi=0,highj=0;
     //-------------
-    double max= -numeric_limits<double>::max();
+    real_t max= -numeric_limits<real_t>::max();
 
     Data temp=maxval_nonlazy();   // need to do this to prevent autolazy from reintroducing laziness
 
     int numSamples=temp.getNumSamples();
     int numDPPSample=temp.getNumDataPointsPerSample();
 
-    double local_val, local_max;
+    real_t local_val, local_max;
 #ifdef ESYS_MPI
-    double next[2];
+    real_t next[2];
 #endif
     int local_highi=0,local_highj=0;      
 
@@ -2388,7 +2887,7 @@ Data::calc_maxGlobalDataPoint(int& ProcNo,
     next[0] = max;
     next[1] = numSamples;
     int highProc = 0;
-    double *globalMaxs = new double[get_MPISize()*2+1];
+    real_t *globalMaxs = new real_t[get_MPISize()*2+1];
     /*int error =*/ MPI_Gather ( next, 2, MPI_DOUBLE, globalMaxs, 2, MPI_DOUBLE, 0, get_MPIComm() );
     if( get_MPIRank()==0 ){
         for (highProc=0; highProc<get_MPISize(); highProc++)
@@ -2423,7 +2922,11 @@ Data::operator+=(const Data& right)
     }
     MAKELAZYBINSELF(right,ADD);    // for lazy + is equivalent to +=
     exclusiveWrite();                     // Since Lazy data does not modify its leaves we only need to worry here
-    binaryOp(right,plus<double>());
+    if (!isComplex() && right.isComplex())
+    {
+        complicate();
+    }
+    TensorSelfUpdateBinaryOperation(right, escript::ES_optype::ADD);  
     return (*this);
 }
 
@@ -2455,7 +2958,11 @@ Data::operator-=(const Data& right)
     }
     MAKELAZYBINSELF(right,SUB);
     exclusiveWrite();
-    binaryOp(right,minus<double>());
+    if (!isComplex() && right.isComplex())
+    {
+        complicate();
+    }
+    TensorSelfUpdateBinaryOperation(right, escript::ES_optype::SUB);
     return (*this);
 }
 
@@ -2478,7 +2985,11 @@ Data::operator*=(const Data& right)
     }
     MAKELAZYBINSELF(right,MUL);
     exclusiveWrite();
-    binaryOp(right,multiplies<double>());
+    if (!isComplex() && right.isComplex())
+    {
+        complicate();
+    }
+    TensorSelfUpdateBinaryOperation(right, escript::ES_optype::MUL);
     return (*this);
 }
 
@@ -2501,7 +3012,11 @@ Data::operator/=(const Data& right)
     }
     MAKELAZYBINSELF(right,DIV);
     exclusiveWrite();
-    binaryOp(right,divides<double>());
+    if (!isComplex() && right.isComplex())
+    {
+        complicate();
+    }
+    TensorSelfUpdateBinaryOperation(right, escript::ES_optype::DIV);
     return (*this);
 }
 
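Note on the four in-place operator hunks above: they share one pattern, namely that a real left operand is promoted first (complicate()) when the right operand is complex, after which the enum-coded binary update runs on matching value types. A standalone sketch of that promote-then-operate idea (illustrative, not the escript types; requires C++17 for std::variant):

    // Illustrative only: promote a real operand to complex before an in-place binary op.
    #include <complex>
    #include <cstddef>
    #include <iostream>
    #include <variant>
    #include <vector>

    using RealVec = std::vector<double>;
    using CplxVec = std::vector<std::complex<double>>;

    struct Values {
        std::variant<RealVec, CplxVec> v;
        bool isComplex() const { return std::holds_alternative<CplxVec>(v); }
        void complicate() {                       // widen real storage to complex
            if (!isComplex()) {
                const RealVec& r = std::get<RealVec>(v);
                v = CplxVec(r.begin(), r.end());
            }
        }
        Values& operator+=(const Values& rhs) {
            if (!isComplex() && rhs.isComplex())
                complicate();                     // mirrors the guard added in the diff
            if (isComplex()) {
                auto& a = std::get<CplxVec>(v);
                for (std::size_t i = 0; i < a.size(); ++i)
                    a[i] += rhs.isComplex()
                              ? std::get<CplxVec>(rhs.v)[i]
                              : std::complex<double>(std::get<RealVec>(rhs.v)[i], 0.0);
            } else {
                auto& a = std::get<RealVec>(v);
                const auto& b = std::get<RealVec>(rhs.v);
                for (std::size_t i = 0; i < a.size(); ++i) a[i] += b[i];
            }
            return *this;
        }
    };

    int main()
    {
        Values x{RealVec{1.0, 2.0}};
        Values y{CplxVec{{0.0, 1.0}, {0.0, -1.0}}};
        x += y;                                   // x is promoted to complex
        for (const auto& z : std::get<CplxVec>(x.v)) std::cout << z << '\n';
    }
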
@@ -2523,16 +3038,18 @@ Changing this would mean that any resolve call would need to use MPI (to check f
 Data
 Data::matrixInverse() const
 {
-    if (isLazy())	// Cannot use lazy for this because individual inversions could throw.
+    if (isLazy())       // Cannot use lazy for this because individual inversions could throw.
     {
         Data d(*this);
         d.resolve();
         return d.matrixInverse();
     }
-
-    Data out(0.,getDataPointShape(),getFunctionSpace());
+    THROWONCOMPLEX
+    Data out(0.,getDataPointShape(),getFunctionSpace(),false);
     out.typeMatchRight(*this);
-    int errcode=m_data->matrixInverse(out.getReadyPtr().get());
+
+    DataReady* drp=out.getReadyPtr().get();
+    int errcode=m_data->matrixInverse(drp);
 #ifdef ESYS_MPI
     int globalval=0;
     MPI_Allreduce( &errcode, &globalval, 1, MPI_INT, MPI_MAX, get_MPIComm() );
@@ -2540,7 +3057,7 @@ Data::matrixInverse() const
 #endif
     if (errcode)
     {
-        DataMaths::matrixInverseError(errcode); // throws exceptions
+        escript::matrixInverseError(errcode); // throws exceptions
     }
     return out;
 }
@@ -2564,7 +3081,9 @@ Data
 Data::powD(const Data& right) const
 {
     MAKELAZYBIN(right,POW);
-    return C_TensorBinaryOperation<double (*)(double, double)>(*this, right, ::pow);
+    
+    return C_TensorBinaryOperation(*this, right, ES_optype::POW);    
+    
 }
 
 
@@ -2574,7 +3093,8 @@ Data
 escript::operator+(const Data& left, const Data& right)
 {
     MAKELAZYBIN2(left,right,ADD);
-    return C_TensorBinaryOperation(left, right, plus<double>());
+    
+    return C_TensorBinaryOperation(left, right, ES_optype::ADD);
 }
 
 //
@@ -2583,7 +3103,7 @@ Data
 escript::operator-(const Data& left, const Data& right)
 {
     MAKELAZYBIN2(left,right,SUB);
-    return C_TensorBinaryOperation(left, right, minus<double>());
+    return C_TensorBinaryOperation(left, right, ES_optype::SUB);    
 }
 
 //
@@ -2591,8 +3111,9 @@ escript::operator-(const Data& left, const Data& right)
 Data
 escript::operator*(const Data& left, const Data& right)
 {
-    MAKELAZYBIN2(left,right,MUL);
-    return C_TensorBinaryOperation(left, right, multiplies<double>());
+    MAKELAZYBIN2(left,right,MUL);    
+    
+    return C_TensorBinaryOperation(left, right, ES_optype::MUL);        
 }
 
 //
@@ -2601,7 +3122,7 @@ Data
 escript::operator/(const Data& left, const Data& right)
 {
     MAKELAZYBIN2(left,right,DIV);
-    return C_TensorBinaryOperation(left, right, divides<double>());
+    return C_TensorBinaryOperation(left, right, ES_optype::DIV);        
 }
 
 //
@@ -2713,7 +3234,7 @@ void
 Data::setItemO(const bp::object& key,
                const bp::object& value)
 {
-    Data tempData(value,getFunctionSpace());
+    Data tempData(value,getFunctionSpace(),false);
     setItemD(key,tempData);
 }
 
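Note ahead of the C_GeneralTensorProduct hunks further below: one code path there has to read either real_t or cplx_t buffers, and the diff selects the right accessor overload by passing a typed dummy value (dummyr or dummyc). A standalone sketch of that tag-dispatch idiom (struct, function and variable names here are illustrative, not the escript API):

    // Illustrative only: select a typed view of a buffer by passing a dummy of that type.
    #include <complex>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct Buffer {
        std::vector<double> realData;
        std::vector<std::complex<double>> cplxData;

        // Overloads distinguished solely by the unused "dummy" parameter's type.
        const double* dataRO(std::size_t offset, double /*dummy*/) const {
            return &realData[offset];
        }
        const std::complex<double>* dataRO(std::size_t offset,
                                           std::complex<double> /*dummy*/) const {
            return &cplxData[offset];
        }
    };

    int main()
    {
        Buffer b{{1.5, 2.5}, {{0.0, 1.0}, {2.0, 3.0}}};
        double dummyr = 0;
        std::complex<double> dummyc = 0;
        std::cout << *b.dataRO(1, dummyr) << '\n';   // 2.5   (real view)
        std::cout << *b.dataRO(1, dummyc) << '\n';   // (2,3) (complex view)
    }
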
@@ -2755,6 +3276,10 @@ Data::typeMatchLeft(Data& right) const
     {
         right.resolve();
     }
+    if (isComplex())
+    {
+        right.complicate();
+    }
     if (isExpanded()) {
         right.expand();
     } else if (isTagged()) {
@@ -2771,6 +3296,10 @@ Data::typeMatchRight(const Data& right)
     {
         resolve();
     }
+    if (right.isComplex())
+    {
+	complicate();
+    }    
     if (isTagged()) {
         if (right.isExpanded()) {
             expand();
@@ -2819,17 +3348,55 @@ Data::setTaggedValue(int tagKey,
     if (isConstant()) tag();
     WrappedArray w(value);
 
-    DataVector temp_data2;
-    temp_data2.copyFromArray(w,1);
+    if (w.isComplex())
+    {
+        CplxVectorType temp_data2;
+        temp_data2.copyFromArray(w,1);
+
+        m_data->setTaggedValue(tagKey,w.getShape(), temp_data2);
 
-    m_data->setTaggedValue(tagKey,w.getShape(), temp_data2);
+    }
+    else
+    {
+        RealVectorType temp_data2;
+        temp_data2.copyFromArray(w,1);
+        if (isComplex())	// set real value in complex
+        {
+	    CplxVectorType temp_data3;
+            fillComplexFromReal(temp_data2,temp_data3);
+            m_data->setTaggedValue(tagKey,w.getShape(), temp_data3);
+        }
+        else
+        {
+            m_data->setTaggedValue(tagKey,w.getShape(), temp_data2);
+        }
+    }
 }
 
 
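Note on setTaggedValue above: it now accepts complex values from Python and, when a real value is assigned into a Data object that is already complex, widens it first (the fillComplexFromReal step). A minimal standalone sketch of that widening using std::transform (illustrative only):

    // Illustrative only: copy a real vector into complex storage before assignment.
    #include <algorithm>
    #include <complex>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<double> realValues{1.0, -2.5, 4.0};
        std::vector<std::complex<double>> cplxValues(realValues.size());
        std::transform(realValues.begin(), realValues.end(), cplxValues.begin(),
                       [](double r) { return std::complex<double>(r, 0.0); });
        for (const auto& z : cplxValues) std::cout << z << '\n';   // (1,0) (-2.5,0) (4,0)
    }
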
 void
 Data::setTaggedValueFromCPP(int tagKey,
                             const DataTypes::ShapeType& pointshape,
-                            const DataTypes::ValueType& value,
+                            const DataTypes::RealVectorType& value,
+                            int dataOffset)
+{
+    if (isProtected()) {
+        throw DataException("Error - attempt to update protected Data object.");
+    }
+    //
+    // Ensure underlying data object is of type DataTagged
+    forceResolve();
+    if (isConstant()) tag();
+    exclusiveWrite();
+    //
+    // Call DataAbstract::setTaggedValue
+    m_data->setTaggedValue(tagKey,pointshape, value, dataOffset);
+}
+
+void
+Data::setTaggedValueFromCPP(int tagKey,
+                            const DataTypes::ShapeType& pointshape,
+                            const DataTypes::CplxVectorType& value,
                             int dataOffset)
 {
     if (isProtected()) {
@@ -2845,6 +3412,7 @@ Data::setTaggedValueFromCPP(int tagKey,
     m_data->setTaggedValue(tagKey,pointshape, value, dataOffset);
 }
 
+
 int
 Data::getTagNumber(int dpno)
 {
@@ -2872,8 +3440,6 @@ escript::C_GeneralTensorProduct(Data& arg_0,
     // SM is the product of the last axis_offset entries in arg_0.getShape().
 
     // deal with any lazy data
-//   if (arg_0.isLazy()) {arg_0.resolve();}
-//   if (arg_1.isLazy()) {arg_1.resolve();}
     if (arg_0.isLazy() || arg_1.isLazy() || (AUTOLAZYON && (arg_0.isExpanded() || arg_1.isExpanded())))
     {
         DataLazy* c=new DataLazy(arg_0.borrowDataPtr(), arg_1.borrowDataPtr(), PROD, axis_offset,transpose);
@@ -2918,20 +3484,6 @@ escript::C_GeneralTensorProduct(Data& arg_0,
     for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
     for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
 
-#if 0
-    // For debugging: show shape after transpose
-    char tmp[100];
-    std::string shapeStr;
-    shapeStr = "(";
-    for (int i=0; i<rank0; i++)   { sprintf(tmp, "%d,", tmpShape0[i]); shapeStr += tmp; }
-    shapeStr += ")";
-    cout << "C_GeneralTensorProduct: Shape of arg0 is " << shapeStr << endl;
-    shapeStr = "(";
-    for (int i=0; i<rank1; i++)   { sprintf(tmp, "%d,", tmpShape1[i]); shapeStr += tmp; }
-    shapeStr += ")";
-    cout << "C_GeneralTensorProduct: Shape of arg1 is " << shapeStr << endl;
-#endif
-
     // Prepare for the loops of the product
     int SL=1, SM=1, SR=1;
     for (int i=0; i<rank0-axis_offset; i++) {
@@ -2965,12 +3517,58 @@ escript::C_GeneralTensorProduct(Data& arg_0,
     // Declare output Data object
     Data res;
 
+    bool complexresult=arg_0_Z.isComplex() || arg_1_Z.isComplex();
+    cplx_t dummyc=0;
+    real_t dummyr=0;    
     if (arg_0_Z.isConstant() && arg_1_Z.isConstant()) {
-        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace());        // DataConstant output
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-        const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0));
-        double *ptr_2 = &(res.getDataAtOffsetRW(0));
-        matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(), false);        // DataConstant output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {
+		cplx_t dummyc=0;
+	        res.complicate();
+		const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0,dummyc));
+		const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0,dummyc));
+		cplx_t *ptr_2 = &(res.getDataAtOffsetRW(0,dummyc));
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);		
+	    }
+	    else	// right is real
+	    {
+		cplx_t dummyc=0;
+		real_t dummyr=0;
+	        res.complicate();
+		const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0,dummyc));
+		const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0,dummyr));
+		cplx_t *ptr_2 = &(res.getDataAtOffsetRW(0,dummyc));
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);		
+	    }
+	}
+	else	// arg_0_Z is real
+	{
+	    if (arg_1_Z.isComplex())
+	    {
+		cplx_t dummyc=0;
+		real_t dummyr=0;
+	        res.complicate();
+		const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0,dummyr));
+		const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0,dummyc));
+		cplx_t *ptr_2 = &(res.getDataAtOffsetRW(0,dummyc));
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);	      
+	    }
+	    else
+	    {
+		const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0,0));
+		const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0,0));
+		real_t *ptr_2 = &(res.getDataAtOffsetRW(0));
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);	      
+	    }
+	}
+
     }
     else if (arg_0_Z.isConstant()   && arg_1_Z.isTagged()) {
 
@@ -2983,95 +3581,304 @@ escript::C_GeneralTensorProduct(Data& arg_0,
         if (tmp_1==0) { throw DataException("GTP_1 Programming error - casting to DataTagged."); }
 
         // Prepare a DataTagged output 2
-        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace());        // DataTagged output
+        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(), false);        // DataTagged output
         res.tag();
+	if (complexresult)
+	{
+	    res.complicate();
+	}	
         DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
         if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
 
-        // Prepare offset into DataConstant
-        int offset_0 = tmp_0->getPointOffset(0,0);
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-
-        const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-        double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-        // Compute an MVP for the default
-        matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        // Compute an MVP for each tag
-        const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-        DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-        for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-            tmp_2->addTag(i->first);
-
-            const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-            double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+	cplx_t dummyc=0;
+	real_t dummyr=0;
         
-            matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        }
-
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {        
+		// Prepare offset into DataConstant
+		int offset_0 = tmp_0->getPointOffset(0,0);
+		const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+
+		const cplx_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+
+		    const cplx_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0, dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+		
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}		
+	    }
+	    else
+	    {
+		// Prepare offset into DataConstant
+		int offset_0 = tmp_0->getPointOffset(0,0);
+		const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+
+		const real_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyr));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+
+		    const real_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0, dummyr));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+		
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}		      
+	    }
+	}
+	else	// arg_0 is real
+	{
+	    if (arg_1_Z.isComplex())
+	    {        
+		// Prepare offset into DataConstant
+		int offset_0 = tmp_0->getPointOffset(0,0);
+		const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+
+		const cplx_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0,dummyc));	// the result
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+
+		    const cplx_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0,dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0,dummyc));
+		
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	    else
+	    {
+		// Prepare offset into DataConstant
+		int offset_0 = tmp_0->getPointOffset(0,0);
+		const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+
+		const real_t *ptr_1 = &(tmp_1->getDefaultValueRO(0));
+		real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+
+		    const real_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
+		    real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+		
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	}
     }
     else if (arg_0_Z.isConstant() && arg_1_Z.isExpanded()) {
 
-        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-        DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-        DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-        DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-        if (tmp_0==0) { throw DataException("GTP Programming error - casting to DataConstant."); }
-        if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
-        if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
-        int sampleNo_1,dataPointNo_1;
-        int numSamples_1 = arg_1_Z.getNumSamples();
-        int numDataPointsPerSample_1 = arg_1_Z.getNumDataPointsPerSample();
-        int offset_0 = tmp_0->getPointOffset(0,0);
-#pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
-        for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
-            for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
-                int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
-                int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
-                const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-                const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-                double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-                matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-            }
-        }
+	res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+	if (complexresult)
+	{
+	    res.complicate();
+	}	
+	DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
+	DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
+	DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
+	if (tmp_0==0) { throw DataException("GTP Programming error - casting to DataConstant."); }
+	if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
+	if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
+	int sampleNo_1,dataPointNo_1;
+	int numSamples_1 = arg_1_Z.getNumSamples();
+	int numDataPointsPerSample_1 = arg_1_Z.getNumDataPointsPerSample();
+	int offset_0 = tmp_0->getPointOffset(0,0);	
+        
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
+		for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
+		    for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
+		for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
+		    for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+	else
+	{
+	    if (arg_1_Z.isComplex())
+	    {  
+		#pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
+		for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
+		    for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else	// both real
+	    {
+		#pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
+		for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
+		    for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+			real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyr));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }	  
+	}
     }
     else if (arg_0_Z.isTagged() && arg_1_Z.isConstant()) {
-
-        // Borrow DataTagged input from Data object
-        DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-        if (tmp_0==0) { throw DataException("GTP_0 Programming error - casting to DataTagged."); }
-
-        // Prepare the DataConstant input
-        DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-        if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataConstant."); }
-
-        // Prepare a DataTagged output 2
-        res = Data(0.0, shape2, arg_0_Z.getFunctionSpace());        // DataTagged output
-        res.tag();
-        DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-        if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
-
-        // Prepare offset into DataConstant
-        int offset_1 = tmp_1->getPointOffset(0,0);
-        const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-        const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-        double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-        // Compute an MVP for the default
-        matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        // Compute an MVP for each tag
-        const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-        DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-        for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-
-            tmp_2->addTag(i->first);
-            const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-            double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-            matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        }
+	// Borrow DataTagged input from Data object
+	DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
+	if (tmp_0==0) { throw DataException("GTP_0 Programming error - casting to DataTagged."); }
+
+	// Prepare the DataConstant input
+	DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
+	if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataConstant."); }
+
+	// Prepare a DataTagged output 2
+	res = Data(0.0, shape2, arg_0_Z.getFunctionSpace(), false);        // DataTagged output
+	res.tag();
+	if (complexresult)
+	{
+	    res.complicate();
+	}	
+	DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
+	if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
+
+	// Prepare offset into DataConstant
+	int offset_1 = tmp_1->getPointOffset(0,0);      
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+
+		const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+		const cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+
+		    tmp_2->addTag(i->first);
+		    const cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	    else	// arg_1_Z is real
+	    {
+		const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+		const cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+
+		    tmp_2->addTag(i->first);
+		    const cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	}
+	else
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+		const real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyr));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+
+		    tmp_2->addTag(i->first);
+		    const real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyr));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	    else
+	    {
+		const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
+		const real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0));
+		real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+
+		    tmp_2->addTag(i->first);
+		    const real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
+		    real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	}
     }
     else if (arg_0_Z.isTagged() && arg_1_Z.isTagged()) {
-
         // Borrow DataTagged input from Data object
         DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
         if (tmp_0==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
@@ -3081,66 +3888,224 @@ escript::C_GeneralTensorProduct(Data& arg_0,
         if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
 
         // Prepare a DataTagged output 2
-        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace());
+        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(), false);
         res.tag();  // DataTagged output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
         DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
         if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
-
-        const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-        const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-        double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-        // Compute an MVP for the default
-        matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        // Merge the tags
-        DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-        const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-        const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-        for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-            tmp_2->addTag(i->first); // use tmp_2 to get correct shape
-        }
-        for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-            tmp_2->addTag(i->first);
-        }
-        // Compute an MVP for each tag
-        const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
-        for (i=lookup_2.begin();i!=lookup_2.end();i++) {
-            const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-            const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-            double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-
-            matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-        }
+      
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		const cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyc));
+		const cplx_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Merge the tags
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+		    tmp_2->addTag(i->first); // use tmp_2 to get correct shape
+		}
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+		}
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
+		for (i=lookup_2.begin();i!=lookup_2.end();i++) {
+		    const cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyc));
+		    const cplx_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0, dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	    else	// arg_1_Z is real
+	    {
+		const cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyc));
+		const real_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyr));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Merge the tags
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+		    tmp_2->addTag(i->first); // use tmp_2 to get correct shape
+		}
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+		}
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
+		for (i=lookup_2.begin();i!=lookup_2.end();i++) {
+		    const cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyc));
+		    const real_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0, dummyr));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummyc));
+
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	}
+	else	// arg_0_Z is real
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		const real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0, dummyr));
+		const cplx_t *ptr_1 = &(tmp_1->getDefaultValueRO(0, dummyc));
+		cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0, dummyc));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Merge the tags
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+		    tmp_2->addTag(i->first); // use tmp_2 to get correct shape
+		}
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+		}
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
+		for (i=lookup_2.begin();i!=lookup_2.end();i++) {
+		    const real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummyr));
+		    const cplx_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0, dummyc));
+		    cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0,dummyc));
+
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}		      
+	    }
+	    else
+	    {
+		const real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0));
+		const real_t *ptr_1 = &(tmp_1->getDefaultValueRO(0));
+		real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0));
+
+		// Compute an MVP for the default
+		matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		// Merge the tags
+		DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+		const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+		const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
+		for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+		    tmp_2->addTag(i->first); // use tmp_2 to get correct shape
+		}
+		for (i=lookup_1.begin();i!=lookup_1.end();i++) {
+		    tmp_2->addTag(i->first);
+		}
+		// Compute an MVP for each tag
+		const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
+		for (i=lookup_2.begin();i!=lookup_2.end();i++) {
+		    const real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
+		    const real_t *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
+		    real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+
+		    matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		}	      
+	    }
+	}
     }
     else if (arg_0_Z.isTagged() && arg_1_Z.isExpanded()) {
-
-        // After finding a common function space above the two inputs have the same numSamples and num DPPS
-        res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-        DataTagged*   tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-        DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-        DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-        if (tmp_0==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
-        if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
-        if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
-        int sampleNo_0,dataPointNo_0;
-        int numSamples_0 = arg_0_Z.getNumSamples();
-        int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-        for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-            int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
-            const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-            for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-                int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-                const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-                double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-                matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-            }
-        }
+      
+	// After finding a common function space above, the two inputs have the same numSamples and number of data points per sample (DPPS)
+	res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
+	DataTagged*   tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
+	DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
+	DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());    
+	if (tmp_0==0) { throw DataException("GTP Programming error - casting to DataTagged."); }
+	if (tmp_1==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
+	if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
+	int sampleNo_0,dataPointNo_0;
+	int numSamples_0 = arg_0_Z.getNumSamples();
+	int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();	
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
+		    const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else	// arg_1_Z is real
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
+		    const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+	else	// arg_0_Z is real
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
+		    const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
+		    const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
+			real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
     }
     else if (arg_0_Z.isExpanded() && arg_1_Z.isConstant()) {
-
         res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
         DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
         DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
         DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
@@ -3150,23 +4115,79 @@ escript::C_GeneralTensorProduct(Data& arg_0,
         int sampleNo_0,dataPointNo_0;
         int numSamples_0 = arg_0_Z.getNumSamples();
         int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-        int offset_1 = tmp_1->getPointOffset(0,0);
-#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-        for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-            for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-                int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-                const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-                const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-                double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-                matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-            }
-        }
+        int offset_1 = tmp_1->getPointOffset(0,0);      
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}		      
+	    }
+	    else	// arg_1_Z is real
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+	else	// arg_0_Z is real
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }	      
+		}
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
+			real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+
+
     }
     else if (arg_0_Z.isExpanded() && arg_1_Z.isTagged()) {
-
         // After finding a common function space above the two inputs have the same numSamples and num DPPS
         res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
         DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
         DataTagged*   tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
         DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
@@ -3175,24 +4196,83 @@ escript::C_GeneralTensorProduct(Data& arg_0,
         if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
         int sampleNo_0,dataPointNo_0;
         int numSamples_0 = arg_0_Z.getNumSamples();
-        int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-        for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-            int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
-            const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-            for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-                int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-                const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-                double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-                matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-            }
-        }
+        int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();      
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
+		    const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
+		    const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+	else
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
+		    const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
+		    const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+			real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+
+
     }
     else if (arg_0_Z.isExpanded() && arg_1_Z.isExpanded()) {
-
         // After finding a common function space above the two inputs have the same numSamples and num DPPS
         res = Data(0.0, shape2, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+	if (complexresult)
+	{
+	    res.complicate();
+	}
         DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
         DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
         DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
@@ -3201,19 +4281,75 @@ escript::C_GeneralTensorProduct(Data& arg_0,
         if (tmp_2==0) { throw DataException("GTP Programming error - casting to DataExpanded."); }
         int sampleNo_0,dataPointNo_0;
         int numSamples_0 = arg_0_Z.getNumSamples();
-        int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-        for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-            for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-                int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-                int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-                const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-                const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-                double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-                matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
-            }
-        }
+        int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();      
+	if (arg_0_Z.isComplex())
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyc));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyr));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+	else
+	{
+	    if (arg_1_Z.isComplex())
+	    {   
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummyr));
+			const cplx_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1, dummyc));
+			cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummyc));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	    else
+	    {
+		#pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+		for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+		    for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
+			int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
+			int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+			const real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+			const real_t *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
+			real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+			matrix_matrix_product(SL, SM, SR, ptr_0, ptr_1, ptr_2, transpose);
+		    }
+		}	      
+	    }
+	}
+
+
     }
     else {
         throw DataException("Error - C_GeneralTensorProduct: unknown combination of inputs");
@@ -3239,8 +4375,8 @@ Data::borrowDataPtr() const
 DataReady_ptr
 Data::borrowReadyPtr() const
 {
-    DataReady_ptr dr=boost::dynamic_pointer_cast<DataReady>(m_data);
-    EsysAssert((dr!=0), "Error - casting to DataReady.");
+    DataReady_ptr dr=REFCOUNTNS::dynamic_pointer_cast<DataReady>(m_data);
+    ESYS_ASSERT(dr!=NULL, "Casting to DataReady.");
     return dr;
 }
 
@@ -3253,7 +4389,7 @@ Data::toString() const
 #endif
     if (!m_data->isEmpty() &&
         !m_data->isLazy() && 
-        getLength()>escriptParams.getInt("TOO_MANY_LINES"))
+        getLength() > escriptParams.getTooManyLines())
     {
         localNeedSummary=1;
     }
@@ -3264,43 +4400,56 @@ Data::toString() const
 #endif
 
     if (localNeedSummary){
-        stringstream temp;
-        temp << "Summary: inf="<< inf_const() << " sup=" << sup_const() << " data points=" << getNumDataPoints();
-        return  temp.str();
+        if (isComplex())
+	{
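+	    // inf/sup are not meaningful for complex data, so the summary reports Lsup (largest absolute value) instead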
+	    stringstream temp;
+	    temp << "Summary: Lsup="<< Lsup_const() << " data points=" << getNumDataPoints();
+	    return  temp.str();
+	}
+	else
+	{
+	    stringstream temp;
+	    temp << "Summary: inf="<< inf_const() << " sup=" << sup_const() << " data points=" << getNumDataPoints();
+	    return  temp.str();
+	}
     }
     return m_data->toString();
 }
 
 
 // This method is not thread-safe
-DataTypes::ValueType::reference
-Data::getDataAtOffsetRW(DataTypes::ValueType::size_type i)
+DataTypes::RealVectorType::reference
+Data::getDataAtOffsetRW(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy)
 {
     checkExclusiveWrite();
     return getReady()->getDataAtOffsetRW(i);
 }
 
 // This method is not thread-safe
-DataTypes::ValueType::const_reference
-Data::getDataAtOffsetRO(DataTypes::ValueType::size_type i)
+DataTypes::RealVectorType::const_reference
+Data::getDataAtOffsetRO(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy)
 {
     forceResolve();
     return getReady()->getDataAtOffsetRO(i);
 }
 
+// This method is not thread-safe
+DataTypes::CplxVectorType::reference
+Data::getDataAtOffsetRW(DataTypes::RealVectorType::size_type i, DataTypes::cplx_t dummy)
+{
+    checkExclusiveWrite();
+    return getReady()->getDataAtOffsetRWC(i);
+}
 
-// DataTypes::ValueType::const_reference
-// Data::getDataAtOffsetRO(DataTypes::ValueType::size_type i) const
-// {
-//     if (isLazy())
-//     {
-//      throw DataException("Programmer error - getDataAtOffsetRO() not permitted on Lazy Data (object is const which prevents resolving).");
-//     }
-//     return getReady()->getDataAtOffsetRO(i);
-// }
-
+// This method is not thread-safe
+DataTypes::CplxVectorType::const_reference
+Data::getDataAtOffsetRO(DataTypes::RealVectorType::size_type i, DataTypes::cplx_t dummy)
+{
+    forceResolve();
+    return getReady()->getDataAtOffsetROC(i);
+}
 
-DataTypes::ValueType::const_reference
+DataTypes::RealVectorType::const_reference
 Data::getDataPointRO(int sampleNo, int dataPointNo)
 {
     forceResolve();
@@ -3316,7 +4465,7 @@ Data::getDataPointRO(int sampleNo, int dataPointNo)
 }
 
 
-DataTypes::ValueType::reference
+DataTypes::RealVectorType::reference
 Data::getDataPointRW(int sampleNo, int dataPointNo)
 {
     checkExclusiveWrite();
@@ -3325,27 +4474,27 @@ Data::getDataPointRW(int sampleNo, int dataPointNo)
 }
 
 Data
-Data::interpolateFromTable3DP(bp::object table, double Amin, double Astep,
-                Data& B, double Bmin, double Bstep,
-                Data& C, double Cmin, double Cstep,
-                double undef, bool check_boundaries)
+Data::interpolateFromTable3DP(bp::object table, real_t Amin, real_t Astep,
+                Data& B, real_t Bmin, real_t Bstep,
+                Data& C, real_t Cmin, real_t Cstep,
+                real_t undef, bool check_boundaries)
 {
     WrappedArray t(table);
     return interpolateFromTable3D(t, Amin, Astep, undef, B, Bmin, Bstep, C, Cmin, Cstep, check_boundaries);
 }
 
 Data
-Data::interpolateFromTable2DP(bp::object table, double Amin, double Astep,
-                Data& B, double Bmin, double Bstep,
-                double undef, bool check_boundaries)
+Data::interpolateFromTable2DP(bp::object table, real_t Amin, real_t Astep,
+                Data& B, real_t Bmin, real_t Bstep,
+                real_t undef, bool check_boundaries)
 {
     WrappedArray t(table);
     return interpolateFromTable2D(t, Amin, Astep, undef, B, Bmin, Bstep,check_boundaries);
 }
 
 Data
-Data::interpolateFromTable1DP(bp::object table, double Amin, double Astep,
-                              double undef,bool check_boundaries)
+Data::interpolateFromTable1DP(bp::object table, real_t Amin, real_t Astep,
+                              real_t undef,bool check_boundaries)
 {
     WrappedArray t(table);
     return interpolateFromTable1D(t, Amin, Astep, undef, check_boundaries);
@@ -3353,8 +4502,8 @@ Data::interpolateFromTable1DP(bp::object table, double Amin, double Astep,
 
 
 Data
-Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
-                             double Astep, double undef, bool check_boundaries)
+Data::interpolateFromTable1D(const WrappedArray& table, real_t Amin,
+                             real_t Astep, real_t undef, bool check_boundaries)
 {
     table.convertArray(); // critical! Calling getElt on an unconverted array is not thread safe
     int error=0;
@@ -3378,8 +4527,8 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
     int numpts=getNumDataPoints();
     int twidth=table.getShape()[0]-1;       
     bool haserror=false;
-    const DataVector* adat=0;
-    DataVector* rdat=0;
+    const RealVectorType* adat=0;
+    RealVectorType* rdat=0;
     try
     {
         adat=&(getReady()->getVectorRO());
@@ -3399,7 +4548,7 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
 #pragma omp flush(haserror) // In case haserror was in register
             if (!haserror)                
             {
-                double a=(*adat)[l];
+                real_t a=(*adat)[l];
                 int x=static_cast<int>(((a-Amin)/Astep));
                 if (check_boundaries)
                 {
@@ -3419,7 +4568,7 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
                     try {
                         if (x==twidth) // value is on the far end of the table
                         {
-                            double e=table.getElt(x);
+                            real_t e=table.getElt(x);
                             if (e>undef)
                             {
                                 lerror=2;
@@ -3431,8 +4580,8 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
                         }
                         else            // x and y are in bounds
                         {
-                            double e=table.getElt(x);
-                            double w=table.getElt(x+1);
+                            real_t e=table.getElt(x);
+                            real_t w=table.getElt(x+1);
                             if ((e>undef) || (w>undef))
                             {
                                 lerror=2;
@@ -3440,7 +4589,7 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
                             else
                             {
                                 // map x*Astep <= a << (x+1)*Astep to [-1,1] 
-                                double la = 2.0*(a-Amin-(x*Astep))/Astep-1;
+                                real_t la = 2.0*(a-Amin-(x*Astep))/Astep-1;
                                 (*rdat)[l]=((1-la)*e + (1+la)*w)/2;
                             }
                         }
@@ -3481,9 +4630,9 @@ Data::interpolateFromTable1D(const WrappedArray& table, double Amin,
 }
 
 Data
-Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
-                             double Astep, double undef, Data& B, double Bmin,
-                             double Bstep, bool check_boundaries)
+Data::interpolateFromTable2D(const WrappedArray& table, real_t Amin,
+                             real_t Astep, real_t undef, Data& B, real_t Bmin,
+                             real_t Bstep, bool check_boundaries)
 {
     table.convertArray(); // critical! Calling getElt on an unconverted array is not thread safe
     int error=0;
@@ -3518,9 +4667,9 @@ Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
     Data res(0, DataTypes::scalarShape, getFunctionSpace(), true);
 
     int numpts=getNumDataPoints();
-    const DataVector* adat=0;
-    const DataVector* bdat=0;
-    DataVector* rdat=0;
+    const RealVectorType* adat=0;
+    const RealVectorType* bdat=0;
+    RealVectorType* rdat=0;
     const DataTypes::ShapeType& ts=table.getShape();
     try
     {
@@ -3546,8 +4695,8 @@ Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
            if (!haserror)               
            {
                 int lerror=0;
-                double a=(*adat)[l];
-                double b=(*bdat)[l];
+                real_t a=(*adat)[l];
+                real_t b=(*bdat)[l];
                 int x=static_cast<int>(((a-Amin)/Astep));
                 int y=static_cast<int>(((b-Bmin)/Bstep));
                 if (check_boundaries)
@@ -3573,9 +4722,9 @@ Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
                         int nx=x+1;
                         int ny=y+1;
 
-                        double la=0; // map position of a between x and nx to [-1,1]
-                        double lb=0;
-                        double weight=4;
+                        real_t la=0; // map position of a between x and nx to [-1,1]
+                        real_t lb=0;
+                        real_t weight=4;
 
                         // now we work out which terms we should be considering
                         bool usex=(x!=twx);
@@ -3584,15 +4733,12 @@ Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
                         la = 2.0*(a-Amin-(x*Astep))/Astep-1;
                         lb = 2.0*(b-Bmin-(y*Bstep))/Bstep-1;
 
-                        double sw=table.getElt(y,x);
-                        double nw=usey?table.getElt(ny,x):0; // 0 because if !usey ny does not actually exist
-                        double se=usex?table.getElt(y,nx):0;
-                        double ne=(usex&&usey)?table.getElt(ny,nx):0;
-
-// cout << a << "," << b << " -> " << x << "," << y << "   " <<  sw <<  "," << 
-// nw <<  "," <<  se <<  "," <<  ne <<  "\n";                   
+                        real_t sw=table.getElt(y,x);
+                        real_t nw=usey?table.getElt(ny,x):0; // 0 because if !usey ny does not actually exist
+                        real_t se=usex?table.getElt(y,nx):0;
+                        real_t ne=(usex&&usey)?table.getElt(ny,nx):0;                
 
-                        double ans=(1-la)*(1-lb)*sw +
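+                        // bilinear interpolation: each corner value sw/nw/se/ne is weighted by
+                        // a product of (1-la) or (1+la) and (1-lb) or (1+lb), with la,lb in [-1,1]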
+                        real_t ans=(1-la)*(1-lb)*sw +
                                    (1-la)*(1+lb)*nw +
                                    (1+la)*(1-lb)*se +
                                    (1+la)*(1+lb)*ne;
@@ -3640,9 +4786,9 @@ Data::interpolateFromTable2D(const WrappedArray& table, double Amin,
 
 
 Data
-Data::interpolateFromTable3D(const WrappedArray& table, double Amin,
-                             double Astep, double undef, Data& B, double Bmin,
-                             double Bstep, Data& C, double Cmin, double Cstep,
+Data::interpolateFromTable3D(const WrappedArray& table, real_t Amin,
+                             real_t Astep, real_t undef, Data& B, real_t Bmin,
+                             real_t Bstep, Data& C, real_t Cmin, real_t Cstep,
                              bool check_boundaries)
 {
     table.convertArray(); // critical! Calling getElt on an unconverted array is not thread safe
@@ -3688,10 +4834,10 @@ Data::interpolateFromTable3D(const WrappedArray& table, double Amin,
     Data res(0, DataTypes::scalarShape, getFunctionSpace(), true);
 
     int numpts=getNumDataPoints();
-    const DataVector* adat=0;
-    const DataVector* bdat=0;
-    const DataVector* cdat=0;
-    DataVector* rdat=0;
+    const RealVectorType* adat=0;
+    const RealVectorType* bdat=0;
+    const RealVectorType* cdat=0;
+    RealVectorType* rdat=0;
     const DataTypes::ShapeType& ts=table.getShape();
     try
     {
@@ -3706,10 +4852,6 @@ Data::interpolateFromTable3D(const WrappedArray& table, double Amin,
     }
     if (!error)
     {
-//      int twx=ts[0]-1;        // table width x
-//      int twy=ts[1]-1;        // table width y
-//      int twz=ts[2]-1;        // table width z
-
         int twx=ts[2]-1;        // table width x
         int twy=ts[1]-1;        // table width y
         int twz=ts[0]-1;        // table width z
@@ -3723,9 +4865,9 @@ Data::interpolateFromTable3D(const WrappedArray& table, double Amin,
            if (!haserror)               
            {
                 int lerror=0;
-                double a=(*adat)[l];
-                double b=(*bdat)[l];
-                double c=(*cdat)[l];
+                real_t a=(*adat)[l];
+                real_t b=(*bdat)[l];
+                real_t c=(*cdat)[l];
                 int x=static_cast<int>(((a-Amin)/Astep));
                 int y=static_cast<int>(((b-Bmin)/Bstep));
                 int z=static_cast<int>(((c-Cmin)/Cstep));
@@ -3754,86 +4896,30 @@ Data::interpolateFromTable3D(const WrappedArray& table, double Amin,
                         int nx=x+1;
                         int ny=y+1;
                         int nz=z+1;
-                        double la=0; // map position of a between x and nx to [-1,1]
-                        double lb=0;
-                        double lc=0;
-                        double weight=8;
+                        real_t la=0; // map position of a between x and nx to [-1,1]
+                        real_t lb=0;
+                        real_t lc=0;
+                        real_t weight=8;
 
                         // now we work out which terms we should be considering
                         bool usex=(x!=twx);
                         bool usey=(y!=twy);
                         bool usez=(z!=twz);
-//                      if (usex) {weight/=2;}
-//                      if (usey) {weight/=2;}
-//                      if (usez) {weight/=2;}
 
                         la = 2.0*(a-Amin-(x*Astep))/Astep-1;
                         lb = 2.0*(b-Bmin-(y*Bstep))/Bstep-1;
                         lc = 2.0*(c-Cmin-(z*Cstep))/Cstep-1;
 
-/*
-cerr << "Processing point " << l << " x=";
-cerr <<  x << "," << nx << " ";
-cerr <<  "y=" << y << "," << ny << " ";
-cerr <<  "z=" << z << "," << nz << "\n";
-
-cerr << "  usex=" << usex << "  usey=" << usey << "  usez=" << usez << endl;*/
-
-//                      double swb=table.getElt(x,y,z);
-//                      double swt=usez?table.getElt(x,y,nz):0;
-//                      double nwb=usey?table.getElt(x,ny,z):0;
-//                      double nwt=(usey&&usez)?table.getElt(x,ny,nz):0;
-//                      double seb=usex?table.getElt(nx,y,z):0;
-//                      double set=(usex&&usez)?table.getElt(nx,y,nz):0;
-//                      double neb=(usex&&usey)?table.getElt(nx,ny,z):0;
-//                      double net=(usex&&usey&&usez)?table.getElt(nx,ny,nz):0;
-
-                        double swb=table.getElt(z,y,x);
-                        double swt=usez?table.getElt(nz,y,x):0;
-                        double nwb=usey?table.getElt(z,ny,x):0;
-                        double nwt=(usey&&usez)?table.getElt(nz,ny,x):0;
-                        double seb=usex?table.getElt(z,y,nx):0;
-                        double set=(usex&&usez)?table.getElt(nz,y,nx):0;
-                        double neb=(usex&&usey)?table.getElt(z,ny,nx):0;
-                        double net=(usex&&usey&&usez)?table.getElt(nz,ny,nx):0;
-
-// cerr << "     +(0,1,0) " << table.getElt(0,1,0) << endl;
-// cerr << "     +(1,1,0) " << table.getElt(1,1,0) << endl;
-// cerr << "     +(0,0,1) " << table.getElt(0,0,1) << endl;
-// cerr << "     +(0,1,1) " << table.getElt(0,1,1) << endl;
-
-
-// cerr << "    " << swb << ", " << swt << ", ";
-// cerr << nwb << ", " << nwt << ", ";
-// cerr << seb << ", " << set << ", ";
-// cerr << neb << ", " << net << "\n";
-// 
-// cerr << "       la=" << la << " lb=" << lb << " lc="<< lc << endl;
-
-/*                      double la = 2.0*(a-Amin-(x*Astep))/Astep-1;*/
-/*                      double lb = 2.0*(b-Bmin-(y*Bstep))/Bstep-1;*/
-//                      double lc = 2.0*(c-Cmin-(z*Cstep))/Cstep-1;
-
-// cerr << "        swb=" << swb << endl;
-// cerr << "        swt=" << swt << endl;
-// cerr << "        nwb=" <<    nwb << endl;
-// cerr << "        nwt=" <<    nwt << endl;
-// cerr << "        seb=" <<    seb << endl;
-// cerr << "        set=" <<    set << endl;
-// cerr << "        neb=" <<    neb << endl;
-// cerr << "        net=" <<    net << endl;;
-// 
-// cerr << "      swb->" << (1-la)*(1-lb)*(1-lc)*swb << endl;
-// cerr << "      swt->" << (1-la)*(1-lb)*(1+lc)*swt << endl;
-// cerr << "      nwb->" << (1-la)*(1+lb)*(1-lc)*nwb << endl;
-// cerr << "      nwt->" << (1-la)*(1+lb)*(1+lc)*nwt << endl;
-// cerr << "      seb->" << (1+la)*(1-lb)*(1-lc)*seb << endl;
-// cerr << "      set->" << (1+la)*(1-lb)*(1+lc)*set << endl;
-// cerr << "      neb->" << (1+la)*(1+lb)*(1-lc)*neb << endl;
-// cerr << "      net->" << (1+la)*(1+lb)*(1+lc)*net << endl;;
-
-
-                        double ans=(1-la)*(1-lb)*(1-lc)*swb +
+                        real_t swb=table.getElt(z,y,x);
+                        real_t swt=usez?table.getElt(nz,y,x):0;
+                        real_t nwb=usey?table.getElt(z,ny,x):0;
+                        real_t nwt=(usey&&usez)?table.getElt(nz,ny,x):0;
+                        real_t seb=usex?table.getElt(z,y,nx):0;
+                        real_t set=(usex&&usez)?table.getElt(nz,y,nx):0;
+                        real_t neb=(usex&&usey)?table.getElt(z,ny,nx):0;
+                        real_t net=(usex&&usey&&usez)?table.getElt(nz,ny,nx):0;
+
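+                        // trilinear interpolation: each of the eight corner values is weighted by a
+                        // product of (1-l) or (1+l) factors in la, lb and lc, each mapped to [-1,1]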
+                        real_t ans=(1-la)*(1-lb)*(1-lc)*swb +
                                    (1-la)*(1-lb)*(1+lc)*swt +
                                    (1-la)*(1+lb)*(1-lc)*nwb +
                                    (1-la)*(1+lb)*(1+lc)*nwt +
@@ -3906,10 +4992,10 @@ Data Data::nonuniforminterp(boost::python::object in, boost::python::object out,
     expand();
     Data result(0, DataTypes::scalarShape, getFunctionSpace(), true);  
     int numpts=getNumDataPoints();
-    const DataVector& sdat=getReady()->getVectorRO();
-    DataVector& rdat=result.getReady()->getVectorRW();
-    double maxlimit=win.getElt(win.getShape()[0]-1);
-    double maxout=wout.getElt(wout.getShape()[0]-1);
+    const RealVectorType& sdat=getReady()->getVectorRO();
+    RealVectorType& rdat=result.getReady()->getVectorRW();
+    real_t maxlimit=win.getElt(win.getShape()[0]-1);
+    real_t maxout=wout.getElt(wout.getShape()[0]-1);
     int ipoints=win.getShape()[0];
     int l=0;
     bool error=false;
@@ -3917,42 +5003,42 @@ Data Data::nonuniforminterp(boost::python::object in, boost::python::object out,
     for (l=0; l<numpts; ++l)
     {
         if ((sdat)[l]<win.getElt(0))
-	{
-	   if (check_boundaries)
-	   {
-	       error=true;		// Could have done an early exit but I'm not sure it's worth it
-	   }
-	   else
-	   {
-	       rdat[l]=wout.getElt(0);
-	   }
-	}
-	else if (sdat[l]>maxlimit)
-	{
-	   if (check_boundaries)
-	   {
-	       error=true;		// Could have done an early exit but I'm not sure it's worth it
-	   }
-	   else
-	   {
-	       rdat[l]=maxout;
-	   }
-	}
+        {
+           if (check_boundaries)
+           {
+               error=true;              // Could have done an early exit but I'm not sure it's worth it
+           }
+           else
+           {
+               rdat[l]=wout.getElt(0);
+           }
+        }
+        else if (sdat[l]>maxlimit)
+        {
+           if (check_boundaries)
+           {
+               error=true;              // Could have done an early exit but I'm not sure it's worth it
+           }
+           else
+           {
+               rdat[l]=maxout;
+           }
+        }
         else
-	{
-	    int i=0;
-	    for (;i<ipoints-2;++i)
-	    {
-	        if (sdat[l]<win.getElt(i+1))
-		{
-		    break;
-		}
-	    }
-	    // we must have found one by this point or we would have triggered earlier branches
-	    rdat[l]=(wout.getElt(i+1)-wout.getElt(i))/(win.getElt(i+1)-win.getElt(i)) * (sdat[l]-win.getElt(i)) + wout.getElt(i);
-	}
+        {
+            int i=0;
+            for (;i<ipoints-2;++i)
+            {
+                if (sdat[l]<win.getElt(i+1))
+                {
+                    break;
+                }
+            }
+            // we must have found one by this point or we would have triggered earlier branches
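+            // linear interpolation between the bracketing breakpoints win[i] and win[i+1],
+            // mapped onto the corresponding output values wout[i] and wout[i+1]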
+            rdat[l]=(wout.getElt(i+1)-wout.getElt(i))/(win.getElt(i+1)-win.getElt(i)) * (sdat[l]-win.getElt(i)) + wout.getElt(i);
+        }
     }
-    if (error)	// we had an illegal value (below the start threshold)
+    if (error)  // we had an illegal value (below the start threshold)
     {
         throw DataException("Data being interpolated contains a value outside the range given.");
     }
@@ -3982,9 +5068,9 @@ Data Data::nonuniformslope(boost::python::object in, boost::python::object out,
     expand();
     Data result(0, DataTypes::scalarShape, getFunctionSpace(), true);  
     int numpts=getNumDataPoints();
-    const DataVector& sdat=getReady()->getVectorRO();
-    DataVector& rdat=result.getReady()->getVectorRW();
-    double maxlimit=win.getElt(win.getShape()[0]-1);
+    const RealVectorType& sdat=getReady()->getVectorRO();
+    RealVectorType& rdat=result.getReady()->getVectorRW();
+    real_t maxlimit=win.getElt(win.getShape()[0]-1);
     int ipoints=win.getShape()[0];
     int l=0;
     bool error=false;
@@ -3992,42 +5078,42 @@ Data Data::nonuniformslope(boost::python::object in, boost::python::object out,
     for (l=0; l<numpts; ++l)
     {
         if ((sdat)[l]<win.getElt(0))
-	{
-	   if (check_boundaries)
-	   {
-	       error=true;		// Could have done an early exit but I'm not sure it's worth it
-	   }
-	   else
-	   {
-	       rdat[l]=0;
-	   }
-	}
-	else if (sdat[l]>maxlimit)
-	{
-	   if (check_boundaries)
-	   {
-	       error=true;		// Could have done an early exit but I'm not sure it's worth it
-	   }
-	   else
-	   {
-	       rdat[l]=0;
-	   }
-	}
+        {
+           if (check_boundaries)
+           {
+               error=true;              // Could have done an early exit but I'm not sure it's worth it
+           }
+           else
+           {
+               rdat[l]=0;
+           }
+        }
+        else if (sdat[l]>maxlimit)
+        {
+           if (check_boundaries)
+           {
+               error=true;              // Could have done an early exit but I'm not sure it's worth it
+           }
+           else
+           {
+               rdat[l]=0;
+           }
+        }
         else
-	{
-	    int i=0;
-	    for (;i<ipoints-2;++i)
-	    {
-	        if (sdat[l]<=win.getElt(i+1))
-		{
-		    break;
-		}
-	    }
-	    // we must have found one by this point or we would have triggered earlier branches
-	    rdat[l]=(wout.getElt(i+1)-wout.getElt(i))/(win.getElt(i+1)-win.getElt(i));
-	}
+        {
+            int i=0;
+            for (;i<ipoints-2;++i)
+            {
+                if (sdat[l]<=win.getElt(i+1))
+                {
+                    break;
+                }
+            }
+            // we must have found one by this point or we would have triggered earlier branches
+            rdat[l]=(wout.getElt(i+1)-wout.getElt(i))/(win.getElt(i+1)-win.getElt(i));
+        }
     }
-    if (error)	// we had an illegal value (below the start threwshold)
+    if (error)  // we had an illegal value (below the start threshold)
     {
         throw DataException("Data being interpolated contains a value outside the range given.");
     }
@@ -4069,7 +5155,7 @@ Data::dump(const std::string fileName) const
     }
     catch (std::exception& e)
     {
-        cout << e.what() << endl;
+        std::cout << e.what() << std::endl;
     }
 }
 
@@ -4176,9 +5262,9 @@ escript::applyBinaryCFunction(bp::object cfunc, bp::tuple shape, escript::Data&
     }
     if (d.isConstant() && e.isConstant())
     {
-        const double* src=d.getSampleDataRO(0);
-        const double* src2=e.getSampleDataRO(0);
-        double* dest=res.getSampleDataRW(0);
+        const real_t* src=d.getSampleDataRO(0);
+        const real_t* src2=e.getSampleDataRO(0);
+        real_t* dest=res.getSampleDataRW(0);
         err=func(dest,src,src2,rpointsize, dpointsize, epointsize);
     }
     else if (d.isTagged() && e.isTagged())
@@ -4209,17 +5295,17 @@ escript::applyBinaryCFunction(bp::object cfunc, bp::tuple shape, escript::Data&
         for (std::list<int>::iterator j=alltags.begin();(j!=alltags.end()) && (err==0);++j)
         {
             destd.addTag(*j);
-            const double *ptr_0 = &(srcd.getDataByTagRO(*j,0));
-            const double *ptr_1 = &(srce.getDataByTagRO(*j,0));
-            double *ptr_2 = &(destd.getDataByTagRW(*j,0));
+            const real_t *ptr_0 = &(srcd.getDataByTagRO(*j,0));
+            const real_t *ptr_1 = &(srce.getDataByTagRO(*j,0));
+            real_t *ptr_2 = &(destd.getDataByTagRW(*j,0));
             err=func(ptr_2,ptr_0,ptr_1,rpointsize, dpointsize, epointsize);
         }
         if (err==0)
         {
             // now we do the default tag
-            const double *ptr_0 = &(srcd.getDefaultValueRO(0));
-            const double *ptr_1 = &(srce.getDefaultValueRO(0));
-            double *ptr_2 = &(destd.getDefaultValueRW(0));
+            const real_t *ptr_0 = &(srcd.getDefaultValueRO(0));
+            const real_t *ptr_1 = &(srce.getDefaultValueRO(0));
+            real_t *ptr_2 = &(destd.getDefaultValueRW(0));
             err=func(ptr_2,ptr_0,ptr_1,rpointsize, dpointsize, epointsize);
         }
     }
@@ -4239,9 +5325,9 @@ escript::applyBinaryCFunction(bp::object cfunc, bp::tuple shape, escript::Data&
            {
                 if(!localerr)
                 {
-                    const double* src=d.getSampleDataRO(sampleid);
-                    const double* src2=e.getSampleDataRO(sampleid);
-                    double* dest=res.getSampleDataRW(sampleid);
+                    const real_t* src=d.getSampleDataRO(sampleid);
+                    const real_t* src2=e.getSampleDataRO(sampleid);
+                    real_t* dest=res.getSampleDataRW(sampleid);
                     for (int pointnum=0;pointnum<dpps;++pointnum)
                     {
                         localerr=func(dest,src,src2,rpointsize, dpointsize, epointsize);
@@ -4359,7 +5445,7 @@ escript::condEval(escript::Data& mask, escript::Data& trueval, escript::Data& fa
         const DataTagged* tdat=dynamic_cast<const DataTagged*>(trueval.getReady());
         const DataTagged* fdat=dynamic_cast<const DataTagged*>(falseval.getReady());
         const DataTagged* mdat=dynamic_cast<DataTagged*>(mask.getReady());
-        DataVector::ConstValueType srcptr;
+        RealVectorType::const_pointer srcptr;
 
         // default value first
         if (mdat->getDefaultValueRO(0)>0)
@@ -4418,7 +5504,7 @@ escript::condEval(escript::Data& mask, escript::Data& trueval, escript::Data& fa
 #else
             size_t i;
 #endif
-            DataVector& rvec=result.getReady()->getVectorRW();      // don't need to get acquireWrite since we made it
+            RealVectorType& rvec=result.getReady()->getVectorRW();      // don't need to get acquireWrite since we made it
             unsigned int psize=result.getDataPointSize();
                 
             size_t numsamples=result.getNumSamples();
@@ -4428,8 +5514,8 @@ escript::condEval(escript::Data& mask, escript::Data& trueval, escript::Data& fa
             {
                 // We are assuming that the first datapoint in the sample determines which side to use
                 // for the whole sample.
-                const DataAbstract::ValueType::value_type* src=0;
-                const DataAbstract::ValueType::value_type* masksample=mask.getSampleDataRO(i);
+                const DataTypes::real_t* src=0;
+                const DataTypes::real_t* masksample=mask.getSampleDataRO(i);
                 if (masksample[0]>0)    // first scalar determines whole sample
                 {
                     src=trueval.getSampleDataRO(i);
@@ -4455,24 +5541,33 @@ escript::condEval(escript::Data& mask, escript::Data& trueval, escript::Data& fa
     }
 }
 
-DataTypes::ValueType& Data::getExpandedVectorReference()
+DataTypes::RealVectorType& Data::getExpandedVectorReference(DataTypes::real_t dummy)
 {
     if (!isExpanded())
     {
         expand();
     }
-    return getReady()->getVectorRW();
+    return getReady()->getTypedVectorRW(dummy);
+}
+
+DataTypes::CplxVectorType& Data::getExpandedVectorReference(DataTypes::cplx_t dummy)
+{
+    if (!isExpanded())
+    {
+        expand();
+    }
+    return getReady()->getTypedVectorRW(dummy);
 }
 
 size_t Data::getNumberOfTaggedValues() const
 {
     if (isTagged())
     {
-	return m_data->getTagCount();
+        return m_data->getTagCount();
     }
     else
     {
-	return 0;
+        return 0;
     }
 }
 
@@ -4492,28 +5587,12 @@ Data escript::randomData(const boost::python::tuple& shape,
     // does our domain support this?
     if (what.getDomain()->supportsFilter(filter))
     {
-	return what.getDomain()->randomFill(dataPointShape, what, seed, filter);
+        return what.getDomain()->randomFill(dataPointShape, what, seed, filter);
     }
     else
     {
-	throw DataException("The specified domain does not support those filter options.");
+        throw DataException("The specified domain does not support those filter options.");
     }
-    
-/*     This code below needs to be moved into the other domains' randomFill code */    
-    
-    
-//     }
-//     else
-//     {
-// 	Data towipe(0, shape, what, true);
-// 	DataExpanded* de=dynamic_cast<DataExpanded*>(towipe.m_data.get());
-// 	if (de==0) 
-// 	{
-// 	    throw DataException("Programmer Error: Expanded data is not expanded");
-// 	}
-// 	de->randomFill(seed);
-//         return towipe;
-//     }
 }
 
 
@@ -4545,7 +5624,7 @@ bp::object Data::__add__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(*this+Data(w, this->getFunctionSpace()));    
+        return bp::object(*this+Data(w, this->getFunctionSpace(), false));    
     }
     catch (DataException e)
     {
@@ -4569,7 +5648,7 @@ bp::object Data::__sub__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(*this-Data(w, this->getFunctionSpace()));  
+        return bp::object(*this-Data(w, this->getFunctionSpace(), false));  
     }
     catch (DataException e)
     {
@@ -4593,7 +5672,7 @@ bp::object Data::__rsub__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(Data(w, this->getFunctionSpace())-*this); 
+        return bp::object(Data(w, this->getFunctionSpace(),false)-*this); 
     }
     catch (DataException e)
     {
@@ -4618,7 +5697,7 @@ bp::object Data::__mul__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(*this*Data(w, this->getFunctionSpace()));  
+        return bp::object(*this*Data(w, this->getFunctionSpace(),false));  
     }
     catch (DataException e)
     {
@@ -4642,7 +5721,7 @@ bp::object Data::__div__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(*this/Data(w, this->getFunctionSpace()));  
+        return bp::object(*this/Data(w, this->getFunctionSpace(),false));  
     }
     catch (DataException e)
     {
@@ -4666,7 +5745,7 @@ bp::object Data::__rdiv__(const bp::object& right)
     {
         WrappedArray w(right);
         wrapok=true;
-        return bp::object(Data(w, this->getFunctionSpace())/(*this));  
+        return bp::object(Data(w, this->getFunctionSpace(),false)/(*this));  
     }
     catch (DataException e)
     {
@@ -4678,3 +5757,639 @@ bp::object Data::__rdiv__(const bp::object& right)
     }         
 }
 
+void Data::complicate()
+{
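+    // promotes the underlying storage to a complex representation
+    // (inferred from its use above to prepare results of mixed real/complex operations)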
+    if (isProtected()) {
+        throw DataException("Error - attempt to update protected Data object.");
+    }  
+    m_data->complicate();
+}
+
+Data
+escript::C_TensorUnaryOperation(Data const &arg_0,
+                       escript::ES_optype operation,
+                       DataTypes::real_t tol)
+{
+  if (arg_0.isEmpty())  // do this before we attempt to interpolate
+  {
+     throw DataException("Error - Operations (C_TensorUnaryOperation) not permitted on instances of DataEmpty.");
+  }
+  if (arg_0.isLazy())
+  {
+     throw DataException("Error - Operations not permitted on lazy data.");
+  }
+  
+  if (arg_0.isComplex() && !supports_cplx(operation))
+  {
+      throw DataException("Error - the requested operation does not support complex values");
+  }
+  
+  // Interpolate if necessary and find an appropriate function space
+  Data arg_0_Z = Data(arg_0);
+
+  // Get rank and shape of inputs
+  const DataTypes::ShapeType& shape0 = arg_0_Z.getDataPointShape();
+  int size0 = arg_0_Z.getDataPointSize();
+  
+  // Declare output Data object
+  Data res;
+  bool emptyResult=(arg_0_Z.getNumSamples()==0);
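+  // Dispatch on storage type below; for complex input, always_real() operations write real_t
+  // output via tensor_unary_array_operation_real, otherwise the result is made complex via complicate().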
+  if (arg_0_Z.isConstant()) {
+    if (arg_0_Z.isComplex())                    // note: this does not take cplx->real operations into account
+    {
+        DataTypes::cplx_t dummy=0;
+        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),0);      // DataConstant output
+        const DataTypes::cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0, dummy));
+        if (always_real(operation))
+        {
+	    if (emptyResult)
+	    {
+		return res;
+	    }
+            DataTypes::real_t *ptr_2 = &(res.getDataAtOffsetRW(0, (real_t)(0)));
+            tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);       
+        }
+        else
+        {
+            res.complicate();
+	    if (emptyResult)
+	    {
+		return res;
+	    }	    
+            DataTypes::cplx_t *ptr_2 = &(res.getDataAtOffsetRW(0, dummy));
+            tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+        }
+    }
+    else
+    {
+        // Operations like .real() and .imag() do not reach
+        // tensor_unary_array_operation_real here; they are handled in the Data interface.
+        DataTypes::real_t dummy=0;
+        res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),false);      // DataConstant output
+	if (emptyResult)
+	{
+	    return res;
+	}
+	
+        const DataTypes::real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0, dummy));
+        DataTypes::real_t *ptr_2 = &(res.getDataAtOffsetRW(0, dummy));
+        if (always_real(operation))
+        {
+            tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);
+        }
+        else
+        {
+            tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+        }
+    }
+  }
+  else if (arg_0_Z.isTagged()) {
+
+    // Borrow DataTagged input from Data object
+    DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
+
+    // Prepare a DataTagged output 2
+    res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),false);   // DataTagged output
+
+
+    if (arg_0_Z.isComplex())
+    {
+        if (always_real(operation))
+        {
+            res.tag();
+	    if (emptyResult)
+	    {
+		return res;
+	    }
+	    
+            DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());      
+          
+            DataTypes::cplx_t dummy=0;
+            // Get the pointers to the actual data
+            const DataTypes::cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0,dummy));
+            DataTypes::real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0,real_t(0)));
+            // Compute a result for the default
+            tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);
+            // Compute a result for each tag
+            const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+            DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+            for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+              tmp_2->addTag(i->first);
+              const DataTypes::cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummy));
+              DataTypes::real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, real_t(0)));
+              tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);
+            }
+        }
+        else
+        {
+            res.complicate();
+            res.tag();
+	    if (emptyResult)
+	    {
+		return res;
+	    }
+	    
+            DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());      
+          
+            DataTypes::cplx_t dummy=0;
+            // Get the pointers to the actual data
+            const DataTypes::cplx_t *ptr_0 = &(tmp_0->getDefaultValueRO(0,dummy));
+            DataTypes::cplx_t *ptr_2 = &(tmp_2->getDefaultValueRW(0,dummy));
+            // Compute a result for the default
+            tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+            // Compute a result for each tag
+            const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+            DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+            for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+              tmp_2->addTag(i->first);
+              const DataTypes::cplx_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0, dummy));
+              DataTypes::cplx_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0, dummy));
+              tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+            }
+        }
+    }
+    else
+    {
+      
+        res.tag();
+	if (emptyResult)
+	{
+	    return res;
+	}
+	
+        DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());      
+      
+        // Get the pointers to the actual data
+        const DataTypes::real_t *ptr_0 = &(tmp_0->getDefaultValueRO(0));
+        DataTypes::real_t *ptr_2 = &(tmp_2->getDefaultValueRW(0));
+        // Compute a result for the default
+        if (always_real(operation))
+        {
+            tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);       
+        }
+        else
+        {
+            tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+        }
+        // Compute a result for each tag
+        const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
+        DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
+        for (i=lookup_0.begin();i!=lookup_0.end();i++) {
+          tmp_2->addTag(i->first);
+          const DataTypes::real_t *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
+          DataTypes::real_t *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
+          if (always_real(operation))
+          {
+              tensor_unary_array_operation_real(size0, ptr_0, ptr_2, operation, tol);
+          }
+          else
+          {
+              tensor_unary_array_operation(size0, ptr_0, ptr_2, operation, tol);
+          }
+        }
+    }
+  }
+  else if (arg_0_Z.isExpanded()) 
+  {
+
+    res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),true); // DataExpanded output
+    if (arg_0_Z.isComplex() && !always_real(operation))
+    {
+        res.complicate();
+    }
+    if (emptyResult)
+    {
+	return res;
+    }
+    
+    DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
+    DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
+
+    int sampleNo_0,dataPointNo_0;
+    int numSamples_0 = arg_0_Z.getNumSamples();
+    int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
+    if (arg_0_Z.isComplex())
+    {
+        if (always_real(operation))
+        {
+            DataTypes::cplx_t dummy=0;
+            #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+            for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+                dataPointNo_0=0;
+                int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+                const DataTypes::cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummy));
+                DataTypes::real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, real_t(0)));
+                tensor_unary_array_operation_real(size0*numDataPointsPerSample_0, ptr_0, ptr_2, operation, tol);
+            }             
+        }
+        else
+        {
+            DataTypes::cplx_t dummy=0;
+            #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
+            for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
+                dataPointNo_0=0;
+                int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
+                int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
+                const DataTypes::cplx_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0, dummy));
+                DataTypes::cplx_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2, dummy));
+                tensor_unary_array_operation(size0*numDataPointsPerSample_0, ptr_0, ptr_2, operation, tol);
+            }     
+        }
+    }
+    else
+    {
+        // we require storage to be contiguous so let's do it in one chunk
+        #pragma omp parallel private(sampleNo_0,dataPointNo_0)
+        {
+#ifdef _OPENMP
+            int tid=omp_get_thread_num();
+            int mt=omp_get_num_threads();
+            int rem=numSamples_0%mt;
+            size_t samples_per=numSamples_0/mt;
+            size_t startsample=samples_per*tid+((tid<rem)?tid:rem);
+            size_t nextsample=samples_per*(tid+1)+(((tid+1)<rem)?(tid+1):rem);
+            size_t ulimit=min<size_t>(nextsample, numSamples_0);
+            size_t samples=ulimit-startsample;    
+#else
+            size_t startsample=0;
+            size_t samples=numSamples_0;
+#endif      
+            if (startsample<numSamples_0)
+            {
+                size_t offset_0 = tmp_0->getPointOffset(startsample,0);
+                size_t offset_2 = tmp_2->getPointOffset(startsample,0);
+                const DataTypes::real_t *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
+                DataTypes::real_t *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
+                if (always_real(operation))
+                {
+                    tensor_unary_array_operation_real(size0*samples*numDataPointsPerSample_0, ptr_0, ptr_2, operation, tol);
+                }
+                else
+                {
+                    tensor_unary_array_operation(size0*samples*numDataPointsPerSample_0, ptr_0, ptr_2, operation, tol);
+                }
+            }
+        }           
+    }
+  }
+  else {
+    throw DataException("Error - C_TensorUnaryOperation: unknown combination of inputs");
+  }
+
+  return res;
+}
+
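The unary driver above dispatches first on storage kind (constant, tagged, expanded) and then on whether the operation always yields real output. A hedged caller sketch, assuming ES_optype carries an ABS enumerator as in escript's operation list:

    // Sketch: point-wise magnitude via the unary driver.
    // always_real(ABS) holds, so complex input produces real-valued output.
    escript::Data magnitude(const escript::Data& d)
    {
        return escript::C_TensorUnaryOperation(d, escript::ABS, 0.0);
    }
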
+Data
+escript::C_TensorBinaryOperation(Data const &arg_0,
+                        Data const &arg_1,
+                        escript::ES_optype operation)
+{
+  if (arg_0.isEmpty() || arg_1.isEmpty())
+  {
+     throw DataException("Error - Operations (C_TensorBinaryOperation) not permitted on instances of DataEmpty.");
+  }
+  if (arg_0.isLazy() || arg_1.isLazy())
+  {
+     throw DataException("Error - Operations not permitted on lazy data.");
+  }
+  
+  // Interpolate if necessary and find an appropriate function space
+  Data arg_0_Z, arg_1_Z;
+  FunctionSpace fsl=arg_0.getFunctionSpace();
+  FunctionSpace fsr=arg_1.getFunctionSpace();
+  if (fsl!=fsr) {
+     signed char intres=fsl.getDomain()->preferredInterpolationOnDomain(fsr.getTypeCode(), fsl.getTypeCode());
+     if (intres==0)
+     {
+         std::string msg="Error - C_TensorBinaryOperation: arguments have incompatible function spaces.";
+         msg+=fsl.toString();
+         msg+=" ";
+         msg+=fsr.toString();
+         throw DataException(msg.c_str());
+     } 
+     else if (intres==1)
+     {
+      arg_1_Z=arg_1.interpolate(arg_0.getFunctionSpace());
+      arg_0_Z =Data(arg_0);      
+     }
+     else	// reverse interpolation preferred
+     {
+      arg_0_Z = arg_0.interpolate(arg_1.getFunctionSpace());
+      arg_1_Z = Data(arg_1);
+     }    
+  } else {
+      arg_0_Z = Data(arg_0);
+      arg_1_Z = Data(arg_1);
+  }
+  DataTypes::ShapeType shape0 = arg_0_Z.getDataPointShape();
+  DataTypes::ShapeType shape1 = arg_1_Z.getDataPointShape();
+  
+  DataTypes::ShapeType resultshape=((arg_0_Z.getDataPointRank()!=0)?shape0:shape1);
+
+  bool emptyResult=((arg_0_Z.getNumSamples()==0) || (arg_1_Z.getNumSamples()==0));
+  if ((shape0==shape1) || (arg_0_Z.getDataPointRank()==0) || (arg_1_Z.getDataPointRank()==0))
+  {
+    if (arg_0_Z.isConstant()   && arg_1_Z.isConstant())
+    {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),false);      // DataConstant output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {      
+          binaryOpDataCCC(*dynamic_cast<DataConstant*>(res.borrowData()), *dynamic_cast<const DataConstant*>(arg_0_Z.borrowData()), *dynamic_cast<const DataConstant*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isConstant()   && arg_1_Z.isTagged())
+    {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(), false);      // DataTagged output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      res.tag();
+      if (!emptyResult)
+      {
+          binaryOpDataTCT(*dynamic_cast<DataTagged*>(res.borrowData()), *dynamic_cast<const DataConstant*>(arg_0_Z.borrowData()), *dynamic_cast<const DataTagged*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isConstant()   && arg_1_Z.isExpanded())
+    {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {
+          binaryOpDataECE(*dynamic_cast<DataExpanded*>(res.borrowData()), *dynamic_cast<const DataConstant*>(arg_0_Z.borrowData()), *dynamic_cast<const DataExpanded*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isTagged()     && arg_1_Z.isConstant())
+    {
+      Data res(0.0, resultshape, arg_0_Z.getFunctionSpace(),false);      // DataTagged output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      res.tag();
+      if (!emptyResult)
+      {
+          binaryOpDataTTC(*dynamic_cast<DataTagged*>(res.borrowData()), *dynamic_cast<const DataTagged*>(arg_0_Z.borrowData()), *dynamic_cast<const DataConstant*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isTagged()     && arg_1_Z.isTagged())
+    {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(), false);
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      res.tag();        // DataTagged output
+      if (!emptyResult)
+      {
+          binaryOpDataTTT(*dynamic_cast<DataTagged*>(res.borrowData()), *dynamic_cast<const DataTagged*>(arg_0_Z.borrowData()), *dynamic_cast<const DataTagged*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isTagged()     && arg_1_Z.isExpanded())
+    {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {
+          binaryOpDataETE(*dynamic_cast<DataExpanded*>(res.borrowData()), *dynamic_cast<const DataTagged*>(arg_0_Z.borrowData()), *dynamic_cast<const DataExpanded*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isExpanded()   && arg_1_Z.isConstant()) {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {
+          binaryOpDataEEC(*dynamic_cast<DataExpanded*>(res.borrowData()), *dynamic_cast<const DataExpanded*>(arg_0_Z.borrowData()), *dynamic_cast<const DataConstant*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isExpanded()   && arg_1_Z.isTagged()) {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {
+          binaryOpDataEET(*dynamic_cast<DataExpanded*>(res.borrowData()), *dynamic_cast<const DataExpanded*>(arg_0_Z.borrowData()), *dynamic_cast<const DataTagged*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else if (arg_0_Z.isExpanded()   && arg_1_Z.isExpanded()) {
+      Data res(0.0, resultshape, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
+      if (arg_0_Z.isComplex() || arg_1_Z.isComplex())
+      {
+        res.complicate();
+      }
+      if (!emptyResult)
+      {
+          binaryOpDataEEE(*dynamic_cast<DataExpanded*>(res.borrowData()), *dynamic_cast<const DataExpanded*>(arg_0_Z.borrowData()), *dynamic_cast<const DataExpanded*>(arg_1_Z.borrowData()), operation);
+      }
+      return res;
+    }
+    else {
+      throw DataException("Error - C_TensorBinaryOperation: unknown combination of inputs");
+    }
+  } else {
+    throw DataException("Error - C_TensorBinaryOperation: arguments have incompatible shapes");
+  }
+}
+
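The binary driver first reconciles function spaces through preferredInterpolationOnDomain and then picks one of the binaryOpData* kernels from the storage pair. A minimal sketch, assuming ADD is a member of escript::ES_optype:

    // Sketch: point-wise sum of two Data objects.
    // Shapes must match unless one argument has rank 0; mismatched function
    // spaces are interpolated in whichever direction the domain prefers.
    escript::Data add_pointwise(const escript::Data& a, const escript::Data& b)
    {
        return escript::C_TensorBinaryOperation(a, b, escript::ADD);
    }
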
+
+void
+Data::TensorSelfUpdateBinaryOperation(const Data& right,
+                   escript::ES_optype operation)
+{
+   //
+   // if this has a rank of zero promote it to the rank of the RHS
+   if (getDataPointRank()==0 && right.getDataPointRank()!=0) {
+     throw DataException("Error - attempt to update rank zero object with object with rank bigger than zero.");
+   }
+
+   if (isLazy() || right.isLazy())
+   {
+     throw DataException("Programmer error - attempt to call binaryOp with Lazy Data.");
+   }
+   //
+   // initially make the temporary a shallow copy
+   Data tempRight(right);
+   FunctionSpace fsl=getFunctionSpace();
+   FunctionSpace fsr=right.getFunctionSpace();
+   if (fsl!=fsr) {
+     signed char intres=fsl.getDomain()->preferredInterpolationOnDomain(fsr.getTypeCode(), fsl.getTypeCode());
+     if (intres==0)
+     {
+         std::string msg="Error - attempt to combine incompatible FunctionSpaces.";
+         msg+=fsl.toString();
+         msg+="  ";
+         msg+=fsr.toString();
+         throw DataException(msg.c_str());
+     } 
+     else if (intres==1)
+     {
+       // an interpolation is required so create a new Data
+       tempRight=Data(right,fsl);
+     }
+     else       // reverse interpolation preferred
+     {
+        // interpolate onto the RHS function space
+       Data tempLeft(*this,fsr);
+       set_m_data(tempLeft.m_data);
+     }
+   }
+   operandCheck(tempRight);
+   //
+   // ensure this has the right type for the RHS
+   typeMatchRight(tempRight);
+   //
+   // Need to cast to the concrete types so that the correct binaryOp
+   // is called.
+   if (isExpanded()) {
+     //
+     // Expanded data will be done in parallel, the right hand side can be
+     // of any data type
+     DataExpanded* leftC=dynamic_cast<DataExpanded*>(m_data.get());
+     ESYS_ASSERT(leftC!=NULL, "Programming error - casting to DataExpanded.");
+     
+     if (right.isExpanded())
+     {
+	binaryOpDataEEE(*leftC, *leftC, *dynamic_cast<const DataExpanded*>(tempRight.getReady()), operation);
+     }
+     else if (right.isTagged())
+     {
+	binaryOpDataEET(*leftC, *leftC, *dynamic_cast<const DataTagged*>(tempRight.getReady()), operation);
+     }
+     else	// it's constant
+     {
+	binaryOpDataEEC(*leftC, *leftC, *dynamic_cast<const DataConstant*>(tempRight.getReady()), operation);
+     }
+       
+     //escript::binaryOpDataReady(*leftC,*(tempRight.getReady()),operation);
+   } else if (isTagged()) {
+     //
+     // Tagged data is operated on serially, the right hand side can be
+     // either DataConstant or DataTagged
+     DataTagged* leftC=dynamic_cast<DataTagged*>(m_data.get());
+     ESYS_ASSERT(leftC!=NULL, "Programming error - casting to DataTagged.");
+     if (right.isTagged()) {
+       DataTagged* rightC=dynamic_cast<DataTagged*>(tempRight.m_data.get());
+       ESYS_ASSERT(rightC!=NULL, "Programming error - casting to DataTagged.");
+       binaryOpDataTTT(*leftC, *leftC, *rightC, operation);
+       //escript::binaryOpDataReady(*leftC,*rightC,operation);
+     } else {
+       DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
+       ESYS_ASSERT(rightC!=NULL, "Programming error - casting to DataConstant.");
+       binaryOpDataTTC(*leftC, *leftC, *rightC, operation);
+       //escript::binaryOpDataReady(*leftC,*rightC,operation);
+     }
+   } else if (isConstant()) {
+     DataConstant* leftC=dynamic_cast<DataConstant*>(m_data.get());
+     DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
+     ESYS_ASSERT(leftC!=NULL && rightC!=NULL, "Programming error - casting to DataConstant.");
+     binaryOpDataCCC(*leftC, *leftC, *rightC, operation);
+     //escript::binaryOpDataReady(*leftC,*rightC,operation);
+   }  
+}
+
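TensorSelfUpdateBinaryOperation is the in-place counterpart behind the arithmetic-assignment operators. A sketch, assuming the method is reachable from the calling code and that ADD exists in ES_optype:

    // Sketch: in-place accumulation, roughly what total += increment resolves to.
    void accumulate(escript::Data& total, const escript::Data& increment)
    {
        // Operands are interpolated/promoted as needed, then the op is applied on
        // the concrete storage (expanded in parallel, tagged/constant serially).
        total.TensorSelfUpdateBinaryOperation(increment, escript::ADD);
    }
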
+#if 0
+void
+Data::binaryDataOp(const Data& right,
+                   escript::ES_optype operation)
+{
+   //
+   // if this has a rank of zero promote it to the rank of the RHS
+   if (getDataPointRank()==0 && right.getDataPointRank()!=0) {
+     throw DataException("Error - attempt to update rank zero object with object with rank bigger than zero.");
+   }
+
+   if (isLazy() || right.isLazy())
+   {
+     throw DataException("Programmer error - attempt to call binaryOp with Lazy Data.");
+   }
+   //
+   // initially make the temporary a shallow copy
+   Data tempRight(right);
+   FunctionSpace fsl=getFunctionSpace();
+   FunctionSpace fsr=right.getFunctionSpace();
+   if (fsl!=fsr) {
+     signed char intres=fsl.getDomain()->preferredInterpolationOnDomain(fsr.getTypeCode(), fsl.getTypeCode());
+     if (intres==0)
+     {
+         std::string msg="Error - attempt to combine incompatible FunctionSpaces.";
+         msg+=fsl.toString();
+         msg+="  ";
+         msg+=fsr.toString();
+         throw DataException(msg.c_str());
+     } 
+     else if (intres==1)
+     {
+       // an interpolation is required so create a new Data
+       tempRight=Data(right,fsl);
+     }
+     else       // reverse interpolation preferred
+     {
+        // interpolate onto the RHS function space
+       Data tempLeft(*this,fsr);
+       set_m_data(tempLeft.m_data);
+     }
+   }
+   operandCheck(tempRight);
+   //
+   // ensure this has the right type for the RHS
+   typeMatchRight(tempRight);
+   //
+   // Need to cast to the concrete types so that the correct binaryOp
+   // is called.
+   if (isExpanded()) {
+     //
+     // Expanded data will be done in parallel, the right hand side can be
+     // of any data type
+     DataExpanded* leftC=dynamic_cast<DataExpanded*>(m_data.get());
+     ESYS_ASSERT(leftC!=NULL, "Programming error - casting to DataExpanded.");
+     escript::binaryOpDataReady(*leftC,*(tempRight.getReady()),operation);
+   } else if (isTagged()) {
+     //
+     // Tagged data is operated on serially, the right hand side can be
+     // either DataConstant or DataTagged
+     DataTagged* leftC=dynamic_cast<DataTagged*>(m_data.get());
+     ESYS_ASSERT(leftC!=NULL, "Programming error - casting to DataTagged.");
+     if (right.isTagged()) {
+       DataTagged* rightC=dynamic_cast<DataTagged*>(tempRight.m_data.get());
+       ESYS_ASSERT(rightC!=NULL, "Programming error - casting to DataTagged.");
+       escript::binaryOpDataReady(*leftC,*rightC,operation);
+     } else {
+       DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
+       ESYS_ASSERT(rightC!=NULL, "Programming error - casting to DataConstant.");
+       escript::binaryOpDataReady(*leftC,*rightC,operation);
+     }
+   } else if (isConstant()) {
+     DataConstant* leftC=dynamic_cast<DataConstant*>(m_data.get());
+     DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
+     ESYS_ASSERT(leftC!=NULL && rightC!=NULL,
+             "Programming error - casting to DataConstant.");
+     escript::binaryOpDataReady(*leftC,*rightC,operation);
+   }  
+}
+
+#endif
+
diff --git a/escriptcore/src/Data.h b/escriptcore/src/Data.h
index c572fbd..cc6aa37 100644
--- a/escriptcore/src/Data.h
+++ b/escriptcore/src/Data.h
@@ -14,35 +14,30 @@
 *
 *****************************************************************************/
 
-
 /** \file Data.h */
 
-#ifndef DATA_H
-#define DATA_H
-#include "system_dep.h"
+#ifndef __ESCRIPT_DATA_H__
+#define __ESCRIPT_DATA_H__
 
-#include "DataTypes.h"
+#include "system_dep.h"
 #include "DataAbstract.h"
-#include "DataAlgorithm.h"
-#include "FunctionSpace.h"
-#include "BinaryOp.h"
-#include "UnaryOp.h"
 #include "DataException.h"
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "esysUtils/Esys_MPI.h"
-#include <string>
+#include "DataTypes.h"
+#include "EsysMPI.h"
+#include "FunctionSpace.h"
+#include "DataVectorOps.h"
 #include <algorithm>
+#include <string>
 #include <sstream>
 
-#include <boost/shared_ptr.hpp>
 #include <boost/python/object.hpp>
 #include <boost/python/tuple.hpp>
 #include <boost/math/special_functions/bessel.hpp>
 
+#ifndef ESCRIPT_MAX_DATA_RANK
+#define ESCRIPT_MAX_DATA_RANK 4
+#endif
+
 namespace escript {
 
 //
@@ -69,12 +64,6 @@ class Data {
 
   public:
 
-  // These typedefs allow function names to be cast to pointers
-  // to functions of the appropriate type when calling unaryOp etc.
-  typedef double (*UnaryDFunPtr)(double);
-  typedef double (*BinaryDFunPtr)(double,double);
-
-
   /**
      Constructors.
   */
@@ -84,7 +73,6 @@ class Data {
      Default constructor.
      Creates a DataEmpty object.
   */
-  ESCRIPT_DLL_API
   Data();
 
   /**
@@ -92,7 +80,6 @@ class Data {
      Copy constructor.
      WARNING: Only performs a shallow copy.
   */
-  ESCRIPT_DLL_API
   Data(const Data& inData);
 
   /**
@@ -101,36 +88,49 @@ class Data {
     function space of inData, then inData is interpolated onto what where possible,
     otherwise a shallow copy of inData is returned.
   */
-  ESCRIPT_DLL_API
   Data(const Data& inData,
        const FunctionSpace& what);
 
   /**
-	\brief Copy Data from an existing vector
+     \brief Copy Data from an existing vector
   */ 
+  Data(const DataTypes::RealVectorType& value,
+                 const DataTypes::ShapeType& shape,
+                 const FunctionSpace& what,
+                 bool expanded);
 
-  ESCRIPT_DLL_API
-  Data(const DataTypes::ValueType& value,
-		 const DataTypes::ShapeType& shape,
-                 const FunctionSpace& what=FunctionSpace(),
-                 bool expanded=false);
+  /**
+     \brief
+     Constructor which creates a Data with points having the specified shape.
+
+     \param value - Input - Single real value applied to all Data.
+     \param dataPointShape - Input - The shape of each data point.
+     \param what - Input - A description of what this data represents.
+     \param expanded - Input - Flag, if true fill the entire container with
+                       the given value. Otherwise a more efficient storage
+                       mechanism will be used.
+  */
+  Data(DataTypes::real_t value,
+       const DataTypes::ShapeType& dataPointShape,
+       const FunctionSpace& what,
+       bool expanded);
 
   /**
      \brief
      Constructor which creates a Data with points having the specified shape.
 
-     \param value - Input - Single value applied to all Data.
+     \param value - Input - Single complex value applied to all Data.
      \param dataPointShape - Input - The shape of each data point.
      \param what - Input - A description of what this data represents.
      \param expanded - Input - Flag, if true fill the entire container with
                        the given value. Otherwise a more efficient storage
                        mechanism will be used.
   */
-  ESCRIPT_DLL_API
-  Data(double value,
-       const DataTypes::ShapeType& dataPointShape=DataTypes::ShapeType(),
-       const FunctionSpace& what=FunctionSpace(),
-       bool expanded=false);
+  explicit
+  Data(DataTypes::cplx_t value,
+       const DataTypes::ShapeType& dataPointShape,
+       const FunctionSpace& what,
+       bool expanded);
 
   /**
      \brief
@@ -139,24 +139,9 @@ class Data {
      \param inData - Input - Input Data object.
      \param region - Input - Region to copy.
   */
-  ESCRIPT_DLL_API
   Data(const Data& inData,
        const DataTypes::RegionType& region);
 
-  /**
-     \brief
-     Constructor which copies data from any object that can be treated like a python array/sequence.
-
-     \param value - Input - Input data.
-     \param what - Input - A description of what this data represents.
-     \param expanded - Input - Flag, if true fill the entire container with
-                       the value. Otherwise a more efficient storage
-                       mechanism will be used.
-  */
-  ESCRIPT_DLL_API
-  Data(const boost::python::object& value,
-       const FunctionSpace& what=FunctionSpace(),
-       bool expanded=false);
 
   /**
      \brief
@@ -168,9 +153,8 @@ class Data {
                        the value. Otherwise a more efficient storage
                        mechanism will be used.
   */       
-  ESCRIPT_DLL_API     
   Data(const WrappedArray& w, const FunctionSpace& what,
-           bool expanded=false);       
+           bool expanded);       
        
 
   /**
@@ -182,66 +166,77 @@ class Data {
      \param value - Input - Input data.
      \param other - Input - contains all other parameters.
   */
-  ESCRIPT_DLL_API
   Data(const boost::python::object& value,
        const Data& other);
-
+  
   /**
-     \brief
-     Constructor which creates a DataConstant of "shape" with constant value.
-  */
-  ESCRIPT_DLL_API
-  Data(double value,
+     This constructor subsumes a number of previous python ones.
+     
+  Data(const boost::python::object& value,
+       const FunctionSpace& what=FunctionSpace(),
+       bool expanded=false);
+       
+  Data(DataTypes::real_t value,
        const boost::python::tuple& shape=boost::python::make_tuple(),
        const FunctionSpace& what=FunctionSpace(),
        bool expanded=false);
+       
+  and a new 
+  
+  Data(cplx_t value,
+       const boost::python::tuple& shape=boost::python::make_tuple(),
+       const FunctionSpace& what=FunctionSpace(),
+       bool expanded=false);  
+  
+  */
+  Data(boost::python::object value,
+       boost::python::object par1=boost::python::object(),
+       boost::python::object par2=boost::python::object(),
+       boost::python::object par3=boost::python::object());  
+  
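A hedged illustration of the consolidated constructor; the interpreter is assumed to be initialised, fs is assumed to be a FunctionSpace already wrapped as a boost::python::object, and the argument interpretation is inferred from the superseded signatures listed above:

    namespace bp = boost::python;
    bp::object shape = bp::make_tuple(2, 2);
    escript::Data d(bp::object(1.0),   // value: real scalar
                    shape,             // par1: data point shape
                    fs,                // par2: FunctionSpace
                    bp::object(true)); // par3: expanded flag
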
+  
+  
+  
 
   /**
-	\brief Create a Data using an existing DataAbstract. Warning: The new object assumes ownership of the pointer!
-	Once you have passed the pointer, do not delete it.
+        \brief Create a Data using an existing DataAbstract. Warning: The new object assumes ownership of the pointer!
+        Once you have passed the pointer, do not delete it.
   */
-  ESCRIPT_DLL_API
   explicit Data(DataAbstract* underlyingdata);
 
   /**
-	\brief Create a Data based on the supplied DataAbstract
+        \brief Create a Data based on the supplied DataAbstract
   */
-  ESCRIPT_DLL_API
   explicit Data(DataAbstract_ptr underlyingdata);
 
   /**
      \brief
      Destructor
   */
-  ESCRIPT_DLL_API
   ~Data();
 
   /**
      \brief Make this object a deep copy of "other".
   */
-  ESCRIPT_DLL_API
   void
   copy(const Data& other);
 
   /**
      \brief Return a pointer to a deep copy of this object.
   */
-  ESCRIPT_DLL_API
   Data
-  copySelf();
+  copySelf() const;
 
 
   /**
      \brief produce a delayed evaluation version of this Data.
   */
-  ESCRIPT_DLL_API
   Data
   delay();
 
   /**
      \brief convert the current data into lazy data.
   */
-  ESCRIPT_DLL_API
   void 
   delaySelf();
 
@@ -255,7 +250,6 @@ class Data {
      switches on update protection
 
   */
-  ESCRIPT_DLL_API
   void
   setProtection();
 
@@ -264,7 +258,6 @@ class Data {
      Returns true, if the data object is protected against update
 
   */
-  ESCRIPT_DLL_API
   bool
   isProtected() const;
 
@@ -273,7 +266,6 @@ class Data {
    \brief 
    Return the value of a data point as a python tuple.
 */
-  ESCRIPT_DLL_API
   const boost::python::object
   getValueOfDataPointAsTuple(int dataPointNo);
 
@@ -281,7 +273,6 @@ class Data {
      \brief
      sets the values of a data-point from a python object on this process
   */
-  ESCRIPT_DLL_API
   void
   setValueOfDataPointToPyObject(int dataPointNo, const boost::python::object& py_object);
 
@@ -289,7 +280,6 @@ class Data {
      \brief
      sets the values of a data-point from a array-like object on this process
   */
-  ESCRIPT_DLL_API
   void
   setValueOfDataPointToArray(int dataPointNo, const boost::python::object&);
 
@@ -297,14 +287,16 @@ class Data {
      \brief
      sets the values of a data-point on this process
   */
-  ESCRIPT_DLL_API
   void
-  setValueOfDataPoint(int dataPointNo, const double);
+  setValueOfDataPoint(int dataPointNo, const DataTypes::real_t);
+  
+  void
+  setValueOfDataPointC(int dataPointNo, const DataTypes::cplx_t);  
+  
 
   /**
      \brief Return a data point across all processors as a python tuple.
   */
-  ESCRIPT_DLL_API
   const boost::python::object
   getValueOfGlobalDataPointAsTuple(int procNo, int dataPointNo);
 
@@ -312,7 +304,6 @@ class Data {
   /**
      \brief Set the value of a global data point
   */
-  ESCRIPT_DLL_API
   void
   setTupleForGlobalDataPoint(int id, int proc, boost::python::object);
   
@@ -321,7 +312,6 @@ class Data {
      Return the tag number associated with the given data-point.
 
   */
-  ESCRIPT_DLL_API
   int
   getTagNumber(int dpno);
 
@@ -330,7 +320,6 @@ class Data {
      \brief
      Write the data as a string. For large amounts of data, a summary is printed.
   */
-  ESCRIPT_DLL_API
   std::string
   toString() const;
 
@@ -338,7 +327,6 @@ class Data {
      \brief
      Whatever the current Data type make this into a DataExpanded.
   */
-  ESCRIPT_DLL_API
   void
   expand();
 
@@ -348,7 +336,6 @@ class Data {
      Constant data to be converted to tagged. An attempt to convert
      Expanded data to tagged will throw an exception.
   */
-  ESCRIPT_DLL_API
   void
   tag();
 
@@ -356,7 +343,6 @@ class Data {
     \brief If this data is lazy, then convert it to ready data.
     What type of ready data depends on the expression. For example, Constant+Tagged==Tagged.
   */
-  ESCRIPT_DLL_API
   void
   resolve();
 
@@ -365,16 +351,29 @@ class Data {
   \warning This is dependent on the ability to reliably detect NaNs on your compiler.
    See the nancheck function in LocalOps for details.
   */
-  ESCRIPT_DLL_API
   bool
   hasNaN();
 
   /**
   \brief replaces all NaN values with value 
   */
-  ESCRIPT_DLL_API
   void
-  replaceNaN(double value);
+  replaceNaN(DataTypes::real_t value);
+  
+  /**
+  \brief replaces all NaN values with value 
+  */
+  void
+  replaceNaN(DataTypes::cplx_t value);  
+  
+  /**
+  \brief replaces all NaN values with value 
+  */
+  void
+  replaceNaNPython(boost::python::object obj);  
+
+
+  
 
   /**
    \brief Ensures data is ready for write access.
@@ -383,7 +382,6 @@ class Data {
   Do not create any Data objects from this one between calling requireWrite and getSampleDataRW.
   Doing so might introduce additional sharing.
   */
-  ESCRIPT_DLL_API
   void
   requireWrite();
 
@@ -392,7 +390,6 @@ class Data {
      Return true if this Data is expanded.
      \note To determine if a sample will contain separate values for each datapoint. Use actsExpanded instead.
   */
-  ESCRIPT_DLL_API
   bool
   isExpanded() const;
 
@@ -401,7 +398,6 @@ class Data {
      Return true if this Data is expanded or resolves to expanded.
      That is, if it has a separate value for each datapoint in the sample.
   */
-  ESCRIPT_DLL_API
   bool
   actsExpanded() const;
   
@@ -410,7 +406,6 @@ class Data {
      \brief
      Return true if this Data is tagged.
   */
-  ESCRIPT_DLL_API
   bool
   isTagged() const;
 
@@ -418,21 +413,18 @@ class Data {
      \brief
      Return true if this Data is constant.
   */
-  ESCRIPT_DLL_API
   bool
   isConstant() const;
 
   /**
      \brief Return true if this Data is lazy.
   */
-  ESCRIPT_DLL_API
   bool
   isLazy() const;
 
   /**
      \brief Return true if this data is ready.
   */
-  ESCRIPT_DLL_API
   bool
   isReady() const;
 
@@ -441,15 +433,20 @@ class Data {
      Return true if this Data holds an instance of DataEmpty. This is _not_ the same as asking if the object 
 contains datapoints.
   */
-  ESCRIPT_DLL_API
   bool
   isEmpty() const;
 
   /**
+    \brief
+    True if components of this data are stored as complex
+  */
+  bool
+  isComplex() const;
+
+  /**
      \brief
      Return the function space.
   */
-  ESCRIPT_DLL_API
   inline
   const FunctionSpace&
   getFunctionSpace() const
@@ -461,7 +458,6 @@ contains datapoints.
      \brief
      Return the domain.
   */
-  ESCRIPT_DLL_API
   inline
 //   const AbstractDomain&
   const_Domain_ptr
@@ -476,7 +472,6 @@ contains datapoints.
      Return the domain.
      TODO: For internal use only.   This should be removed.
   */
-  ESCRIPT_DLL_API
   inline
 //   const AbstractDomain&
   Domain_ptr
@@ -489,7 +484,6 @@ contains datapoints.
      \brief
      Return the rank of the point data.
   */
-  ESCRIPT_DLL_API
   inline
   unsigned int
   getDataPointRank() const
@@ -501,7 +495,6 @@ contains datapoints.
      \brief
      Return the number of data points
   */
-  ESCRIPT_DLL_API
   inline
   int
   getNumDataPoints() const
@@ -512,7 +505,6 @@ contains datapoints.
      \brief
      Return the number of samples.
   */
-  ESCRIPT_DLL_API
   inline
   int
   getNumSamples() const
@@ -524,7 +516,6 @@ contains datapoints.
      \brief
      Return the number of data points per sample.
   */
-  ESCRIPT_DLL_API
   inline
   int
   getNumDataPointsPerSample() const
@@ -537,7 +528,6 @@ contains datapoints.
      Returns true if the number of data points per sample and the number of
      samples match the respective argument. DataEmpty always returns true.
   */
-  ESCRIPT_DLL_API
   inline
   bool numSamplesEqual(int numDataPointsPerSample, int numSamples) const
   {
@@ -560,10 +550,9 @@ contains datapoints.
   }
 
   /**
-	\brief
-	Return the number of values in the shape for this object.
+        \brief
+        Return the number of values in the shape for this object.
   */
-  ESCRIPT_DLL_API
   int
   getNoValues() const
   {
@@ -575,7 +564,6 @@ contains datapoints.
      \brief
      dumps the object into a netCDF file
   */
-  ESCRIPT_DLL_API
   void
   dump(const std::string fileName) const;
 
@@ -585,7 +573,6 @@ contains datapoints.
   \param scalarastuple If true, scalar data will produce single valued tuples [(1,) (2,) ...]
 If false, the result is a list of scalars [1, 2, ...]
  */
-  ESCRIPT_DLL_API
   const boost::python::object
   toListOfTuples(bool scalarastuple=true);
 
@@ -597,11 +584,12 @@ If false, the result is a list of scalars [1, 2, ...]
     \param sampleNo - Input - the given sample no.
     \return pointer to the sample data.
 */
-  ESCRIPT_DLL_API
-  inline
-  const DataAbstract::ValueType::value_type*
-  getSampleDataRO(DataAbstract::ValueType::size_type sampleNo) const;
+  const DataTypes::real_t*
+  getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy=0) const;
 
+  const DataTypes::cplx_t*
+  getSampleDataRO(DataTypes::CplxVectorType::size_type sampleNo, DataTypes::cplx_t dummy) const;
+  
 
   /**
      \brief
@@ -610,11 +598,13 @@ If false, the result is a list of scalars [1, 2, ...]
      \param sampleNo - Input - the given sample no.
      \return pointer to the sample data.
   */
-  ESCRIPT_DLL_API
-  inline
-  DataAbstract::ValueType::value_type*
-  getSampleDataRW(DataAbstract::ValueType::size_type sampleNo);
+  DataTypes::real_t*
+  getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy=0);
 
+  DataTypes::cplx_t*
+  getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy);  
+  
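The trailing dummy argument only selects the real or complex overload; its value is never read. A small sketch, assuming d is a Data object whose storage matches the overload requested:

    // Complex read-only view of sample 0 (the cplx_t dummy picks the complex overload).
    const escript::DataTypes::cplx_t* csamp = d.getSampleDataRO(0, escript::DataTypes::cplx_t(0));
    // Real view: the real overload's dummy defaults to 0, so no extra argument is needed.
    const escript::DataTypes::real_t* rsamp = d.getSampleDataRO(0);
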
+  
 
  /**
     \brief
@@ -622,10 +612,13 @@ If false, the result is a list of scalars [1, 2, ...]
     \warning please avoid using this method since it by-passes possible lazy improvements. May be removed without notice.
     \return pointer to the data.
 */
-  ESCRIPT_DLL_API
-  inline
-  const DataAbstract::ValueType::value_type*
-  getDataRO() const;  
+  const DataTypes::real_t*
+  getDataRO(DataTypes::real_t dummy=0) const;  
+  
+  const DataTypes::cplx_t*
+  getDataRO(DataTypes::cplx_t dummy) const;    
+  
+  
   
   /**
      \brief
@@ -633,13 +626,20 @@ If false, the result is a list of scalars [1, 2, ...]
      access data that isn't tagged an exception will be thrown.
      \param tag - Input - the tag key.
   */
-  ESCRIPT_DLL_API
   inline
-  DataAbstract::ValueType::value_type*
-  getSampleDataByTag(int tag)
+  DataTypes::real_t*
+  getSampleDataByTag(int tag, DataTypes::real_t dummy=0)
   {
-    return m_data->getSampleDataByTag(tag);
+    return m_data->getSampleDataByTag(tag, dummy);
   }
+  
+  inline
+  DataTypes::cplx_t*
+  getSampleDataByTag(int tag, DataTypes::cplx_t dummy)
+  {
+    return m_data->getSampleDataByTag(tag, dummy);
+  }  
+  
 
   /**
      \brief
@@ -647,8 +647,7 @@ If false, the result is a list of scalars [1, 2, ...]
      \param sampleNo - Input -
      \param dataPointNo - Input -
   */
-  ESCRIPT_DLL_API
-  DataTypes::ValueType::const_reference
+  DataTypes::RealVectorType::const_reference
   getDataPointRO(int sampleNo, int dataPointNo);
 
   /**
@@ -657,8 +656,7 @@ If false, the result is a list of scalars [1, 2, ...]
      \param sampleNo - Input -
      \param dataPointNo - Input -
   */
-  ESCRIPT_DLL_API
-  DataTypes::ValueType::reference
+  DataTypes::RealVectorType::reference
   getDataPointRW(int sampleNo, int dataPointNo);
 
 
@@ -667,9 +665,8 @@ If false, the result is a list of scalars [1, 2, ...]
      \brief 
      Return the offset for the given sample and point within the sample
   */
-  ESCRIPT_DLL_API
   inline
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getDataOffset(int sampleNo,
                int dataPointNo)
   {
@@ -680,19 +677,17 @@ If false, the result is a list of scalars [1, 2, ...]
      \brief
      Return a reference to the data point shape.
   */
-  ESCRIPT_DLL_API
   inline
   const DataTypes::ShapeType&
   getDataPointShape() const
   {
-	return m_data->getShape();
+        return m_data->getShape();
   }
 
   /**
      \brief
      Return the data point shape as a tuple of integers.
   */
-  ESCRIPT_DLL_API
   const boost::python::tuple
   getShapeTuple() const;
 
@@ -701,7 +696,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return the size of the data point. It is the product of the
      data point shape dimensions.
   */
-  ESCRIPT_DLL_API
   int
   getDataPointSize() const;
 
@@ -709,19 +703,17 @@ If false, the result is a list of scalars [1, 2, ...]
      \brief
      Return the number of doubles stored for this Data.
   */
-  ESCRIPT_DLL_API
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
   /**
   \brief Return true if this object contains no samples.
   This is not the same as isEmpty() 
   */
-  ESCRIPT_DLL_API
   bool
   hasNoSamples() const
   {
-	return getLength()==0;
+        return m_data->getNumSamples()==0;
   }
 
   /**
@@ -732,7 +724,6 @@ If false, the result is a list of scalars [1, 2, ...]
      \param name - Input - name of tag.
      \param value - Input - Value to associate with given key.
   */
-  ESCRIPT_DLL_API
   void
   setTaggedValueByName(std::string name,
                        const boost::python::object& value);
@@ -746,7 +737,6 @@ If false, the result is a list of scalars [1, 2, ...]
      \param value - Input - Value to associate with given key.
     ==>*
   */
-  ESCRIPT_DLL_API
   void
   setTaggedValue(int tagKey,
                  const boost::python::object& value);
@@ -761,20 +751,23 @@ If false, the result is a list of scalars [1, 2, ...]
      \param value - Input - Value to associate with given key.
     \param dataOffset - Input - Offset of the beginning of the point within the value parameter
   */
-  ESCRIPT_DLL_API
   void
   setTaggedValueFromCPP(int tagKey,
-			const DataTypes::ShapeType& pointshape,
-                        const DataTypes::ValueType& value,
-			int dataOffset=0);
+                        const DataTypes::ShapeType& pointshape,
+                        const DataTypes::RealVectorType& value,
+                        int dataOffset=0);
 
 
+  void
+  setTaggedValueFromCPP(int tagKey,
+                        const DataTypes::ShapeType& pointshape,
+                        const DataTypes::CplxVectorType& value,
+                        int dataOffset=0);  
 
   /**
     \brief
     Copy other Data object into this Data object where mask is positive.
   */
-  ESCRIPT_DLL_API
   void
   copyWithMask(const Data& other,
                const Data& mask);
@@ -788,7 +781,6 @@ If false, the result is a list of scalars [1, 2, ...]
      set all values to zero
      *
   */
-  ESCRIPT_DLL_API
   void
   setToZero();
 
@@ -798,48 +790,39 @@ If false, the result is a list of scalars [1, 2, ...]
      the result as a Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   interpolate(const FunctionSpace& functionspace) const;
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable3D(const WrappedArray& table, double Amin, double Astep,
-                       double undef, Data& B, double Bmin, double Bstep, Data& C, 
-			double Cmin, double Cstep, bool check_boundaries);
+  interpolateFromTable3D(const WrappedArray& table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                       DataTypes::real_t undef, Data& B, DataTypes::real_t Bmin, DataTypes::real_t Bstep, Data& C, 
+                        DataTypes::real_t Cmin, DataTypes::real_t Cstep, bool check_boundaries);
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable2D(const WrappedArray& table, double Amin, double Astep,
-                       double undef, Data& B, double Bmin, double Bstep,bool check_boundaries);
+  interpolateFromTable2D(const WrappedArray& table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                       DataTypes::real_t undef, Data& B, DataTypes::real_t Bmin, DataTypes::real_t Bstep,bool check_boundaries);
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable1D(const WrappedArray& table, double Amin, double Astep,
-                       double undef,bool check_boundaries);
+  interpolateFromTable1D(const WrappedArray& table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                       DataTypes::real_t undef,bool check_boundaries);
 
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable3DP(boost::python::object table, double Amin, double Astep,
-                        Data& B, double Bmin, double Bstep, Data& C, double Cmin, double Cstep, double undef,bool check_boundaries);
+  interpolateFromTable3DP(boost::python::object table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                        Data& B, DataTypes::real_t Bmin, DataTypes::real_t Bstep, Data& C, DataTypes::real_t Cmin, DataTypes::real_t Cstep, DataTypes::real_t undef,bool check_boundaries);
 
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable2DP(boost::python::object table, double Amin, double Astep,
-                        Data& B, double Bmin, double Bstep, double undef,bool check_boundaries);
+  interpolateFromTable2DP(boost::python::object table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                        Data& B, DataTypes::real_t Bmin, DataTypes::real_t Bstep, DataTypes::real_t undef,bool check_boundaries);
 
-  ESCRIPT_DLL_API
   Data
-  interpolateFromTable1DP(boost::python::object table, double Amin, double Astep,
-                        double undef,bool check_boundaries);
+  interpolateFromTable1DP(boost::python::object table, DataTypes::real_t Amin, DataTypes::real_t Astep,
+                        DataTypes::real_t undef,bool check_boundaries);
   
-  ESCRIPT_DLL_API
   Data
   nonuniforminterp(boost::python::object in, boost::python::object out, bool check_boundaries);
 
-  ESCRIPT_DLL_API
   Data
   nonuniformslope(boost::python::object in, boost::python::object out, bool check_boundaries);  
   
@@ -849,11 +832,9 @@ If false, the result is a list of scalars [1, 2, ...]
      If functionspace is not present the function space of Function(getDomain()) is used.
      *
   */
-  ESCRIPT_DLL_API
   Data
   gradOn(const FunctionSpace& functionspace) const;
 
-  ESCRIPT_DLL_API
   Data
   grad() const;
 
@@ -861,7 +842,6 @@ If false, the result is a list of scalars [1, 2, ...]
     \brief
      Calculate the integral over the function space domain as a python tuple.
   */
-  ESCRIPT_DLL_API
   boost::python::object
   integrateToTuple_const() const;
 
@@ -870,7 +850,6 @@ If false, the result is a list of scalars [1, 2, ...]
     \brief
      Calculate the integral over the function space domain as a python tuple.
   */
-  ESCRIPT_DLL_API
   boost::python::object
   integrateToTuple();
 
@@ -881,7 +860,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Returns 1./ Data object
      *
   */
-  ESCRIPT_DLL_API
   Data
   oneOver() const;
   /**
@@ -889,7 +867,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return a Data with a 1 for +ive values and a 0 for 0 or -ive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
   wherePositive() const;
 
@@ -898,7 +875,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return a Data with a 1 for -ive values and a 0 for +ive or 0 values.
      *
   */
-  ESCRIPT_DLL_API
   Data
   whereNegative() const;
 
@@ -907,7 +883,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return a Data with a 1 for +ive or 0 values and a 0 for -ive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
   whereNonNegative() const;
 
@@ -916,7 +891,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return a Data with a 1 for -ive or 0 values and a 0 for +ive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
   whereNonPositive() const;
 
@@ -925,18 +899,16 @@ If false, the result is a list of scalars [1, 2, ...]
      Return a Data with a 1 for 0 values and a 0 for +ive or -ive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
-  whereZero(double tol=0.0) const;
+  whereZero(DataTypes::real_t tol=0.0) const;
 
   /**
      \brief
      Return a Data with a 0 for 0 values and a 1 for +ive or -ive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
-  whereNonZero(double tol=0.0) const;
+  whereNonZero(DataTypes::real_t tol=0.0) const;
 
   /**
      \brief
@@ -949,12 +921,10 @@ If false, the result is a list of scalars [1, 2, ...]
      For Data which contain no samples (or tagged Data for which no tags in use have a value)
      zero is returned.
   */
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   Lsup();
 
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   Lsup_const() const;
 
 
@@ -969,12 +939,10 @@ If false, the result is a list of scalars [1, 2, ...]
      For Data which contain no samples (or tagged Data for which no tags in use have a value)
      a large negative value is returned.
   */
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   sup();
 
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   sup_const() const;
 
 
@@ -989,12 +957,10 @@ If false, the result is a list of scalars [1, 2, ...]
      For Data which contain no samples (or tagged Data for which no tags in use have a value)
      a large positive value is returned.
   */
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   inf();
 
-  ESCRIPT_DLL_API
-  double
+  DataTypes::real_t
   inf_const() const;
 
 
@@ -1004,7 +970,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return the absolute value of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   abs() const;
 
@@ -1013,7 +978,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return the maximum value of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   maxval() const;
 
@@ -1022,7 +986,6 @@ If false, the result is a list of scalars [1, 2, ...]
      Return the minimum value of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   minval() const;
 
@@ -1033,7 +996,6 @@ If false, the result is a list of scalars [1, 2, ...]
      \note If you are working in python, please consider using Locator 
 instead of manually manipulating process and point IDs.
   */
-  ESCRIPT_DLL_API
   const boost::python::tuple
   minGlobalDataPoint() const;
 
@@ -1044,7 +1006,6 @@ instead of manually manipulating process and point IDs.
      \note If you are working in python, please consider using Locator 
 instead of manually manipulating process and point IDs.
   */
-  ESCRIPT_DLL_API
   const boost::python::tuple
   maxGlobalDataPoint() const;
 
@@ -1056,7 +1017,6 @@ instead of manually manipulating process and point IDs.
      -1 for negative values, zero for zero values, 1 for positive values.
      *
   */
-  ESCRIPT_DLL_API
   Data
   sign() const;
 
@@ -1065,25 +1025,39 @@ instead of manually manipulating process and point IDs.
      Return the symmetric part of a matrix which is half the matrix plus its transpose.
      *
   */
-  ESCRIPT_DLL_API
   Data
   symmetric() const;
 
   /**
      \brief
-     Return the nonsymmetric part of a matrix which is half the matrix minus its transpose.
+     Return the antisymmetric part of a matrix which is half the matrix minus its transpose.
      *
   */
-  ESCRIPT_DLL_API
   Data
-  nonsymmetric() const;
+  antisymmetric() const;
+
+
+  /**
+     \brief
+     Return the hermitian part of a matrix which is half the matrix plus its adjoint.
+     *
+  */
+  Data
+  hermitian() const;
+
+  /**
+     \brief
+     Return the anti-hermitian part of a matrix which is half the matrix minus its adjoint.
+     *
+  */
+  Data
+  antihermitian() const;
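+
+  Stated as formulas, with A^H the conjugate transpose, the two parts declared above are:
+
+      \operatorname{herm}(A) = \tfrac{1}{2}\,(A + A^{H}), \qquad
+      \operatorname{antiherm}(A) = \tfrac{1}{2}\,(A - A^{H}), \qquad
+      A = \operatorname{herm}(A) + \operatorname{antiherm}(A)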
 
   /**
      \brief
      Return the trace of a matrix
      *
   */
-  ESCRIPT_DLL_API
   Data
   trace(int axis_offset) const;
 
@@ -1092,7 +1066,6 @@ instead of manually manipulating process and point IDs.
      Transpose each data point of this Data object around the given axis.
      *
   */
-  ESCRIPT_DLL_API
   Data
   transpose(int axis_offset) const;
 
@@ -1102,7 +1075,6 @@ instead of manually manipulating process and point IDs.
      Currently this function is restricted to rank 2, square shape, and dimension 3.
      *
   */
-  ESCRIPT_DLL_API
   Data
   eigenvalues() const;
 
@@ -1115,16 +1087,14 @@ instead of manually manipulating process and point IDs.
      Currently this function is restricted to rank 2, square shape, and dimension 3
      *
   */
-  ESCRIPT_DLL_API
   const boost::python::tuple
-  eigenvalues_and_eigenvectors(const double tol=1.e-12) const;
+  eigenvalues_and_eigenvectors(const DataTypes::real_t tol=1.e-12) const;
 
   /**
      \brief
      swaps the components axis0 and axis1
      *
   */
-  ESCRIPT_DLL_API
   Data
   swapaxes(const int axis0, const int axis1) const;
 
@@ -1133,16 +1103,29 @@ instead of manually manipulating process and point IDs.
      Return the error function erf of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   erf() const;
 
+
+  /**
+     \brief
+     For complex values return the conjugate values.
+     For non-complex data return a copy
+  */
+  Data
+  conjugate() const;
+  
+  Data
+  real() const;  
+  
+  Data
+  imag() const;  
+
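A short sketch of the component accessors declared above (d is an assumed Data object):

    escript::Data re = d.real();       // real component
    escript::Data im = d.imag();       // imaginary component
    escript::Data cj = d.conjugate();  // conjugate; a plain copy when d is not complex
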
   /**
      \brief
      Return the sin of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   sin() const;
 
@@ -1151,7 +1134,6 @@ instead of manually manipulating process and point IDs.
      Return the cos of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   cos() const;
 
@@ -1160,9 +1142,8 @@ instead of manually manipulating process and point IDs.
      Bessel worker function.
      *
   */
-  ESCRIPT_DLL_API
   Data
-  bessel(int order, double (*besselfunc) (int,double) );
+  bessel(int order, DataTypes::real_t (*besselfunc) (int,DataTypes::real_t) );
   
 
   /**
@@ -1170,7 +1151,6 @@ instead of manually manipulating process and point IDs.
      Return the Bessel function of the first kind for each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   besselFirstKind(int order);
 
@@ -1179,7 +1159,6 @@ instead of manually manipulating process and point IDs.
      Return the Bessel function of the second kind for each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   besselSecondKind(int order);
 
@@ -1189,7 +1168,6 @@ instead of manually manipulating process and point IDs.
      Return the tan of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   tan() const;
 
@@ -1198,7 +1176,6 @@ instead of manually manipulating process and point IDs.
      Return the asin of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   asin() const;
 
@@ -1207,7 +1184,6 @@ instead of manually manipulating process and point IDs.
      Return the acos of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   acos() const;
 
@@ -1216,7 +1192,6 @@ instead of manually manipulating process and point IDs.
      Return the atan of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   atan() const;
 
@@ -1225,7 +1200,6 @@ instead of manually manipulating process and point IDs.
      Return the sinh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   sinh() const;
 
@@ -1234,7 +1208,6 @@ instead of manually manipulating process and point IDs.
      Return the cosh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   cosh() const;
 
@@ -1243,7 +1216,6 @@ instead of manually manipulating process and point IDs.
      Return the tanh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   tanh() const;
 
@@ -1252,7 +1224,6 @@ instead of manually manipulating process and point IDs.
      Return the asinh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   asinh() const;
 
@@ -1261,7 +1232,6 @@ instead of manually manipulating process and point IDs.
      Return the acosh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   acosh() const;
 
@@ -1270,7 +1240,6 @@ instead of manually manipulating process and point IDs.
      Return the atanh of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   atanh() const;
 
@@ -1279,7 +1248,6 @@ instead of manually manipulating process and point IDs.
      Return the log to base 10 of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   log10() const;
 
@@ -1288,7 +1256,6 @@ instead of manually manipulating process and point IDs.
      Return the natural log of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   log() const;
 
@@ -1297,7 +1264,6 @@ instead of manually manipulating process and point IDs.
      Return the exponential function of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   exp() const;
 
@@ -1306,7 +1272,6 @@ instead of manually manipulating process and point IDs.
      Return the square root of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   sqrt() const;
 
@@ -1315,7 +1280,6 @@ instead of manually manipulating process and point IDs.
      Return the negation of each data point of this Data object.
      *
   */
-  ESCRIPT_DLL_API
   Data
   neg() const;
 
@@ -1325,7 +1289,6 @@ instead of manually manipulating process and point IDs.
      Simply returns this object unmodified.
      *
   */
-  ESCRIPT_DLL_API
   Data
   pos() const;
 
@@ -1336,7 +1299,6 @@ instead of manually manipulating process and point IDs.
      \param right Input - the power to raise the object to.
      *
   */
-  ESCRIPT_DLL_API
   Data
   powD(const Data& right) const;
 
@@ -1347,7 +1309,6 @@ instead of manually manipulating process and point IDs.
      \param right Input - the power to raise the object to.
      *
    */
-  ESCRIPT_DLL_API
   Data
   powO(const boost::python::object& right) const;
 
@@ -1359,7 +1320,6 @@ instead of manually manipulating process and point IDs.
      *
    */
 
-  ESCRIPT_DLL_API
   Data
   rpowO(const boost::python::object& left) const;
 
@@ -1369,12 +1329,9 @@ instead of manually manipulating process and point IDs.
      \param right - Input - The right hand side.
      *
   */
-  ESCRIPT_DLL_API
   Data& operator+=(const Data& right);
-  ESCRIPT_DLL_API
   Data& operator+=(const boost::python::object& right);
 
-  ESCRIPT_DLL_API
   Data& operator=(const Data& other);
 
   /**
@@ -1383,9 +1340,7 @@ instead of manually manipulating process and point IDs.
      \param right - Input - The right hand side.
      *
   */
-  ESCRIPT_DLL_API
   Data& operator-=(const Data& right);
-  ESCRIPT_DLL_API
   Data& operator-=(const boost::python::object& right);
 
  /**
@@ -1394,9 +1349,7 @@ instead of manually manipulating process and point IDs.
      \param right - Input - The right hand side.
      *
   */
-  ESCRIPT_DLL_API
   Data& operator*=(const Data& right);
-  ESCRIPT_DLL_API
   Data& operator*=(const boost::python::object& right);
 
  /**
@@ -1405,37 +1358,31 @@ instead of manually manipulating process and point IDs.
      \param right - Input - The right hand side.
      *
   */
-  ESCRIPT_DLL_API
   Data& operator/=(const Data& right);
-  ESCRIPT_DLL_API
   Data& operator/=(const boost::python::object& right);
 
   /**
     \brief
     Newer style division operator for python
   */
-  ESCRIPT_DLL_API
   Data truedivD(const Data& right);
 
   /**
     \brief
     Newer style division operator for python
   */
-  ESCRIPT_DLL_API
   Data truedivO(const boost::python::object& right);
 
   /**
     \brief
     Newer style division operator for python
   */
-  ESCRIPT_DLL_API
   Data rtruedivO(const boost::python::object& left);
 
   /**
     \brief
     wrapper for python add operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __add__(const boost::python::object& right);
   
 
@@ -1443,41 +1390,35 @@ instead of manually manipulating process and point IDs.
     \brief
     wrapper for python subtract operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __sub__(const boost::python::object& right);
   
   /**
     \brief
     wrapper for python reverse subtract operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __rsub__(const boost::python::object& right);  
 
   /**
     \brief
     wrapper for python multiply operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __mul__(const boost::python::object& right);
     
   /**
     \brief
     wrapper for python divide operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __div__(const boost::python::object& right);
   
   /**
     \brief
     wrapper for python reverse divide operation
   */
-  ESCRIPT_DLL_API
   boost::python::object __rdiv__(const boost::python::object& right);    
   
   /**
-	\brief return inverse of matricies.
+        \brief return inverse of matrices.
   */
-  ESCRIPT_DLL_API
   Data
   matrixInverse() const;
 
@@ -1485,7 +1426,6 @@ instead of manually manipulating process and point IDs.
      \brief
      Returns true if this can be interpolated to functionspace.
   */
-  ESCRIPT_DLL_API
   bool
   probeInterpolation(const FunctionSpace& functionspace) const;
 
@@ -1504,7 +1444,6 @@ instead of manually manipulating process and point IDs.
      \param key - Input - python slice tuple specifying
      slice to return.
   */
-  ESCRIPT_DLL_API
   Data
   getItem(const boost::python::object& key) const;
 
@@ -1519,12 +1458,10 @@ instead of manually manipulating process and point IDs.
      slice to copy from value.
      \param value - Input - Data object to copy from.
   */
-  ESCRIPT_DLL_API
   void
   setItemD(const boost::python::object& key,
            const Data& value);
 
-  ESCRIPT_DLL_API
   void
   setItemO(const boost::python::object& key,
            const boost::python::object& value);
@@ -1537,7 +1474,6 @@ instead of manually manipulating process and point IDs.
      this Data object.
   */
   template <class UnaryFunction>
-  ESCRIPT_DLL_API
   inline
   void
   unaryOp2(UnaryFunction operation);
@@ -1549,7 +1485,6 @@ instead of manually manipulating process and point IDs.
      \param region - Input - Region to copy.
      *
   */
-  ESCRIPT_DLL_API
   Data
   getSlice(const DataTypes::RegionType& region) const;
 
@@ -1561,7 +1496,6 @@ instead of manually manipulating process and point IDs.
      \param region - Input - Region to copy.
      *
   */
-  ESCRIPT_DLL_API
   void
   setSlice(const Data& value,
            const DataTypes::RegionType& region);
@@ -1570,9 +1504,8 @@ instead of manually manipulating process and point IDs.
      \brief
      print the data values to stdout. Used for debugging
   */
-  ESCRIPT_DLL_API
   void
-        print(void);
+  print(void);
 
   /**
      \brief
@@ -1580,7 +1513,6 @@ instead of manually manipulating process and point IDs.
                 MPI_COMM_WORLD is assumed and the result of MPI_Comm_rank()
                  is returned
   */
-  ESCRIPT_DLL_API
         int
         get_MPIRank(void) const;
 
@@ -1590,7 +1522,6 @@ instead of manually manipulating process and point IDs.
                 MPI_COMM_WORLD is assumed and the result of MPI_Comm_size()
                  is returned
   */
-  ESCRIPT_DLL_API
         int
         get_MPISize(void) const;
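get_MPIRank() and get_MPISize() are thin wrappers over the usual queries on MPI_COMM_WORLD. For reference, the plain MPI equivalent of the two calls looks like this (standalone, not escript code):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);

        int rank = 0, size = 1;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);   // this process's id, 0 .. size-1
        MPI_Comm_size(MPI_COMM_WORLD, &size);   // total number of processes

        std::printf("rank %d of %d\n", rank, size);

        MPI_Finalize();
        return 0;
    }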
 
@@ -1599,24 +1530,20 @@ instead of manually manipulating process and point IDs.
      return the MPI communicator of the local data
                  MPI_COMM_WORLD is assumed and returned.
   */
-  ESCRIPT_DLL_API
         MPI_Comm
         get_MPIComm(void) const;
 
   /**
      \brief
      return the object produced by the factory, which is a DataConstant or DataExpanded
-	TODO Ownership of this object should be explained in doco.
+        TODO Ownership of this object should be explained in the documentation.
   */
-  ESCRIPT_DLL_API
         DataAbstract*
         borrowData(void) const;
 
-  ESCRIPT_DLL_API
         DataAbstract_ptr
         borrowDataPtr(void) const;
 
-  ESCRIPT_DLL_API
         DataReady_ptr
         borrowReadyPtr(void) const;
 
@@ -1629,14 +1556,18 @@ instead of manually manipulating process and point IDs.
      \param i - position(offset) in the underlying datastructure
   */
 
-  ESCRIPT_DLL_API
-        DataTypes::ValueType::const_reference
-        getDataAtOffsetRO(DataTypes::ValueType::size_type i);
+        DataTypes::RealVectorType::const_reference
+        getDataAtOffsetRO(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0);
 
+        DataTypes::RealVectorType::reference
+        getDataAtOffsetRW(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0);
+        
+        DataTypes::CplxVectorType::const_reference
+        getDataAtOffsetRO(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy);
 
-  ESCRIPT_DLL_API
-        DataTypes::ValueType::reference
-        getDataAtOffsetRW(DataTypes::ValueType::size_type i);
+        DataTypes::CplxVectorType::reference
+        getDataAtOffsetRW(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy);     
+        
 
   /**
     \brief Ensures that the Data is expanded and returns its underlying vector
@@ -1647,38 +1578,48 @@ instead of manually manipulating process and point IDs.
     to allow quick initialisation of Data by domain; not as a bypass around 
     escript's other mechanisms.
   */
-  ESCRIPT_DLL_API
-  DataTypes::ValueType&
-  getExpandedVectorReference();
+  DataTypes::RealVectorType&
+  getExpandedVectorReference(DataTypes::real_t dummy=0);
+
+  DataTypes::CplxVectorType&
+  getExpandedVectorReference(DataTypes::cplx_t dummy);
   
   
   /**
    * \brief For tagged Data returns the number of tags with values.
    * For non-tagged data will return 0 (even Data which has been expanded from tagged).
   */ 
-  ESCRIPT_DLL_API
   size_t
   getNumberOfTaggedValues() const;
+
+  /**
+     \brief make the data complex
+  */
+  void complicate();
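The DataTypes::real_t / DataTypes::cplx_t "dummy" parameters added to getDataAtOffsetRO/RW and getExpandedVectorReference above exist only so that overload resolution can select the real or the complex storage without templating every caller. A standalone sketch of the idiom (Store, real_t and cplx_t below are stand-ins, not the escript types):

    #include <complex>
    #include <iostream>
    #include <vector>

    typedef double real_t;
    typedef std::complex<double> cplx_t;

    // One object holding both storages; the otherwise unused dummy argument
    // picks which one an overload returns.
    struct Store
    {
        std::vector<real_t> rvec;
        std::vector<cplx_t> cvec;

        std::vector<real_t>& typedVector(real_t /*dummy*/ = 0) { return rvec; }
        std::vector<cplx_t>& typedVector(cplx_t /*dummy*/)     { return cvec; }
    };

    int main()
    {
        Store s;
        s.typedVector().push_back(1.5);                    // defaulted real_t dummy -> real storage
        s.typedVector(cplx_t()).push_back(cplx_t(0, 1));   // cplx_t dummy -> complex storage
        std::cout << s.rvec.size() << " " << s.cvec.size() << std::endl;   // prints: 1 1
        return 0;
    }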
  
  protected:
 
  private:
+   void init_from_data_and_fs(const Data& inData,
+           const FunctionSpace& functionspace);   
+   
+   
 
 template <class BinaryOp>
-  double 
+  DataTypes::real_t 
 #ifdef ESYS_MPI
-  lazyAlgWorker(double init, MPI_Op mpiop_type);
+  lazyAlgWorker(DataTypes::real_t init, MPI_Op mpiop_type);
 #else
-  lazyAlgWorker(double init);
+  lazyAlgWorker(DataTypes::real_t init);
 #endif
 
-  double
+  DataTypes::real_t
   LsupWorker() const;
 
-  double
+  DataTypes::real_t
   supWorker() const;
 
-  double
+  DataTypes::real_t
   infWorker() const;
 
   boost::python::object
@@ -1720,9 +1661,9 @@ template <class BinaryOp>
   */
   template <class BinaryFunction>
   inline
-  double
-  algorithm(BinaryFunction operation,
-            double initial_value) const;
+  DataTypes::real_t
+  reduction(BinaryFunction operation,
+            DataTypes::real_t initial_value) const;
 
   /**
      \brief
@@ -1735,21 +1676,7 @@ template <class BinaryOp>
   inline
   Data
   dp_algorithm(BinaryFunction operation,
-               double initial_value) const;
-
-  /**
-     \brief
-     Perform the given binary operation on all of the data's elements.
-     The underlying type of the right hand side (right) determines the final
-     type of *this after the operation. For example if the right hand side
-     is expanded *this will be expanded if necessary.
-     RHS is a Data object.
-  */
-  template <class BinaryFunction>
-  inline
-  void
-  binaryOp(const Data& right,
-           BinaryFunction operation);
+               DataTypes::real_t initial_value) const;
 
   /**
      \brief
@@ -1773,10 +1700,16 @@ template <class BinaryOp>
   */
 
   void
-  initialise(const DataTypes::ValueType& value,
-	     const DataTypes::ShapeType& shape,
+  initialise(const DataTypes::RealVectorType& value,
+             const DataTypes::ShapeType& shape,
              const FunctionSpace& what,
              bool expanded);
+  
+  void
+  initialise(const DataTypes::CplxVectorType& value,
+             const DataTypes::ShapeType& shape,
+             const FunctionSpace& what,
+             bool expanded);  
 
   void
   initialise(const WrappedArray& value,
@@ -1784,15 +1717,19 @@ template <class BinaryOp>
                  bool expanded);
 
   void
-  initialise(const double value,
-	     const DataTypes::ShapeType& shape,
+  initialise(const DataTypes::real_t value,
+             const DataTypes::ShapeType& shape,
              const FunctionSpace& what,
              bool expanded);
 
+  void
+  initialise(const DataTypes::cplx_t value,
+             const DataTypes::ShapeType& shape,
+             const FunctionSpace& what,
+             bool expanded);  
   //
   // flag to protect the data object against any update
   bool m_protected;
-  mutable bool m_shared;
   bool m_lazy;
 
   //
@@ -1803,31 +1740,40 @@ template <class BinaryOp>
 // If possible please use getReadyPtr instead.
 // But see warning below.
   const DataReady*
-  getReady() const;
+  getReady() const
+{
+   const DataReady* dr=dynamic_cast<const DataReady*>(m_data.get());
+   ESYS_ASSERT(dr!=0, "error casting to DataReady.");
+   return dr;
+}  
 
   DataReady*
-  getReady();
+  getReady()
+{
+   DataReady* dr=dynamic_cast<DataReady*>(m_data.get());
+   ESYS_ASSERT(dr!=0, "error casting to DataReady.");
+   return dr;
+}  
 
 
 // Be wary of using this for local operations since it (temporarily) increases reference count.
 // If you are just using this to call a method on DataReady instead of DataAbstract consider using 
 // getReady() instead
   DataReady_ptr
-  getReadyPtr();
+  getReadyPtr()
+{
+   DataReady_ptr dr=REFCOUNTNS::dynamic_pointer_cast<DataReady>(m_data);
+   ESYS_ASSERT(dr.get()!=0, "error casting to DataReady.");
+   return dr;
+}  
 
   const_DataReady_ptr
-  getReadyPtr() const;
-
-
-  /**
-   \brief Update the Data's shared flag
-   This indicates that the DataAbstract used by this object is now shared (or no longer shared).
-   For internal use only.
-  */
-  void updateShareStatus(bool nowshared) const
-  {
-	m_shared=nowshared;		// m_shared is mutable
-  }
+  getReadyPtr() const
+{
+   const_DataReady_ptr dr=REFCOUNTNS::dynamic_pointer_cast<const DataReady>(m_data);
+   ESYS_ASSERT(dr.get()!=0, "error casting to DataReady.");
+   return dr;
+}    
 
   // In the isShared() method below:
   // A problem would occur if m_data (the address pointed to) were being modified 
@@ -1845,28 +1791,25 @@ template <class BinaryOp>
   // For any threads executing before the flag switches they will assume the object is still shared.
   bool isShared() const
   {
-	return m_shared;
-/*	if (m_shared) return true;
-	if (m_data->isShared())			
-	{					
-		updateShareStatus(true);
-		return true;
-	}
-	return false;*/
+#ifdef SLOWSHARECHECK        
+	return m_data->isShared();      // single threadsafe check for this
+#else
+	return !m_data.unique();
+#endif	
   }
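isShared() above now asks the smart pointer directly instead of maintaining the removed m_shared flag. A small standalone sketch of that check with std::shared_ptr; unique() was deprecated in C++17 and removed in C++20, so use_count() == 1 is the forward-compatible spelling, and, as the surrounding comments note, the answer is only a hint while other threads may still be attaching owners. (Whether REFCOUNTNS resolves to boost or std is not shown in this hunk; both smart-pointer families provide use_count().)

    #include <iostream>
    #include <memory>

    struct Payload { int x; };

    // Equivalent to !p.unique(): a use count other than 1 means another owner exists.
    static bool is_shared(const std::shared_ptr<Payload>& p)
    {
        return p.use_count() != 1;
    }

    int main()
    {
        std::shared_ptr<Payload> a(new Payload());
        std::cout << std::boolalpha << is_shared(a) << "\n";   // false
        std::shared_ptr<Payload> b = a;                        // a second owner appears
        std::cout << is_shared(a) << "\n";                     // true
        return 0;
    }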
 
   void forceResolve()
   {
-	if (isLazy())
-	{
-	    #ifdef _OPENMP
-	    if (omp_in_parallel())
-	    {	// Yes this is throwing an exception out of an omp thread which is forbidden.
-		throw DataException("Please do not call forceResolve() in a parallel region.");
-	    }
-	    #endif
-	    resolve();
-	}
+        if (isLazy())
+        {
+            #ifdef _OPENMP
+            if (omp_in_parallel())
+            {   // Yes this is throwing an exception out of an omp thread which is forbidden.
+                throw DataException("Please do not call forceResolve() in a parallel region.");
+            }
+            #endif
+            resolve();
+        }
   }
 
   /**
@@ -1878,19 +1821,18 @@ template <class BinaryOp>
 #ifdef _OPENMP
   if (omp_in_parallel())
   {
-// *((int*)0)=17;
-	throw DataException("Programming error. Please do not run exclusiveWrite() in multi-threaded sections.");
+        throw DataException("Programming error. Please do not run exclusiveWrite() in multi-threaded sections.");
   }
 #endif
-	forceResolve();
-	if (isShared())
-	{
-		DataAbstract* t=m_data->deepCopy();
-   		set_m_data(DataAbstract_ptr(t));
-	}
-#ifdef EXWRITECHK		
-	m_data->exclusivewritecalled=true;
-#endif	
+        forceResolve();
+        if (isShared())
+        {
+                DataAbstract* t=m_data->deepCopy();
+                set_m_data(DataAbstract_ptr(t));
+        }
+#ifdef EXWRITECHK               
+        m_data->exclusivewritecalled=true;
+#endif  
   }
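exclusiveWrite() above is the classic copy-on-write step: resolve any lazy expression, then deep-copy the payload only if it is still shared. A self-contained sketch of the same pattern (CowString and its members are purely illustrative):

    #include <cstddef>
    #include <iostream>
    #include <memory>
    #include <string>

    // Minimal copy-on-write wrapper: readers share one buffer, a writer clones it first.
    class CowString
    {
        std::shared_ptr<std::string> data_;
    public:
        explicit CowString(const std::string& s) : data_(new std::string(s)) {}

        const std::string& read() const { return *data_; }

        void write(std::size_t i, char c)
        {
            if (data_.use_count() != 1)                        // shared: detach before mutating
                data_ = std::make_shared<std::string>(*data_);
            (*data_)[i] = c;
        }
    };

    int main()
    {
        CowString a("abc");
        CowString b = a;            // shares the buffer with a
        b.write(0, 'X');            // b detaches; a is untouched
        std::cout << a.read() << " " << b.read() << "\n";      // prints: abc Xbc
        return 0;
    }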
 
   /**
@@ -1898,10 +1840,12 @@ template <class BinaryOp>
   */
   void checkExclusiveWrite()
   {
-	if  (isLazy() || isShared())
-	{
-		throw DataException("Programming error. ExclusiveWrite required - please call requireWrite()");
-	}
+        if  (isLazy() || isShared())
+        {
+                std::ostringstream oss;
+                oss << "Programming error. ExclusiveWrite required - please call requireWrite(). isLazy=" << isLazy() << " isShared()=" << isShared();
+                throw DataException(oss.str());
+        }
   }
 
   /**
@@ -1912,25 +1856,25 @@ template <class BinaryOp>
   */
   void set_m_data(DataAbstract_ptr p);
 
-  friend class DataAbstract;		// To allow calls to updateShareStatus
-  friend class TestDomain;		// so its getX will work quickly
+  
+  void TensorSelfUpdateBinaryOperation(const Data& right, escript::ES_optype operation);  
+  
+  friend class DataAbstract;            // To allow calls to updateShareStatus
+  friend class TestDomain;              // so its getX will work quickly
 #ifdef IKNOWWHATIMDOING
-  friend ESCRIPT_DLL_API Data applyBinaryCFunction(boost::python::object cfunc, boost::python::tuple shape, escript::Data& d, escript::Data& e);
+  friend Data applyBinaryCFunction(boost::python::object cfunc, boost::python::tuple shape, escript::Data& d, escript::Data& e);
 #endif
-  friend ESCRIPT_DLL_API Data condEval(escript::Data& mask, escript::Data& trueval, escript::Data& falseval);
-  friend ESCRIPT_DLL_API Data randomData(const boost::python::tuple& shape, const FunctionSpace& what, long seed, const boost::python::tuple& filter);
+  friend Data condEval(escript::Data& mask, escript::Data& trueval, escript::Data& falseval);
+  friend Data randomData(const boost::python::tuple& shape, const FunctionSpace& what, long seed, const boost::python::tuple& filter);
 
 };
 
 
 #ifdef IKNOWWHATIMDOING
-ESCRIPT_DLL_API
 Data
 applyBinaryCFunction(boost::python::object func, boost::python::tuple shape, escript::Data& d, escript::Data& e);
 #endif
 
-
-ESCRIPT_DLL_API
 Data
 condEval(escript::Data& mask, escript::Data& trueval, escript::Data& falseval);
 
@@ -1939,7 +1883,6 @@ condEval(escript::Data& mask, escript::Data& trueval, escript::Data& falseval);
 /**
  \brief Create a new Expanded Data object filled with pseudo-random data.
 */
-ESCRIPT_DLL_API
 Data randomData(const boost::python::tuple& shape,
        const FunctionSpace& what,
        long seed, const boost::python::tuple& filter);
@@ -1953,57 +1896,39 @@ Data randomData(const boost::python::tuple& shape,
 // so that I can dynamic cast between them below.
 #include "DataReady.h"
 #include "DataLazy.h"
+#include "DataExpanded.h"
+#include "DataConstant.h"
+#include "DataTagged.h"
 
 namespace escript
 {
 
-inline
-const DataReady*
-Data::getReady() const
-{
-   const DataReady* dr=dynamic_cast<const DataReady*>(m_data.get());
-   EsysAssert((dr!=0), "Error - casting to DataReady.");
-   return dr;
-}
-
-inline
-DataReady*
-Data::getReady()
-{
-   DataReady* dr=dynamic_cast<DataReady*>(m_data.get());
-   EsysAssert((dr!=0), "Error - casting to DataReady.");
-   return dr;
-}
-
-// Be wary of using this for local operations since it (temporarily) increases reference count.
-// If you are just using this to call a method on DataReady instead of DataAbstract consider using 
-// getReady() instead
-inline
-DataReady_ptr
-Data::getReadyPtr()
-{
-   DataReady_ptr dr=boost::dynamic_pointer_cast<DataReady>(m_data);
-   EsysAssert((dr.get()!=0), "Error - casting to DataReady.");
-   return dr;
-}
 
 
 inline
-const_DataReady_ptr
-Data::getReadyPtr() const
+DataTypes::real_t*
+Data::getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy)
 {
-   const_DataReady_ptr dr=boost::dynamic_pointer_cast<const DataReady>(m_data);
-   EsysAssert((dr.get()!=0), "Error - casting to DataReady.");
-   return dr;
+   if (isLazy())
+   {
+        throw DataException("Error, attempt to acquire RW access to lazy data. Please call requireWrite() first.");
+   }
+#ifdef EXWRITECHK
+   if (!getReady()->exclusivewritecalled)
+   {
+        throw DataException("Error, call to Data::getSampleDataRW without a preceeding call to requireWrite/exclusiveWrite.");
+   }
+#endif
+   return getReady()->getSampleDataRW(sampleNo, dummy);
 }
 
 inline
-DataAbstract::ValueType::value_type*
-Data::getSampleDataRW(DataAbstract::ValueType::size_type sampleNo)
+DataTypes::cplx_t*
+Data::getSampleDataRW(DataTypes::CplxVectorType::size_type sampleNo, DataTypes::cplx_t dummy)
 {
    if (isLazy())
    {
-	throw DataException("Error, attempt to acquire RW access to lazy data. Please call requireWrite() first.");
+        throw DataException("Error, attempt to acquire RW access to lazy data. Please call requireWrite() first.");
    }
 #ifdef EXWRITECHK
    if (!getReady()->exclusivewritecalled)
@@ -2011,26 +1936,40 @@ Data::getSampleDataRW(DataAbstract::ValueType::size_type sampleNo)
         throw DataException("Error, call to Data::getSampleDataRW without a preceeding call to requireWrite/exclusiveWrite.");
    }
 #endif
-   return getReady()->getSampleDataRW(sampleNo);
+   return getReady()->getSampleDataRW(sampleNo, dummy);
+}
+
+
+inline
+const DataTypes::real_t*
+Data::getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo,DataTypes::real_t dummy) const
+{
+   DataLazy* l=dynamic_cast<DataLazy*>(m_data.get());
+   if (l!=0)
+   {
+        size_t offset=0;
+        const DataTypes::RealVectorType* res=l->resolveSample(sampleNo,offset);
+        return &((*res)[offset]);
+   }
+   return getReady()->getSampleDataRO(sampleNo, dummy);
 }
 
 inline
-const DataAbstract::ValueType::value_type*
-Data::getSampleDataRO(DataAbstract::ValueType::size_type sampleNo) const
+const DataTypes::cplx_t*
+Data::getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy) const
 {
    DataLazy* l=dynamic_cast<DataLazy*>(m_data.get());
    if (l!=0)
    {
-	size_t offset=0;
-	const DataTypes::ValueType* res=l->resolveSample(sampleNo,offset);
-	return &((*res)[offset]);
+	throw DataException("Programming error: complex lazy objects are not supported.");	
    }
-   return getReady()->getSampleDataRO(sampleNo);
+   return getReady()->getSampleDataRO(sampleNo, dummy);
 }
 
+
 inline
-const DataAbstract::ValueType::value_type*
-Data::getDataRO() const
+const DataTypes::real_t*
+Data::getDataRO(DataTypes::real_t dummy) const
 {
     if (isLazy())
     {
@@ -2038,11 +1977,29 @@ Data::getDataRO() const
     }
     if (getNumSamples()==0)
     {
-	return 0;
+        return 0;
     }
     else
     {
-	return &(getReady()->getVectorRO()[0]);
+        return &(getReady()->getTypedVectorRO(0)[0]);
+    }
+}
+
+inline
+const DataTypes::cplx_t*
+Data::getDataRO(DataTypes::cplx_t dummy) const
+{
+    if (isLazy())
+    {
+        throw DataException("Programmer error - getDataRO must not be called on Lazy Data.");
+    }
+    if (getNumSamples()==0)
+    {
+        return 0;
+    }
+    else
+    {
+        return &(getReady()->getTypedVectorRO(dummy)[0]);
     }
 }
 
@@ -2050,7 +2007,7 @@ Data::getDataRO() const
 /**
    Binary Data object operators.
 */
-inline double rpow(double x,double y)
+inline DataTypes::real_t rpow(DataTypes::real_t x,DataTypes::real_t y)
 {
     return pow(y,x);
 }
@@ -2060,28 +2017,28 @@ inline double rpow(double x,double y)
   Operator+
   Takes two Data objects.
 */
-ESCRIPT_DLL_API Data operator+(const Data& left, const Data& right);
+Data operator+(const Data& left, const Data& right);
 
 /**
   \brief
   Operator-
   Takes two Data objects.
 */
-ESCRIPT_DLL_API Data operator-(const Data& left, const Data& right);
+Data operator-(const Data& left, const Data& right);
 
 /**
   \brief
   Operator*
   Takes two Data objects.
 */
-ESCRIPT_DLL_API Data operator*(const Data& left, const Data& right);
+Data operator*(const Data& left, const Data& right);
 
 /**
   \brief
   Operator/
   Takes two Data objects.
 */
-ESCRIPT_DLL_API Data operator/(const Data& left, const Data& right);
+Data operator/(const Data& left, const Data& right);
 
 /**
   \brief
@@ -2089,7 +2046,7 @@ ESCRIPT_DLL_API Data operator/(const Data& left, const Data& right);
   Takes LHS Data object and RHS python::object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator+(const Data& left, const boost::python::object& right);
+Data operator+(const Data& left, const boost::python::object& right);
 
 /**
   \brief
@@ -2097,7 +2054,7 @@ ESCRIPT_DLL_API Data operator+(const Data& left, const boost::python::object& ri
   Takes LHS Data object and RHS python::object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator-(const Data& left, const boost::python::object& right);
+Data operator-(const Data& left, const boost::python::object& right);
 
 /**
   \brief
@@ -2105,7 +2062,7 @@ ESCRIPT_DLL_API Data operator-(const Data& left, const boost::python::object& ri
   Takes LHS Data object and RHS python::object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator*(const Data& left, const boost::python::object& right);
+Data operator*(const Data& left, const boost::python::object& right);
 
 /**
   \brief
@@ -2113,7 +2070,7 @@ ESCRIPT_DLL_API Data operator*(const Data& left, const boost::python::object& ri
   Takes LHS Data object and RHS python::object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator/(const Data& left, const boost::python::object& right);
+Data operator/(const Data& left, const boost::python::object& right);
 
 /**
   \brief
@@ -2121,7 +2078,7 @@ ESCRIPT_DLL_API Data operator/(const Data& left, const boost::python::object& ri
   Takes LHS python::object and RHS Data object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator+(const boost::python::object& left, const Data& right);
+Data operator+(const boost::python::object& left, const Data& right);
 
 /**
   \brief
@@ -2129,7 +2086,7 @@ ESCRIPT_DLL_API Data operator+(const boost::python::object& left, const Data& ri
   Takes LHS python::object and RHS Data object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator-(const boost::python::object& left, const Data& right);
+Data operator-(const boost::python::object& left, const Data& right);
 
 /**
   \brief
@@ -2137,7 +2094,7 @@ ESCRIPT_DLL_API Data operator-(const boost::python::object& left, const Data& ri
   Takes LHS python::object and RHS Data object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator*(const boost::python::object& left, const Data& right);
+Data operator*(const boost::python::object& left, const Data& right);
 
 /**
   \brief
@@ -2145,7 +2102,7 @@ ESCRIPT_DLL_API Data operator*(const boost::python::object& left, const Data& ri
   Takes LHS python::object and RHS Data object.
  python::object must be convertible to Data type.
 */
-ESCRIPT_DLL_API Data operator/(const boost::python::object& left, const Data& right);
+Data operator/(const boost::python::object& left, const Data& right);
 
 
 
@@ -2153,7 +2110,7 @@ ESCRIPT_DLL_API Data operator/(const boost::python::object& left, const Data& ri
   \brief
   Output operator
 */
-ESCRIPT_DLL_API std::ostream& operator<<(std::ostream& o, const Data& data);
+std::ostream& operator<<(std::ostream& o, const Data& data);
 
 /**
   \brief
@@ -2163,7 +2120,6 @@ ESCRIPT_DLL_API std::ostream& operator<<(std::ostream& o, const Data& data);
   \param axis_offset - Input - axis offset
   \param transpose - Input - 0: transpose neither, 1: transpose arg0, 2: transpose arg1
 */
-ESCRIPT_DLL_API
 Data
 C_GeneralTensorProduct(Data& arg_0,
                      Data& arg_1,
@@ -2208,90 +2164,7 @@ Data::rtruedivO(const boost::python::object& left)
     return tmp.truedivD(*this);
 }
 
-/**
-  \brief
-  Perform the given binary operation with this and right as operands.
-  Right is a Data object.
-*/
-template <class BinaryFunction>
-inline
-void
-Data::binaryOp(const Data& right,
-               BinaryFunction operation)
-{
-   //
-   // if this has a rank of zero promote it to the rank of the RHS
-   if (getDataPointRank()==0 && right.getDataPointRank()!=0) {
-     throw DataException("Error - attempt to update rank zero object with object with rank bigger than zero.");
-   }
 
-   if (isLazy() || right.isLazy())
-   {
-     throw DataException("Programmer error - attempt to call binaryOp with Lazy Data.");
-   }
-   //
-   // initially make the temporary a shallow copy
-   Data tempRight(right);
-   FunctionSpace fsl=getFunctionSpace();
-   FunctionSpace fsr=right.getFunctionSpace();
-   if (fsl!=fsr) {
-     signed char intres=fsl.getDomain()->preferredInterpolationOnDomain(fsr.getTypeCode(), fsl.getTypeCode());
-     if (intres==0)
-     {
-         std::string msg="Error - attempt to combine incompatible FunctionSpaces.";
-	 msg+=fsl.toString();
-	 msg+="  ";
-	 msg+=fsr.toString();
-         throw DataException(msg.c_str());
-     } 
-     else if (intres==1)
-     {
-       // an interpolation is required so create a new Data
-       tempRight=Data(right,fsl);
-     }
-     else	// reverse interpolation preferred
-     {
-        // interpolate onto the RHS function space
-       Data tempLeft(*this,fsr);
-       set_m_data(tempLeft.m_data);
-     }
-   }
-   operandCheck(tempRight);
-   //
-   // ensure this has the right type for the RHS
-   typeMatchRight(tempRight);
-   //
-   // Need to cast to the concrete types so that the correct binaryOp
-   // is called.
-   if (isExpanded()) {
-     //
-     // Expanded data will be done in parallel, the right hand side can be
-     // of any data type
-     DataExpanded* leftC=dynamic_cast<DataExpanded*>(m_data.get());
-     EsysAssert((leftC!=0), "Programming error - casting to DataExpanded.");
-     escript::binaryOp(*leftC,*(tempRight.getReady()),operation);
-   } else if (isTagged()) {
-     //
-     // Tagged data is operated on serially, the right hand side can be
-     // either DataConstant or DataTagged
-     DataTagged* leftC=dynamic_cast<DataTagged*>(m_data.get());
-     EsysAssert((leftC!=0), "Programming error - casting to DataTagged.");
-     if (right.isTagged()) {
-       DataTagged* rightC=dynamic_cast<DataTagged*>(tempRight.m_data.get());
-       EsysAssert((rightC!=0), "Programming error - casting to DataTagged.");
-       escript::binaryOp(*leftC,*rightC,operation);
-     } else {
-       DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
-       EsysAssert((rightC!=0), "Programming error - casting to DataConstant.");
-       escript::binaryOp(*leftC,*rightC,operation);
-     }
-   } else if (isConstant()) {
-     DataConstant* leftC=dynamic_cast<DataConstant*>(m_data.get());
-     DataConstant* rightC=dynamic_cast<DataConstant*>(tempRight.m_data.get());
-     EsysAssert((leftC!=0 && rightC!=0), "Programming error - casting to DataConstant.");
-     escript::binaryOp(*leftC,*rightC,operation);
-   }
-}
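The binaryOp template removed above (like C_TensorBinaryOperation further down) first reconciles the operands' function spaces: preferredInterpolationOnDomain() yields 0 for incompatible spaces, 1 to interpolate the right operand onto the left one's space, and any other value to interpolate the left operand onto the right one's. A schematic of that three-way decision, with made-up names standing in for the escript classes:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    enum InterpPreference { INCOMPATIBLE = 0, RIGHT_TO_LEFT = 1, LEFT_TO_RIGHT = 2 };

    // Stand-in for FunctionSpace: just enough state for the decision.
    struct Space
    {
        std::string name;
        bool operator!=(const Space& o) const { return name != o.name; }
    };

    // Stand-in for Domain::preferredInterpolationOnDomain(); here we pretend every
    // pair is compatible and the right operand should move to the left's space.
    InterpPreference preferred(const Space&, const Space&) { return RIGHT_TO_LEFT; }

    void reconcile(Space& left, Space& right)
    {
        if (!(left != right))
            return;                         // already on the same function space
        switch (preferred(right, left)) {
        case INCOMPATIBLE:
            throw std::runtime_error("incompatible FunctionSpaces: " + left.name + " / " + right.name);
        case RIGHT_TO_LEFT:
            right = left;                   // interpolate the right operand
            break;
        default:
            left = right;                   // reverse interpolation preferred
            break;
        }
    }

    int main()
    {
        Space a = { "ContinuousFunction" };
        Space b = { "Function" };
        reconcile(a, b);
        std::cout << a.name << " " << b.name << "\n";   // both on the left's space
        return 0;
    }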
 
 /**
   \brief
@@ -2302,21 +2175,69 @@ Data::binaryOp(const Data& right,
 */
 template <class BinaryFunction>
 inline
-double
-Data::algorithm(BinaryFunction operation, double initial_value) const
+DataTypes::real_t
+Data::reduction(BinaryFunction operation, DataTypes::real_t initial_value) const
 {
   if (isExpanded()) {
     DataExpanded* leftC=dynamic_cast<DataExpanded*>(m_data.get());
-    EsysAssert((leftC!=0), "Programming error - casting to DataExpanded.");
-    return escript::algorithm(*leftC,operation,initial_value);
+    ESYS_ASSERT(leftC!=0, "Programming error - casting to DataExpanded.");
+
+    DataExpanded& data=*leftC;
+    int i,j;
+    int numDPPSample=data.getNumDPPSample();
+    int numSamples=data.getNumSamples();
+    DataTypes::real_t global_current_value=initial_value;
+    DataTypes::real_t local_current_value;
+    const auto& vec=data.getTypedVectorRO(typename BinaryFunction::first_argument_type(0));
+    const DataTypes::ShapeType& shape=data.getShape();
+    // calculate the reduction operation value for each data point
+    // reducing the result for each data-point into the current_value variables
+    #pragma omp parallel private(local_current_value)
+    {
+	local_current_value=initial_value;
+	#pragma omp for private(i,j) schedule(static)
+	for (i=0;i<numSamples;i++) {
+	  for (j=0;j<numDPPSample;j++) {
+	    local_current_value=operation(local_current_value,escript::reductionOpVector(vec,shape,data.getPointOffset(i,j),operation,initial_value));
+
+	  }
+	}
+	#pragma omp critical
+	global_current_value=operation(global_current_value,local_current_value);
+    }
+    return global_current_value;
   } else if (isTagged()) {
     DataTagged* leftC=dynamic_cast<DataTagged*>(m_data.get());
-    EsysAssert((leftC!=0), "Programming error - casting to DataTagged.");
-    return escript::algorithm(*leftC,operation,initial_value);
+    ESYS_ASSERT(leftC!=0, "Programming error - casting to DataTagged.");
+    
+    DataTagged& data=*leftC;
+    DataTypes::real_t current_value=initial_value;
+
+    const auto& vec=data.getTypedVectorRO(typename BinaryFunction::first_argument_type(0));
+    const DataTypes::ShapeType& shape=data.getShape();
+    const DataTagged::DataMapType& lookup=data.getTagLookup();
+    const std::list<int> used=data.getFunctionSpace().getListOfTagsSTL();
+    for (std::list<int>::const_iterator i=used.begin();i!=used.end();++i)
+    {
+      int tag=*i;
+      if (tag==0)	// check for the default tag
+      {
+	  current_value=operation(current_value,escript::reductionOpVector(vec,shape,data.getDefaultOffset(),operation,initial_value));
+      }
+      else
+      {
+	  DataTagged::DataMapType::const_iterator it=lookup.find(tag);
+	  if (it!=lookup.end())
+	  {
+		  current_value=operation(current_value,escript::reductionOpVector(vec,shape,it->second,operation,initial_value));
+	  }
+      }
+    }
+    return current_value;    
   } else if (isConstant()) {
     DataConstant* leftC=dynamic_cast<DataConstant*>(m_data.get());
-    EsysAssert((leftC!=0), "Programming error - casting to DataConstant.");
-    return escript::algorithm(*leftC,operation,initial_value);
+    ESYS_ASSERT(leftC!=0, "Programming error - casting to DataConstant.");
+    return escript::reductionOpVector(leftC->getTypedVectorRO(typename BinaryFunction::first_argument_type(0)),leftC->getShape(),0,operation,initial_value);    
   } else if (isEmpty()) {
     throw DataException("Error - Operations (algorithm) not permitted on instances of DataEmpty.");
   } else if (isLazy()) {
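In the expanded branch of the new reduction() above (the renamed algorithm()), each OpenMP thread folds into a private accumulator and the per-thread partial results are merged inside a critical section; that is the standard idiom when the combining operation is an arbitrary functor rather than one of the operators an "omp reduction" clause accepts. A self-contained sketch of the idiom, with a max-reduction standing in for the functor:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<double> v(1000);
        for (std::size_t i = 0; i < v.size(); ++i)
            v[i] = static_cast<double>(i % 97);

        const double initial_value = -1.0e300;
        double global_value = initial_value;

        // Thread-private partial results, combined under a critical section.
        #pragma omp parallel
        {
            double local_value = initial_value;
            #pragma omp for schedule(static)
            for (long i = 0; i < static_cast<long>(v.size()); ++i)
                local_value = std::max(local_value, v[i]);     // stand-in for operation(...)
            #pragma omp critical
            global_value = std::max(global_value, local_value);
        }
        std::printf("max = %g\n", global_value);   // prints: max = 96
        return 0;
    }

Compiled without OpenMP the pragmas are simply ignored and the loop runs serially, which is why the same code stays correct either way.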
@@ -2337,7 +2258,7 @@ Data::algorithm(BinaryFunction operation, double initial_value) const
 template <class BinaryFunction>
 inline
 Data
-Data::dp_algorithm(BinaryFunction operation, double initial_value) const
+Data::dp_algorithm(BinaryFunction operation, DataTypes::real_t initial_value) const
 {
   if (isEmpty()) {
     throw DataException("Error - Operations (dp_algorithm) not permitted on instances of DataEmpty.");
@@ -2346,27 +2267,67 @@ Data::dp_algorithm(BinaryFunction operation, double initial_value) const
     Data result(0,DataTypes::ShapeType(),getFunctionSpace(),isExpanded());
     DataExpanded* dataE=dynamic_cast<DataExpanded*>(m_data.get());
     DataExpanded* resultE=dynamic_cast<DataExpanded*>(result.m_data.get());
-    EsysAssert((dataE!=0), "Programming error - casting data to DataExpanded.");
-    EsysAssert((resultE!=0), "Programming error - casting result to DataExpanded.");
-    escript::dp_algorithm(*dataE,*resultE,operation,initial_value);
+    ESYS_ASSERT(dataE!=0, "Programming error - casting data to DataExpanded.");
+    ESYS_ASSERT(resultE!=0, "Programming error - casting result to DataExpanded.");
+    
+    
+    
+    int i,j;
+    int numSamples=dataE->getNumSamples();
+    int numDPPSample=dataE->getNumDPPSample();
+  //  DataArrayView dataView=data.getPointDataView();
+  //  DataArrayView resultView=result.getPointDataView();
+    const auto& dataVec=dataE->getTypedVectorRO(initial_value);
+    const DataTypes::ShapeType& shape=dataE->getShape();
+    auto& resultVec=resultE->getTypedVectorRW(initial_value);
+    // perform the operation on each data-point and assign
+    // this to the corresponding element in result
+    #pragma omp parallel for private(i,j) schedule(static)
+    for (i=0;i<numSamples;i++) {
+      for (j=0;j<numDPPSample;j++) {
+	resultVec[resultE->getPointOffset(i,j)] =
+	  escript::reductionOpVector(dataVec, shape, dataE->getPointOffset(i,j),operation,initial_value);
+
+      }
+    }    
+    //escript::dp_algorithm(*dataE,*resultE,operation,initial_value);
     return result;
   }
   else if (isTagged()) {
     DataTagged* dataT=dynamic_cast<DataTagged*>(m_data.get());
-    EsysAssert((dataT!=0), "Programming error - casting data to DataTagged.");
-    DataTypes::ValueType defval(1);
+    ESYS_ASSERT(dataT!=0, "Programming error - casting data to DataTagged.");
+    DataTypes::RealVectorType defval(1);
     defval[0]=0;
     DataTagged* resultT=new DataTagged(getFunctionSpace(), DataTypes::scalarShape, defval, dataT);
-    escript::dp_algorithm(*dataT,*resultT,operation,initial_value);
+    
+    
+    const DataTypes::ShapeType& shape=dataT->getShape();
+    const auto& vec=dataT->getTypedVectorRO(initial_value);
+    const DataTagged::DataMapType& lookup=dataT->getTagLookup();
+    for (DataTagged::DataMapType::const_iterator i=lookup.begin(); i!=lookup.end(); i++) {
+      resultT->getDataByTagRW(i->first,0) =
+	  escript::reductionOpVector(vec,shape,dataT->getOffsetForTag(i->first),operation,initial_value);
+    }    
+    resultT->getTypedVectorRW(initial_value)[resultT->getDefaultOffset()] = escript::reductionOpVector(dataT->getTypedVectorRO(initial_value),dataT->getShape(),dataT->getDefaultOffset(),operation,initial_value);
+    
+    
+    
+    
+    //escript::dp_algorithm(*dataT,*resultT,operation,initial_value);
     return Data(resultT);   // note: the Data object now owns the resultT pointer
   } 
   else if (isConstant()) {
     Data result(0,DataTypes::ShapeType(),getFunctionSpace(),isExpanded());
     DataConstant* dataC=dynamic_cast<DataConstant*>(m_data.get());
     DataConstant* resultC=dynamic_cast<DataConstant*>(result.m_data.get());
-    EsysAssert((dataC!=0), "Programming error - casting data to DataConstant.");
-    EsysAssert((resultC!=0), "Programming error - casting result to DataConstant.");
-    escript::dp_algorithm(*dataC,*resultC,operation,initial_value);
+    ESYS_ASSERT(dataC!=0, "Programming error - casting data to DataConstant.");
+    ESYS_ASSERT(resultC!=0, "Programming error - casting result to DataConstant.");
+    
+    DataConstant& data=*dataC;
+    resultC->getTypedVectorRW(initial_value)[0] =
+	escript::reductionOpVector(data.getTypedVectorRO(initial_value),data.getShape(),0,operation,initial_value);    
+    
+    //escript::dp_algorithm(*dataC,*resultC,operation,initial_value);
     return result;
   } else if (isLazy()) {
     throw DataException("Error - Operations not permitted on instances of DataLazy.");
@@ -2375,6 +2336,7 @@ Data::dp_algorithm(BinaryFunction operation, double initial_value) const
   }
 }
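Both reduction() and dp_algorithm() above choose the real or complex backing vector by passing getTypedVectorRO() a value-initialised typename BinaryFunction::first_argument_type(0); in other words, the functor's advertised argument type drives the same dummy-argument overload selection used elsewhere in this header. A standalone sketch of that trick (Largest, Store and reduce_all are made-up names; the first_argument_type convention stems from std::binary_function and is deprecated in C++17):

    #include <complex>
    #include <iostream>
    #include <vector>

    typedef double real_t;
    typedef std::complex<double> cplx_t;

    struct Store
    {
        std::vector<real_t> rvec;
        std::vector<cplx_t> cvec;
        const std::vector<real_t>& typedVectorRO(real_t) const { return rvec; }
        const std::vector<cplx_t>& typedVectorRO(cplx_t) const { return cvec; }
    };

    // A functor that advertises its argument type, the way binary_function-era
    // functors do via first_argument_type.
    struct Largest
    {
        typedef real_t first_argument_type;
        real_t operator()(real_t a, real_t b) const { return a > b ? a : b; }
    };

    template <class BinaryFunction>
    typename BinaryFunction::first_argument_type
    reduce_all(const Store& s, BinaryFunction op,
               typename BinaryFunction::first_argument_type initial)
    {
        // The temporary first_argument_type(0) selects which vector gets read.
        const std::vector<typename BinaryFunction::first_argument_type>& vec =
            s.typedVectorRO(typename BinaryFunction::first_argument_type(0));
        typename BinaryFunction::first_argument_type acc = initial;
        for (std::size_t i = 0; i < vec.size(); ++i)
            acc = op(acc, vec[i]);
        return acc;
    }

    int main()
    {
        Store s;
        s.rvec.push_back(3.0);
        s.rvec.push_back(7.0);
        std::cout << reduce_all(s, Largest(), 0.0) << std::endl;   // prints: 7
        return 0;
    }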
 
+
 /**
   \brief
   Compute a tensor operation with two Data objects
@@ -2382,898 +2344,18 @@ Data::dp_algorithm(BinaryFunction operation, double initial_value) const
   \param arg_1 - Input - Data object
   \param operation - Input - Binary op functor
 */
-template <typename BinaryFunction>
-inline
 Data
 C_TensorBinaryOperation(Data const &arg_0,
                         Data const &arg_1,
-                        BinaryFunction operation)
-{
-  if (arg_0.isEmpty() || arg_1.isEmpty())
-  {
-     throw DataException("Error - Operations (C_TensorBinaryOperation) not permitted on instances of DataEmpty.");
-  }
-  if (arg_0.isLazy() || arg_1.isLazy())
-  {
-     throw DataException("Error - Operations not permitted on lazy data.");
-  }
-  // Interpolate if necessary and find an appropriate function space
-  Data arg_0_Z, arg_1_Z;
-  FunctionSpace fsl=arg_0.getFunctionSpace();
-  FunctionSpace fsr=arg_1.getFunctionSpace();
-  if (fsl!=fsr) {
-     signed char intres=fsl.getDomain()->preferredInterpolationOnDomain(fsr.getTypeCode(), fsl.getTypeCode());
-     if (intres==0)
-     {
-         std::string msg="Error - C_TensorBinaryOperation: arguments have incompatible function spaces.";
-         msg+=fsl.toString();
-         msg+=" ";
-         msg+=fsr.toString();
-         throw DataException(msg.c_str());
-     } 
-     else if (intres==1)
-     {
-      arg_1_Z=arg_1.interpolate(arg_0.getFunctionSpace());
-      arg_0_Z =Data(arg_0);      
-     }
-     else	// reverse interpolation preferred
-     {
-      arg_0_Z = arg_0.interpolate(arg_1.getFunctionSpace());
-      arg_1_Z = Data(arg_1);
-     }    
-  } else {
-      arg_0_Z = Data(arg_0);
-      arg_1_Z = Data(arg_1);
-  }
-  // Get rank and shape of inputs
-  int rank0 = arg_0_Z.getDataPointRank();
-  int rank1 = arg_1_Z.getDataPointRank();
-  DataTypes::ShapeType shape0 = arg_0_Z.getDataPointShape();
-  DataTypes::ShapeType shape1 = arg_1_Z.getDataPointShape();
-  int size0 = arg_0_Z.getDataPointSize();
-  int size1 = arg_1_Z.getDataPointSize();
-  // Declare output Data object
-  Data res;
-
-  if (shape0 == shape1) {
-    if (arg_0_Z.isConstant()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());      // DataConstant output
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0));
-      double *ptr_2 = &(res.getDataAtOffsetRW(0));
-
-      tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isTagged()) {
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-
-      // Get the pointers to the actual data
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-
-        tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isExpanded()) {
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_1,dataPointNo_1;
-      int numSamples_1 = arg_1_Z.getNumSamples();
-      int numDataPointsPerSample_1 = arg_1_Z.getNumDataPointsPerSample();
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
-      for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
-        for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1)); 
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2)); 
-          tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isConstant()) {
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_1 = tmp_1->getPointOffset(0,0);
-
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-        tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isTagged()) {
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());
-      res.tag();        // DataTagged output
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      // Merge the tags
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first); // use tmp_2 to get correct shape
-      }
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-      }
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
-      for (i=lookup_2.begin();i!=lookup_2.end();i++) {
-
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-
-        tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isExpanded()) {
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataTagged*   tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      int offset_1 = tmp_1->getPointOffset(0,0);
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-
-
-          tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isTagged()) {
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataTagged*   tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
-        const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isExpanded()) {
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-	  dataPointNo_0=0;
-//        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0*numDataPointsPerSample_0, ptr_0, ptr_1, ptr_2, operation);
-//       }
-      }
-
-    }
-    else {
-      throw DataException("Error - C_TensorBinaryOperation: unknown combination of inputs");
-    }
-
-  } else if (0 == rank0) {
-    if (arg_0_Z.isConstant()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace());      // DataConstant output
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0));
-      double *ptr_2 = &(res.getDataAtOffsetRW(0));
-      tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isTagged()) {
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-        tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isExpanded()) {
-
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_1;
-      int numSamples_1 = arg_1_Z.getNumSamples();
-      int numDataPointsPerSample_1 = arg_1_Z.getNumDataPointsPerSample();
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      const double *ptr_src = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-      double ptr_0 = ptr_src[0];
-      int size = size1*numDataPointsPerSample_1;
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_1) schedule(static)
-      for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
-//        for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_1,0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_1,0);
-//          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size, ptr_0, ptr_1, ptr_2, operation);
-
-//        }
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isConstant()) {
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape1, arg_0_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_1 = tmp_1->getPointOffset(0,0);
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-
-      // Compute a result for the default
-      tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-
-        tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isTagged()) {
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace());
-      res.tag();        // DataTagged output
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      // Merge the tags
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first); // use tmp_2 to get correct shape
-      }
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-      }
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
-      for (i=lookup_2.begin();i!=lookup_2.end();i++) {
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-
-        tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isExpanded()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataTagged*   tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      int offset_1 = tmp_1->getPointOffset(0,0);
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-        }
-      }
-
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isTagged()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataTagged*   tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
-        const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isExpanded()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape1, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size1, ptr_0[0], ptr_1, ptr_2, operation);
-        }
-      }
-
-    }
-    else {
-      throw DataException("Error - C_TensorBinaryOperation: unknown combination of inputs");
-    }
-
-  } else if (0 == rank1) {
-    if (arg_0_Z.isConstant()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());      // DataConstant output
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(0));
-      double *ptr_2 = &(res.getDataAtOffsetRW(0));
-      tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isTagged()) {
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-
-      //Get the pointers to the actual data
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-        tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      }
-    }
-    else if (arg_0_Z.isConstant()   && arg_1_Z.isExpanded()) {
-
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataConstant* tmp_0=dynamic_cast<DataConstant*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_1,dataPointNo_1;
-      int numSamples_1 = arg_1_Z.getNumSamples();
-      int numDataPointsPerSample_1 = arg_1_Z.getNumDataPointsPerSample();
-      int offset_0 = tmp_0->getPointOffset(0,0);
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_1,dataPointNo_1) schedule(static)
-      for (sampleNo_1 = 0; sampleNo_1 < numSamples_1; sampleNo_1++) {
-        for (dataPointNo_1 = 0; dataPointNo_1 < numDataPointsPerSample_1; dataPointNo_1++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_1,dataPointNo_1);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_1,dataPointNo_1);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-        }
-      }
+                        ES_optype operation);
 
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isConstant()) {
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Prepare the DataConstant input
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());      // DataTagged output
-      res.tag();
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Prepare offset into DataConstant
-      int offset_1 = tmp_1->getPointOffset(0,0);
-      const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first);
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-        tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      }
 
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isTagged()) {
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-      // Borrow DataTagged input from Data object
-      DataTagged* tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-
-      // Prepare a DataTagged output 2
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace());
-      res.tag();        // DataTagged output
-      DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-      // Get the pointers to the actual data
-      const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-      const double *ptr_1 = &(tmp_1->getDefaultValueRO(0));
-      double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-
-      // Compute a result for the default
-      tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      // Merge the tags
-      DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-      const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-      const DataTagged::DataMapType& lookup_1=tmp_1->getTagLookup();
-      for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-        tmp_2->addTag(i->first); // use tmp_2 to get correct shape
-      }
-      for (i=lookup_1.begin();i!=lookup_1.end();i++) {
-        tmp_2->addTag(i->first);
-      }
-      // Compute a result for each tag
-      const DataTagged::DataMapType& lookup_2=tmp_2->getTagLookup();
-      for (i=lookup_2.begin();i!=lookup_2.end();i++) {
-        const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-        const double *ptr_1 = &(tmp_1->getDataByTagRO(i->first,0));
-        double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-        tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-      }
-
-    }
-    else if (arg_0_Z.isTagged()     && arg_1_Z.isExpanded()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataTagged*   tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_0 = tmp_0->getPointOffset(sampleNo_0,0); // They're all the same, so just use #0
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isConstant()) {
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataConstant* tmp_1=dynamic_cast<DataConstant*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      int offset_1 = tmp_1->getPointOffset(0,0);
-      const double *ptr_src = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-      double ptr_1 = ptr_src[0];
-      int size = size0 * numDataPointsPerSample_0;
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-//        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-//          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size, ptr_0, ptr_1, ptr_2, operation);
-//        }
-      }
-
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isTagged()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataTagged*   tmp_1=dynamic_cast<DataTagged*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        int offset_1 = tmp_1->getPointOffset(sampleNo_0,0);
-        const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-        }
-      }
-
-    }
-    else if (arg_0_Z.isExpanded()   && arg_1_Z.isExpanded()) {
-
-      // After finding a common function space above the two inputs have the same numSamples and num DPPS
-      res = Data(0.0, shape0, arg_1_Z.getFunctionSpace(),true); // DataExpanded output
-      DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-      DataExpanded* tmp_1=dynamic_cast<DataExpanded*>(arg_1_Z.borrowData());
-      DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-      int sampleNo_0,dataPointNo_0;
-      int numSamples_0 = arg_0_Z.getNumSamples();
-      int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-      res.requireWrite();
-      #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-      for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-        for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-          int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_1 = tmp_1->getPointOffset(sampleNo_0,dataPointNo_0);
-          int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-          const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-          const double *ptr_1 = &(arg_1_Z.getDataAtOffsetRO(offset_1));
-          double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-          tensor_binary_operation(size0, ptr_0, ptr_1[0], ptr_2, operation);
-        }
-      }
-
-    }
-    else {
-      throw DataException("Error - C_TensorBinaryOperation: unknown combination of inputs");
-    }
-
-  } else {
-    throw DataException("Error - C_TensorBinaryOperation: arguments have incompatible shapes");
-  }
-
-  return res;
-}
-
-template <typename UnaryFunction>
 Data
 C_TensorUnaryOperation(Data const &arg_0,
-                       UnaryFunction operation)
-{
-  if (arg_0.isEmpty())	// do this before we attempt to interpolate
-  {
-     throw DataException("Error - Operations (C_TensorUnaryOperation) not permitted on instances of DataEmpty.");
-  }
-  if (arg_0.isLazy())
-  {
-     throw DataException("Error - Operations not permitted on lazy data.");
-  }
-  // Interpolate if necessary and find an appropriate function space
-  Data arg_0_Z = Data(arg_0);
+                       escript::ES_optype operation,
+                       DataTypes::real_t tol=0);
 
-  // Get rank and shape of inputs
-  const DataTypes::ShapeType& shape0 = arg_0_Z.getDataPointShape();
-  int size0 = arg_0_Z.getDataPointSize();
+} // namespace escript
 
-  // Declare output Data object
-  Data res;
+#endif // __ESCRIPT_DATA_H__
 
-  if (arg_0_Z.isConstant()) {
-    res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());      // DataConstant output
-    const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(0));
-    double *ptr_2 = &(res.getDataAtOffsetRW(0));
-    tensor_unary_operation(size0, ptr_0, ptr_2, operation);
-  }
-  else if (arg_0_Z.isTagged()) {
-
-    // Borrow DataTagged input from Data object
-    DataTagged* tmp_0=dynamic_cast<DataTagged*>(arg_0_Z.borrowData());
-
-    // Prepare a DataTagged output 2
-    res = Data(0.0, shape0, arg_0_Z.getFunctionSpace());   // DataTagged output
-    res.tag();
-    DataTagged* tmp_2=dynamic_cast<DataTagged*>(res.borrowData());
-
-    // Get the pointers to the actual data
-    const double *ptr_0 = &(tmp_0->getDefaultValueRO(0));
-    double *ptr_2 = &(tmp_2->getDefaultValueRW(0));
-    // Compute a result for the default
-    tensor_unary_operation(size0, ptr_0, ptr_2, operation);
-    // Compute a result for each tag
-    const DataTagged::DataMapType& lookup_0=tmp_0->getTagLookup();
-    DataTagged::DataMapType::const_iterator i; // i->first is a tag, i->second is an offset into memory
-    for (i=lookup_0.begin();i!=lookup_0.end();i++) {
-      tmp_2->addTag(i->first);
-      const double *ptr_0 = &(tmp_0->getDataByTagRO(i->first,0));
-      double *ptr_2 = &(tmp_2->getDataByTagRW(i->first,0));
-      tensor_unary_operation(size0, ptr_0, ptr_2, operation);
-    }
-
-  }
-  else if (arg_0_Z.isExpanded()) {
-
-    res = Data(0.0, shape0, arg_0_Z.getFunctionSpace(),true); // DataExpanded output
-    DataExpanded* tmp_0=dynamic_cast<DataExpanded*>(arg_0_Z.borrowData());
-    DataExpanded* tmp_2=dynamic_cast<DataExpanded*>(res.borrowData());
-
-    int sampleNo_0,dataPointNo_0;
-    int numSamples_0 = arg_0_Z.getNumSamples();
-    int numDataPointsPerSample_0 = arg_0_Z.getNumDataPointsPerSample();
-    #pragma omp parallel for private(sampleNo_0,dataPointNo_0) schedule(static)
-    for (sampleNo_0 = 0; sampleNo_0 < numSamples_0; sampleNo_0++) {
-	dataPointNo_0=0;
-//      for (dataPointNo_0 = 0; dataPointNo_0 < numDataPointsPerSample_0; dataPointNo_0++) {
-        int offset_0 = tmp_0->getPointOffset(sampleNo_0,dataPointNo_0);
-        int offset_2 = tmp_2->getPointOffset(sampleNo_0,dataPointNo_0);
-        const double *ptr_0 = &(arg_0_Z.getDataAtOffsetRO(offset_0));
-        double *ptr_2 = &(res.getDataAtOffsetRW(offset_2));
-        tensor_unary_operation(size0*numDataPointsPerSample_0, ptr_0, ptr_2, operation);
-//      }
-    }
-  }
-  else {
-    throw DataException("Error - C_TensorUnaryOperation: unknown combination of inputs");
-  }
-
-  return res;
-}
-
-}
-#endif
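The C_TensorBinaryOperation branches removed above all follow one pattern: once the storage kinds of the two arguments (constant, tagged, expanded) have been matched up, a rank-0 argument is broadcast against every component of the other argument and written through a raw pointer. A minimal, self-contained sketch of that broadcast step follows; apply_broadcast and Op are illustrative names only, not part of the escript API.

    // Broadcast a scalar against every component of 'in', as the rank-0
    // branches of the removed C_TensorBinaryOperation did via
    // tensor_binary_operation(size, scalar, ptr_1, ptr_2, operation).
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <vector>

    template <typename Op>
    void apply_broadcast(std::size_t n, double scalar,
                         const double* in, double* out, Op op)
    {
        for (std::size_t i = 0; i < n; ++i)
            out[i] = op(scalar, in[i]);   // scalar combined with each component
    }

    int main()
    {
        std::vector<double> a{1.0, 2.0, 3.0};
        std::vector<double> r(a.size());
        apply_broadcast(a.size(), 10.0, a.data(), r.data(), std::plus<double>());
        for (double v : r) std::cout << v << ' ';   // prints: 11 12 13
        std::cout << '\n';
    }

The replacement declarations above take an ES_optype value instead of a template functor, so callers no longer instantiate this per-storage-kind dispatch inside the header.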
diff --git a/escriptcore/src/DataAbstract.cpp b/escriptcore/src/DataAbstract.cpp
index e830bf7..5ecb9a8 100644
--- a/escriptcore/src/DataAbstract.cpp
+++ b/escriptcore/src/DataAbstract.cpp
@@ -14,15 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataAbstract.h"
+#include "Data.h" // So we can update the shared status when things change
 #include "DataException.h"
 #include "DataLazy.h"
 
-#include "Data.h" // So we can update the shared status when things change
 
 using namespace std;
 
@@ -63,45 +59,27 @@ const_DataAbstract_ptr DataAbstract::getPtr() const
   }
 }
 
-
-// Warning - this method uses .use_count() which the boost doco labels inefficient.
-// If this method needs to be called in debug contexts, we may need to do some
-// timing experiments to determine how inefficient and possibly switch over to
-// invasive pointers which can answer these questions faster
-bool DataAbstract::checkNoSharing() const
-{
-
-  return !m_lazyshared && (m_owners.size()<2);
-
-/*  if (_internal_weak_this.expired())  // there is no shared_ptr for this object yet
-  {
-    return true;
-  }
-  if (shared_from_this().use_count()==2)    // shared_from_this will increase the ref count
-  {                     // which is the reason .unique is no use.
-    return true;
-  }
-std::cerr << "-<"<<shared_from_this().use_count() << ">-" << endl;
-  return false;*/
-}
-
 bool
 DataAbstract::isLazy() const
 {
     return (dynamic_cast<const DataLazy*>(this)!=0);
 }
 
+bool
+DataAbstract::isComplex() const
+{
+    return m_iscompl;
+}
 
 
-DataAbstract::DataAbstract(const FunctionSpace& what, const ShapeType& shape, bool isDataEmpty):
-    m_lazyshared(false),
+DataAbstract::DataAbstract(const FunctionSpace& what, const ShapeType& shape, bool isDataEmpty, bool isCplx):
     m_noSamples(what.getNumSamples()),
     m_noDataPointsPerSample(what.getNumDPPSample()),
+    m_iscompl(isCplx),
     m_functionSpace(what),
     m_shape(shape),
     m_novalues(DataTypes::noValues(shape)),
     m_rank(DataTypes::getRank(shape))
-
 {
 #ifdef EXWRITECHK
     exclusivewritecalled=false;
@@ -162,12 +140,19 @@ DataAbstract::dump(const std::string fileName) const
 
 
 
-DataAbstract::ValueType::value_type*
-DataAbstract::getSampleDataByTag(int tag)
+DataTypes::real_t*
+DataAbstract::getSampleDataByTag(int tag, DataTypes::real_t dummy)
 {
     throw DataException("Error - DataAbstract::getSampleDataByTag: Data type does not have tag values.");
 }
 
+
+DataTypes::cplx_t*
+DataAbstract::getSampleDataByTag(int tag, DataTypes::cplx_t dummy)
+{
+    throw DataException("Error - DataAbstract::getSampleDataByTag_C: Data type does not have complex tag values.");
+}
+
 size_t
 DataAbstract::getTagCount() const
 {
@@ -179,12 +164,20 @@ DataAbstract::getTagCount() const
 void  
 DataAbstract::setTaggedValue(int tagKey,
            const DataTypes::ShapeType& pointshape,
-               const DataTypes::ValueType& value,
+           const DataTypes::RealVectorType& value,
            int dataOffset)
 {
     throw DataException("Error - DataAbstract::setTaggedValue: Data type does not have tag values.");
 }
 
+void  
+DataAbstract::setTaggedValue(int tagKey,
+           const DataTypes::ShapeType& pointshape,
+           const DataTypes::CplxVectorType& value,
+           int dataOffset)
+{
+    throw DataException("Error - DataAbstract::setTaggedValue: Data type does not have tag values.");
+}
 
 int
 DataAbstract::getTagNumber(int dpno)
@@ -194,7 +187,13 @@ DataAbstract::getTagNumber(int dpno)
 }
 
 void
-DataAbstract::copyToDataPoint(const int sampleNo, const int dataPointNo, const double value)
+DataAbstract::copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::real_t value)
+{
+    throw DataException("Error - DataAbstract::copying data from double value to a single data point is not supported.");
+}
+
+void
+DataAbstract::copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::cplx_t value)
 {
     throw DataException("Error - DataAbstract::copying data from double value to a single data point is not supported.");
 }
@@ -214,9 +213,21 @@ DataAbstract::symmetric(DataAbstract* ev)
 }
 
 void
-DataAbstract::nonsymmetric(DataAbstract* ev) 
+DataAbstract::antisymmetric(DataAbstract* ev) 
 {
-    throw DataException("Error - DataAbstract::nonsymmetric is not supported.");
+    throw DataException("Error - DataAbstract::antisymmetric is not supported.");
+}
+
+void
+DataAbstract::hermitian(DataAbstract* ev) 
+{
+    throw DataException("Error - DataAbstract::hermitian is not supported.");
+}
+
+void
+DataAbstract::antihermitian(DataAbstract* ev) 
+{
+    throw DataException("Error - DataAbstract::antihermitian is not supported.");
 }
 
 void
@@ -261,57 +272,16 @@ DataAbstract::setToZero()
 }
 
 void
-DataAbstract::reorderByReferenceIDs(dim_t *reference_ids)
+DataAbstract::reorderByReferenceIDs(DataTypes::dim_t* reference_ids)
 {
     throw DataException("Error - DataAbstract:: cannot reorder by reference ids.");
 }
 
-
-void DataAbstract::addOwner(Data* d)
-{
-  for (size_t i=0;i<m_owners.size();++i)
-  {
-    if (m_owners[i]==d)
-    {
-        return;
-    }
-  }
-  m_owners.push_back(d);
-// cerr << "Adding " << d << " as an owner of " << this << " now O=" << m_owners.size() << endl;
-  if (m_owners.size()==2)   // Means it used to be 1 so we need to tell people
-  {
-    for (size_t i=0;i<m_owners.size();++i)
-    {
-        m_owners[i]->updateShareStatus(true);
-    }
-  }
-}
-
-void DataAbstract::removeOwner(Data* d)
+void DataAbstract::complicate()
 {
-  for (size_t i=0;i<m_owners.size();++i)
-  {
-    if (m_owners[i]==d)
-    {
-        m_owners.erase(m_owners.begin()+i,m_owners.begin()+(i+1));  // remove the element
-        break;
-    }
-  }
-  if (m_owners.size()==1)   // Means it used to be 2 so we need to tell people
-  {
-    m_owners[0]->updateShareStatus(isShared());     // could still be lazy shared
-  }
+    throw DataException("This type does not support converting to complex.");
 }
 
 
-void DataAbstract::makeLazyShared()
-{
-    m_lazyshared=true;  // now we need to inform all the owners
-    for (size_t i=0;i<m_owners.size();++i)
-    {
-        m_owners[i]->updateShareStatus(true);
-    }
-}   
-
-
 }  // end of namespace
+
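The new getSampleDataByTag overloads above differ only in a trailing dummy parameter whose value is never used; its type (DataTypes::real_t or DataTypes::cplx_t) is what selects the real or complex code path. A stripped-down sketch of that overload-selection idiom, using hypothetical Store/sample_ptr names rather than escript classes:

    #include <complex>
    #include <iostream>
    #include <vector>

    struct Store {
        std::vector<double>               rdata{1.5, 2.5};
        std::vector<std::complex<double>> cdata{{0.0, 1.0}};

        // The second argument is ignored; only its type picks the overload.
        double*               sample_ptr(int /*tag*/, double)               { return rdata.data(); }
        std::complex<double>* sample_ptr(int /*tag*/, std::complex<double>) { return cdata.data(); }
    };

    int main()
    {
        Store s;
        std::cout << *s.sample_ptr(7, double())               << '\n';  // real path: 1.5
        std::cout << *s.sample_ptr(7, std::complex<double>()) << '\n';  // complex path: (0,1)
    }

Because the real overload in the header keeps a defaulted dummy (dummy=0), existing real-valued callers compile unchanged, while complex callers must pass a cplx_t explicitly.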
diff --git a/escriptcore/src/DataAbstract.h b/escriptcore/src/DataAbstract.h
index 95c7191..305bebf 100644
--- a/escriptcore/src/DataAbstract.h
+++ b/escriptcore/src/DataAbstract.h
@@ -14,12 +14,12 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_DATAABSTRACT_H__
+#define __ESCRIPT_DATAABSTRACT_H__
 
-#if !defined escript_DataAbstract_20040315_H
-#define escript_DataAbstract_20040315_H
 #include "system_dep.h"
-
 #include "DataTypes.h"
+#include "DataVector.h"
 #include "FunctionSpace.h"
 
 #include <boost/scoped_ptr.hpp>
@@ -45,7 +45,7 @@ namespace escript {
    array of data points where one dimension corresponds to the number of samples
    and the other to the number of data points per sample as defined by the function
    space associated with each Data object. The data points themselves are arrays of
-   doubles of rank 0-4.
+   reals or complexes of rank 0-4.
 */
 
 class DataAbstract;
@@ -63,7 +63,6 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
 
  public:
 
-  typedef DataTypes::ValueType ValueType;
   typedef DataTypes::ShapeType ShapeType;
 
    /**
@@ -93,7 +92,7 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
      \param shape - Input - Shape of each data value.
      \param isDataEmpty - Input - Is this an instance of DataEmpty (for internal use only)
   */
-  DataAbstract(const FunctionSpace& what, const ShapeType& shape, bool isDataEmpty=false);
+  DataAbstract(const FunctionSpace& what, const ShapeType& shape, bool isDataEmpty=false,bool isCplx=false);
 
   /**
     \brief
@@ -115,7 +114,7 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
   */
   virtual
   DataAbstract*
-  deepCopy()=0;
+  deepCopy() const =0 ;
 
   /**
      \brief Return a data object with all points resolved.
@@ -145,6 +144,12 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
   */
   int
   getNumSamples() const;
+  
+  bool
+  hasNoSamples() const
+  {
+      return getNumSamples()==0;
+  }
 
   /**
      \brief
@@ -173,14 +178,10 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
     \param dataPointNo - Input - data point number.
   */
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo) const = 0;
 
-  virtual
-  ValueType::size_type
-  getPointOffset(int sampleNo,
-                 int dataPointNo) = 0;
 
 
   /**
@@ -188,18 +189,26 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
      Return the number of doubles stored for this Data object.
   */
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const = 0;
 
   /**
      \brief
-     Return the sample data for the given tag key.
+     Return the real sample data for the given tag key.
      NB: If the data isn't tagged an exception will be thrown.
   */
   virtual
-  double*
-  getSampleDataByTag(int tag);
+  DataTypes::real_t*
+  getSampleDataByTag(int tag, DataTypes::real_t dummy=0);
 
+  /**
+     \brief
+     Return the complex sample data for the given tag key.
+     NB: If the data isn't tagged an exception will be thrown.
+  */
+  virtual
+  DataTypes::cplx_t*
+  getSampleDataByTag(int tag, DataTypes::cplx_t dummy);
 
   /**
      \brief Return number of tagged values stored in the data object
@@ -276,10 +285,16 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
   virtual
   void
   setTaggedValue(int tagKey,
-		 const DataTypes::ShapeType& pointshape,
-                 const DataTypes::ValueType& value,
-		 int dataOffset=0);
+                 const DataTypes::ShapeType& pointshape,
+                 const DataTypes::RealVectorType& value,
+                 int dataOffset=0);
 
+  virtual
+  void
+  setTaggedValue(int tagKey,
+                 const DataTypes::ShapeType& pointshape,
+                 const DataTypes::CplxVectorType& value,
+                 int dataOffset=0);
 
   /**
      \brief
@@ -293,7 +308,10 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
      \param value Input - new values for the data point
   */
   virtual void
-  copyToDataPoint(const int sampleNo, const int dataPointNo, const double value);
+  copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::real_t value);
+
+  virtual void
+  copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::cplx_t value);
 
   /**
      \brief
@@ -330,13 +348,33 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
 
   /**
      \brief
-     Computes a nonsymmetric matrix (A - AT) / 2
+     Computes an antisymmetric matrix (A - AT) / 2
 
      \param ev - Output - a nonsymmetric matrix
 
   */
   virtual void
-  nonsymmetric(DataAbstract* ev);
+  antisymmetric(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes a Hermitian matrix (A + A*) / 2
+
+     \param ev - Output - a Hermitian matrix
+
+  */
+  virtual void
+  hermitian(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an antihermitian matrix (A - A*) / 2
+
+     \param ev - Output - an antihermitian matrix
+
+  */
+  virtual void
+  antihermitian(DataAbstract* ev);
 
   /**
      \brief
@@ -415,19 +453,19 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
      \param reference_ids - Input - reference_ids used for current ordering
   */
   virtual void
-  reorderByReferenceIDs(dim_t *reference_ids);
+  reorderByReferenceIDs(DataTypes::dim_t *reference_ids);
 
 
 
   /**
-	\brief
-	Return the number of values in the shape for this object.
+        \brief
+        Return the number of values in the shape for this object.
   */
   unsigned int
   getNoValues() const;
 
 
-  bool isLazy() const;	// a test to determine if this object is an instance of DataLazy
+  bool isLazy() const;  // a test to determine if this object is an instance of DataLazy
 
   virtual
   bool
@@ -451,59 +489,55 @@ class ESCRIPT_DLL_API DataAbstract : public REFCOUNT_BASE_CLASS(DataAbstract)
   bool
   isTagged() const {return false;}
 
-  bool isEmpty() const;	// a fast test to determine if this object is an instance of DataEmpty
-
+  bool isEmpty() const; // a fast test to determine if this object is an instance of DataEmpty
 
   /**
-  	\warning should only be used in single threaded code (or inside a single/critical section)
+   \brief True if the components of data points are complex
   */
-  void
-  addOwner(Data*);
-
-  /**
-  	\warning should only be used in single threaded code (or inside a single/critical section)
-  */
-  void
-  removeOwner(Data*);
+  bool isComplex() const;
 
+#ifdef SLOWSHARECHECK   
+  
+  // For this to be threadsafe, we need to be sure that this is the
+  // only way shared-ness is tested.
   /**
-	\brief Is this object owned by more than one Data object
+        \brief Is this object owned by more than one Data object
   */
   bool
   isShared() const
   {
-	return m_lazyshared || (m_owners.size()>1);
+    bool shared=false;
+    #pragma omp critical        // because two threads could try
+    {                   // this check at the same time
+      try               // and shared_from_this increments count
+      {
+        shared=shared_from_this().use_count()>2;
+      }
+      catch (...)
+      {
+      }
+    }
+    return shared;
   }
+#endif    
 
 #ifdef EXWRITECHK
-  bool exclusivewritecalled;	// used to check for some potential programming faults 
-				// involving shared data.
-				// This flag only asserts that exclusive write has been called
-				// on this object, it does not definitively guarantee that
-				// sharing has not occurred since that call
-				// This flag is for internal use only may be removed without warning
+  bool exclusivewritecalled;    // used to check for some potential programming faults
+                                // involving shared data.
+                                // This flag only asserts that exclusive write has been called
+                                // on this object; it does not definitively guarantee that
+                                // sharing has not occurred since that call.
+                                // This flag is for internal use only and may be removed without warning.
 #endif
-  
-protected:
-    /**
-    \brief Returns true if this object is not shared.
-    For internal use only. - It may not be particularly fast
-    */
-    bool checkNoSharing() const;
-
-    /**
-    \brief Marks this DataAbstract shared as LazyData
-    For internal use only.
-    */
-    void
-    makeLazyShared();	
 
-    friend class DataLazy;
+  /**
+     \brief
+     Make the object complex
+  */
+  virtual void complicate();
 
-    std::vector<Data*> m_owners;
-    bool m_lazyshared;
+protected:
+    friend class DataLazy;
 
-private:
   //
   // The number of samples in this Data object.
   // This is derived directly from the FunctionSpace.
@@ -515,6 +549,11 @@ private:
   int m_noDataPointsPerSample;
 
   //
+  // is the data made of complex components
+  bool m_iscompl;
+private:
+
+  //
   // A FunctionSpace which provides a description of the data associated
   // with this Data object.
   FunctionSpace m_functionSpace;
@@ -540,7 +579,7 @@ inline
 bool
 DataAbstract::isEmpty() const
 {
-	return m_isempty;
+        return m_isempty;
 }
 
 inline
@@ -563,7 +602,7 @@ DataAbstract::getNumDPPSample() const
 {
   if (isEmpty())
   {
-     	throw DataException("Error - Operations (getNumDPPSample) not permitted on instances of DataEmpty.");
+        throw DataException("Error - Operations (getNumDPPSample) not permitted on instances of DataEmpty.");
   }
   return m_noDataPointsPerSample;
 }
@@ -574,7 +613,7 @@ DataAbstract::getNumSamples() const
 {
   if (isEmpty())
   {
-     	throw DataException("Error - Operations (getNumSamples) not permitted on instances of DataEmpty.");
+        throw DataException("Error - Operations (getNumSamples) not permitted on instances of DataEmpty.");
   }
   return m_noSamples;
 }
@@ -591,36 +630,36 @@ inline
 const DataTypes::ShapeType&
 DataAbstract::getShape() const
 {
-	if (isEmpty())
-	{
-		throw DataException("Error - Operations (getShape) not permitted on instances of DataEmpty.");
-	}
-	return m_shape;
+        if (isEmpty())
+        {
+                throw DataException("Error - Operations (getShape) not permitted on instances of DataEmpty.");
+        }
+        return m_shape;
 }
 
 inline
 unsigned int
 DataAbstract::getRank() const
 {
-	if (isEmpty())
-	{
-		throw DataException("Error - Operations (getRank) not permitted on instances of DataEmpty.");
-	}
-	return m_rank;
+        if (isEmpty())
+        {
+                throw DataException("Error - Operations (getRank) not permitted on instances of DataEmpty.");
+        }
+        return m_rank;
 }
 
 inline
 unsigned int
 DataAbstract::getNoValues() const
-{	
-	if (isEmpty())
-	{
-		throw DataException("Error - Operations (getNoValues) not permitted on instances of DataEmpty.");
-	}
-	return m_novalues;
+{
+        if (isEmpty())
+        {
+                throw DataException("Error - Operations (getNoValues) not permitted on instances of DataEmpty.");
+        }
+        return m_novalues;
 }
 
-
 } // end of namespace
 
-#endif
+#endif // __ESCRIPT_DATAABSTRACT_H__
+
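The SLOWSHARECHECK version of isShared() above infers sharing from shared_from_this().use_count(): the temporary shared_ptr created inside the call adds one owner of its own, so a count greater than 2 means some other holder of the pointer exists. A stripped-down sketch of that test with an illustrative Node class (the escript version additionally serialises the check with an OpenMP critical section and catches the exception thrown when no shared_ptr owns the object yet):

    #include <iostream>
    #include <memory>

    struct Node : std::enable_shared_from_this<Node> {
        // >2 because shared_from_this() itself holds one temporary reference.
        bool is_shared() const { return shared_from_this().use_count() > 2; }
    };

    int main()
    {
        auto a = std::make_shared<Node>();
        std::cout << std::boolalpha << a->is_shared() << '\n';  // false: single owner
        auto b = a;                                             // second owner appears
        std::cout << a->is_shared() << '\n';                    // true
    }

The comment removed from the old checkNoSharing() noted that use_count() is labelled inefficient, which is presumably why the new check is only compiled in when SLOWSHARECHECK is defined.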
diff --git a/escriptcore/src/DataAlgorithm.h b/escriptcore/src/DataAlgorithm.h
deleted file mode 100644
index 48bf9b3..0000000
--- a/escriptcore/src/DataAlgorithm.h
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_DataAlgorithm_20040714_H
-#define escript_DataAlgorithm_20040714_H
-#include "system_dep.h"
-
-#include "DataExpanded.h"
-#include "DataTagged.h"
-#include "DataConstant.h"
-
-#include "DataMaths.h"
-
-#include <iostream>
-#include <algorithm>
-#include <list>
-
-namespace escript {
-
-/**
-   \brief
-   Adapt binary algorithms so they may be used in DataArrayView reduction operations.
-
-   Description:
-   This functor adapts the given BinaryFunction operation by starting with the
-   given inital value applying this operation to successive values, storing the
-   rolling result in m_currentValue - which can be accessed or reset by getResult
-   and resetResult respectively.
-*/
-template <class BinaryFunction>
-class DataAlgorithmAdapter {
-  public:
-    DataAlgorithmAdapter(double initialValue):
-      m_initialValue(initialValue),
-      m_currentValue(initialValue)
-    {
-    }
-    DataAlgorithmAdapter(const DataAlgorithmAdapter& other):
-      m_initialValue(other.m_initialValue),
-      m_currentValue(other.m_initialValue),
-      operation(other.operation)
-    {
-    }
-    inline void operator()(double value)
-    {
-      m_currentValue=operation(m_currentValue,value);
-      return;
-    }
-    inline void resetResult()
-    {
-      m_currentValue=m_initialValue;
-    }
-    inline double getResult() const
-    {
-      return m_currentValue;
-    }
-  private:
-    //
-    // the initial operation value
-    double m_initialValue;
-    //
-    // the current operation value
-    double m_currentValue;
-    //
-    // The operation to perform
-    BinaryFunction operation;
-};
-
-/**
-   \brief
-   Return the maximum value of the two given values.
-*/
-struct FMax : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return std::max(x,y);
-  }
-};
-
-/**
-   \brief
-   Return the minimum value of the two given values.
-*/
-struct FMin : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return std::min(x,y);
-  }
-};
-
-/**
-   \brief
-   Return the absolute maximum value of the two given values.
-*/
-struct AbsMax : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return std::max(fabs(x),fabs(y));
-  }
-};
-
-/**
-   \brief
-   Return the absolute minimum value of the two given values.
-*/
-struct AbsMin : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return std::min(fabs(x),fabs(y));
-  }
-};
-
-/**
-   \brief
-   Return the length between the two given values.
-*/
-struct Length : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return std::sqrt(std::pow(x,2)+std::pow(y,2));
-  }
-};
-
-/**
-   \brief
-   Return the trace of the two given values.
-*/
-struct Trace : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return x+y;
-  }
-};
-
-/**
-   \brief Return 1 if abs(x)>y, otherwise return 0.
-*/
-struct AbsGT : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return fabs(x)>y;
-  }
-};
-
-/**
-   \brief Return 1 if abs(x)<=y, otherwise return 0.
-*/
-struct AbsLTE : public std::binary_function<double,double,double>
-{
-  inline double operator()(double x, double y) const
-  {
-    return fabs(x)<=y;
-  }
-};
-
-
-/**
-   \brief
-   Perform the given operation upon all values in all data-points in the
-   given Data object and return the final result.
-*/
-template <class BinaryFunction>
-inline
-double
-algorithm(const DataExpanded& data,
-          BinaryFunction operation,
-	  double initial_value)
-{
-  int i,j;
-  int numDPPSample=data.getNumDPPSample();
-  int numSamples=data.getNumSamples();
-  double global_current_value=initial_value;
-  double local_current_value;
-//  DataArrayView dataView=data.getPointDataView();
-  const DataTypes::ValueType& vec=data.getVectorRO();
-  const DataTypes::ShapeType& shape=data.getShape();
-  // calculate the reduction operation value for each data point
-  // reducing the result for each data-point into the current_value variables
-  #pragma omp parallel private(local_current_value)
-  {
-      local_current_value=initial_value;
-      #pragma omp for private(i,j) schedule(static)
-      for (i=0;i<numSamples;i++) {
-        for (j=0;j<numDPPSample;j++) {
-/*          local_current_value=operation(local_current_value,dataView.reductionOp(data.getPointOffset(i,j),operation,initial_value));*/
-          local_current_value=operation(local_current_value,DataMaths::reductionOp(vec,shape,data.getPointOffset(i,j),operation,initial_value));
-
-        }
-      }
-      #pragma omp critical
-      global_current_value=operation(global_current_value,local_current_value);
-  }
-  return global_current_value;
-}
-
-// It is important that the algorithm only be applied to tags which are actually in use.
-template <class BinaryFunction>
-inline
-double
-algorithm(DataTagged& data,
-          BinaryFunction operation,
-	  double initial_value)
-{
-  double current_value=initial_value;
-
-  const DataTypes::ValueType& vec=data.getVectorRO();
-  const DataTypes::ShapeType& shape=data.getShape();
-  const DataTagged::DataMapType& lookup=data.getTagLookup();
-  const std::list<int> used=data.getFunctionSpace().getListOfTagsSTL();
-  for (std::list<int>::const_iterator i=used.begin();i!=used.end();++i)
-  {
-     int tag=*i;
-     if (tag==0)	// check for the default tag
-     {
-	current_value=operation(current_value,DataMaths::reductionOp(vec,shape,data.getDefaultOffset(),operation,initial_value));
-     }
-     else
-     {
-	DataTagged::DataMapType::const_iterator it=lookup.find(tag);
-	if (it!=lookup.end())
-	{
-		current_value=operation(current_value,DataMaths::reductionOp(vec,shape,it->second,operation,initial_value));
-	}
-     }
-  }
-  return current_value;
-}
-
-template <class BinaryFunction>
-inline
-double
-algorithm(DataConstant& data,
-          BinaryFunction operation,
-	  double initial_value)
-{
-  return DataMaths::reductionOp(data.getVectorRO(),data.getShape(),0,operation,initial_value);
-}
-
-/**
-   \brief
-   Perform the given data-point reduction operation on all data-points
-   in data, storing results in corresponding data-points of result.
-
-   Objects data and result must be of the same type, and have the same number
-   of data points, but where data has data points of rank n, result must have
-   data points of rank 0.
-
-   Calls DataArrayView::reductionOp
-*/
-template <class BinaryFunction>
-inline
-void
-dp_algorithm(const DataExpanded& data,
-             DataExpanded& result,
-             BinaryFunction operation,
-	     double initial_value)
-{
-  int i,j;
-  int numSamples=data.getNumSamples();
-  int numDPPSample=data.getNumDPPSample();
-//  DataArrayView dataView=data.getPointDataView();
-//  DataArrayView resultView=result.getPointDataView();
-  const DataTypes::ValueType& dataVec=data.getVectorRO();
-  const DataTypes::ShapeType& shape=data.getShape();
-  DataTypes::ValueType& resultVec=result.getVectorRW();
-  // perform the operation on each data-point and assign
-  // this to the corresponding element in result
-  #pragma omp parallel for private(i,j) schedule(static)
-  for (i=0;i<numSamples;i++) {
-    for (j=0;j<numDPPSample;j++) {
-/*      resultView.getData(result.getPointOffset(i,j)) =
-        dataView.reductionOp(data.getPointOffset(i,j),operation,initial_value);*/
-      resultVec[result.getPointOffset(i,j)] =
-        DataMaths::reductionOp(dataVec, shape, data.getPointOffset(i,j),operation,initial_value);
-
-    }
-  }
-}
-
-template <class BinaryFunction>
-inline
-void
-dp_algorithm(const DataTagged& data,
-             DataTagged& result,
-             BinaryFunction operation,
-	     double initial_value)
-{
-  // perform the operation on each tagged value in data
-  // and assign this to the corresponding element in result
-  const DataTypes::ShapeType& shape=data.getShape();
-  const DataTypes::ValueType& vec=data.getVectorRO();
-  const DataTagged::DataMapType& lookup=data.getTagLookup();
-  for (DataTagged::DataMapType::const_iterator i=lookup.begin(); i!=lookup.end(); i++) {
-    result.getDataByTagRW(i->first,0) =
-	DataMaths::reductionOp(vec,shape,data.getOffsetForTag(i->first),operation,initial_value);
-  }
-  // perform the operation on the default data value
-  // and assign this to the default element in result
-  result.getVectorRW()[result.getDefaultOffset()] = DataMaths::reductionOp(data.getVectorRO(),data.getShape(),data.getDefaultOffset(),operation,initial_value);
-}
-
-template <class BinaryFunction>
-inline
-void
-dp_algorithm(DataConstant& data,
-             DataConstant& result,
-             BinaryFunction operation,
-	     double initial_value)
-{
-  // perform the operation on the data value
-  // and assign this to the element in result
-  result.getVectorRW()[0] =
-    DataMaths::reductionOp(data.getVectorRO(),data.getShape(),0,operation,initial_value);
-}
-
-} // end of namespace
-
-#endif
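The functors deleted with DataAlgorithm.h above (FMax, FMin, AbsMax, AbsMin, Length, Trace, AbsGT, AbsLTE) all inherit from std::binary_function, which was deprecated in C++11 and removed in C++17. The rolling reduction that DataAlgorithmAdapter and the algorithm() templates performed can be written with a lambda instead; reduce_all and abs_max below are illustrative names, not escript code:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    template <typename Op>
    double reduce_all(const std::vector<double>& values, Op op, double initial)
    {
        double current = initial;          // same rolling update the adapter kept
        for (double v : values)
            current = op(current, v);
        return current;
    }

    int main()
    {
        std::vector<double> v{-3.0, 1.5, 2.0};
        auto abs_max = [](double x, double y) { return std::max(std::fabs(x), std::fabs(y)); };
        std::cout << reduce_all(v, abs_max, 0.0) << '\n';   // prints 3
    }

The OpenMP structure of the expanded-data version (a private rolling value per thread, merged in a critical section) is independent of this change and can wrap the same lambda.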
diff --git a/escriptcore/src/DataBlocks2D.cpp b/escriptcore/src/DataBlocks2D.cpp
deleted file mode 100644
index 2723b0e..0000000
--- a/escriptcore/src/DataBlocks2D.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "DataBlocks2D.h"
-
-#include "DataException.h"
-#include "esysUtils/EsysAssert.h"
-
-using namespace std;
-
-namespace escript {
-
-DataBlocks2D::DataBlocks2D():
-  m_numRows(0),
-  m_numCols(0),
-  m_blockSize(0)
-{
-}
-
-DataBlocks2D::DataBlocks2D(const DataBlocks2D& other):
-  m_numRows(other.m_numRows),
-  m_numCols(other.m_numCols),
-  m_blockSize(other.m_blockSize)
-{
-    m_data=other.m_data;
-}
-
-DataBlocks2D::DataBlocks2D(int numRows, int numCols, int blockSize):
-  m_numRows(numRows),
-  m_numCols(numCols),
-  m_blockSize(blockSize)
-{
-    resize(m_numRows,numCols,blockSize);
-}
-
-DataBlocks2D::~DataBlocks2D()
-{
-    m_numRows=-1;
-    m_numCols=-1;
-    m_blockSize=-1;
-}
-
-void
-DataBlocks2D::resize(int numRows, int numCols, int blockSize)
-{
-    if (numRows < 1 || numCols < 1 || blockSize < 1) {
-      stringstream temp;
-      temp << "DataBlocks2D: Error - Invalid resize parameter. numRows: " << numRows
-	   << " numCols: " << numCols << " blockSize: " << blockSize;
-      throw DataException(temp.str());
-    }
-    ValueType::size_type size=numRows*numCols*blockSize;
-    m_data.resize(size, 0.0, numCols*blockSize);
-    m_numRows=numRows;
-    m_numCols=numCols;
-    m_blockSize=blockSize;
-}
-
-void
-DataBlocks2D::Swap(DataBlocks2D& other)
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    swap(m_data,other.m_data);
-    swap(m_blockSize,other.m_blockSize);
-    swap(m_numRows,other.m_numRows);
-    swap(m_numCols,other.m_numCols);
-}
-
-DataBlocks2D&
-DataBlocks2D::operator=(const DataBlocks2D& other)
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    DataBlocks2D temp(other);
-    Swap(temp);
-    return *this;
-}
- 
-}  // end of namespace
diff --git a/escriptcore/src/DataBlocks2D.h b/escriptcore/src/DataBlocks2D.h
deleted file mode 100644
index 1ad47b0..0000000
--- a/escriptcore/src/DataBlocks2D.h
+++ /dev/null
@@ -1,337 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_DataBlocks2D_20040405_H
-#define escript_DataBlocks2D_20040405_H
-#include "system_dep.h"
-
-#include "DataVector.h"
-
-#include <sstream>
-#include <iostream>
-
-namespace escript {
-
-/**
-   \brief
-   DataBlocks2D manages a 2D array of multi-dimensional data points.
-
-   Description:
-   This class is used to manage the data held by instances of
-   the DataExpanded class.
-*/
-
-class DataBlocks2D {
-
- public:
-
-  //
-  // The type of the underlying data array under management.
-  // The multi-dimensional data points are flattened and stored
-  // serially as a vector of doubles.
-  typedef DataVector ValueType;
-
-  /**
-     \brief
-     Default constructor for DataBlocks2D.
-
-     Description:
-     Default constructor for DataBlocks2D.
-     Creates an empty DataBlocks2D object.
-  */
-  ESCRIPT_DLL_API
-  DataBlocks2D();
-
-  /**
-     \brief
-     Copy constructor for DataBlocks2D.
-
-     Description:
-     Copy constructor for DataBlocks2D.
-  */
-  ESCRIPT_DLL_API
-  DataBlocks2D(const DataBlocks2D& other);
-
-  /**
-     \brief
-     Constructor for DataBlocks2D.
-
-     Description:
-     Constructor for DataBlocks2D.
-
-     \param numRows - Input - Number of rows(samples).
-     \param numCols - Input - Number of columns(data-points per sample).
-     \param blockSize - Input - Number of elements per block(per data-point).
-
-     All parameters must be >0, else an exception will be thrown.
-  */
-  ESCRIPT_DLL_API
-  DataBlocks2D(int numRows, int numCols, int blockSize);
-
-  /**
-     \brief
-     Default destructor for DataBlocks2D.
-
-     Description:
-     Default destructor for DataBlocks2D.
-  */
-  ESCRIPT_DLL_API
-  ~DataBlocks2D();
-
-  /**
-     \brief
-     Return the size of the underlying data array.
-     ie: Number of rows * Number of columns * Number of elements per data point.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::size_type
-  size() const;
-
-  /**
-     \brief
-     Return the number of rows in this DataBlocks2D array.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::size_type
-  getNumRows() const;
-
-  /**
-     \brief
-     Return the number of columns in this DataBlocks2D array.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::size_type
-  getNumCols() const;
-
-  /**
-     \brief
-     Return the data point size for this DataBlocks2D array.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::size_type
-  getBlockSize() const;
-
-  /**
-     \brief
-     Resize the underlying data array. All current data is lost.
-     The new data elements are initialised to 0.
-
-     \param numRows - Input - Number of rows.
-     \param numCols - Input - Number of columns.
-     \param blockSize - Input - Number of elements per block.
-
-     All parameters must be >0, else an exception will be thrown.
-  */
-  ESCRIPT_DLL_API
-  void
-  resize(int numRows, int numCols, int blockSize);
-
-  /**
-     \brief
-     DataBlocks2D assignment operator =
-     Assign the given DataBlocks2D object to this one.
-  */
-  ESCRIPT_DLL_API
-  DataBlocks2D&
-  operator=(const DataBlocks2D& other);
-
-  /**
-     \brief
-     Swap all the values managed by the given DataBlocks2D objects.
-  */
-  ESCRIPT_DLL_API
-  void
-  Swap(DataBlocks2D& other);
-
-  /**
-    \brief
-    Return the 1 dimensional index of the first element for data-point (i,j)
-    within the underlying data array.
-    Provides an index for accessing this data value via the [] operator.
-    Subsequent elements of this data point can be accessed by manually
-    incrementing the returned index value.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::size_type
-  index(int row, int col) const;
-
-  /**
-    \brief
-    Return a reference to the first element for the data-point with index i
-    within the underlying data array as determined by the index(i,j) method.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::reference
-  operator[](ValueType::size_type i);
-
-  ESCRIPT_DLL_API
-  inline
-  ValueType::const_reference
-  operator[](ValueType::size_type i) const;
-
-  /**
-    \brief
-    Return a reference to the first element for the data-point (i,j).
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType::reference
-  operator()(int row, int col);
-
-  ESCRIPT_DLL_API
-  inline
-  ValueType::const_reference
-  operator()(int row, int col) const;
-
-  /**
-     \brief
-     Return a reference to the underlying data array.
-     Data returned is an array type object that can be indexed via indexes generated
-     by DataBlocks2D::index.
-  */
-  ESCRIPT_DLL_API
-  inline
-  ValueType&
-  getData();
-
-  ESCRIPT_DLL_API
-  inline
-  const ValueType&
-  getData() const;
-
-
- protected:
-
- private:
-
-  //
-  // The underlying array of data values.
-  // The two dimensional array of multi-dimensional data points is flattened
-  // and serialised within this one dimensional array of doubles.
-  ValueType m_data;
-
-  //
-  // The dimensions of the 2D array of data points.
-  ValueType::size_type m_numRows;
-  ValueType::size_type m_numCols; 
-
-  //
-  // The number of values per data point.
-  ValueType::size_type m_blockSize;
-
-};
-
-inline
-DataBlocks2D::ValueType::size_type
-DataBlocks2D::size() const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data.size();
-}
-
-inline
-DataBlocks2D::ValueType::size_type
-DataBlocks2D::getNumRows() const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_numRows;
-}
-
-inline
-DataBlocks2D::ValueType::size_type
-DataBlocks2D::getNumCols() const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_numCols;
-}
-
-inline
-DataBlocks2D::ValueType::size_type
-DataBlocks2D::getBlockSize() const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_blockSize;
-}
-
-inline
-DataBlocks2D::ValueType::size_type
-DataBlocks2D::index(int row, int col) const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    EsysAssert(((row >= 0) && (col >= 0) && (m_data.size() > 0)), "(DataBlocks2D) Index value out of range.");
-    ValueType::size_type temp=(row*m_numCols+col)*m_blockSize;
-    EsysAssert((temp <= (m_data.size()-m_blockSize)), "(DataBlocks2D) Index value out of range.");
-    return (temp);
-}
-
-inline
-DataBlocks2D::ValueType::reference
-DataBlocks2D::operator[](DataBlocks2D::ValueType::size_type i)
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data[i];
-}
-
-inline
-DataBlocks2D::ValueType::const_reference
-DataBlocks2D::operator[](DataBlocks2D::ValueType::size_type i) const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data[i];
-}
-
-inline
-DataBlocks2D::ValueType::reference
-DataBlocks2D::operator()(int row, int col)
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data[index(row,col)];
-}
-
-inline
-DataBlocks2D::ValueType::const_reference
-DataBlocks2D::operator()(int row, int col) const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data[index(row,col)];
-}
-
-inline
-DataBlocks2D::ValueType&
-DataBlocks2D::getData()
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data;
-}
-
-inline
-const DataBlocks2D::ValueType&
-DataBlocks2D::getData() const
-{
-    EsysAssert(((m_numRows >= 0) && (m_numCols >= 0) && (m_blockSize >= 0)), "(DataBlocks2D) Invalid object.");
-    return m_data;
-}
-
-} // end of namespace
-
-#endif
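
For reference, the removed DataBlocks2D flattened its 2D array of data points into one vector of doubles, with the first element of point (row, col) at offset (row*numCols + col)*blockSize. An illustrative sketch of that layout (not the escript class itself):

    #include <cstddef>
    #include <vector>

    struct Blocks2D {                     // illustrative only, not the escript class
        std::size_t numRows, numCols, blockSize;
        std::vector<double> data;         // numRows * numCols * blockSize doubles

        // Flat offset of the first element of data point (row, col).
        std::size_t index(std::size_t row, std::size_t col) const {
            return (row * numCols + col) * blockSize;
        }
        double& operator()(std::size_t row, std::size_t col) {
            return data[index(row, col)];
        }
    };
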
diff --git a/escriptcore/src/DataC.cpp b/escriptcore/src/DataC.cpp
deleted file mode 100644
index 3326f8a..0000000
--- a/escriptcore/src/DataC.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "DataC.h"
-
-#include "Data.h"
-#include "DataTypes.h"
-
-int getFunctionSpaceType(const escript::Data* data) 
-{
-  return data->getFunctionSpace().getTypeCode();
-}
-
-
-int isDataPointShapeEqual(const escript::Data* data, int rank, const int* dimensions)
-{
-  if (data == 0) {
-       return 1;
-  } else {
-     return data->isDataPointShapeEqual(rank, dimensions);
-  }
-}
-
-int getNumDataPointsPerSample(const escript::Data* data) 
-{
-  if (data == 0) {
-       return 0;
-  } else {
-     if (data->isEmpty()) {
-        return 0;
-     } else {
-          return (data->getNumDataPointsPerSample());
-     }
-  }
-}
-
-int numSamplesEqual(const escript::Data* data, int numDataPointsPerSample,
-                    dim_t numSamples)
-{
-  if (data == 0) {
-     return 1;
-  } else {
-     return data->numSamplesEqual(numDataPointsPerSample, numSamples);
-  }
-}
-
-int getDataPointRank(const escript::Data* data)
-{
-  if (data == (const escript::Data*)0) {
-       return 0;
-  } else {
-       return data->getDataPointRank();
-  }
-}
-
-int getDataPointShape(const escript::Data* data,int i)
-{
-  if (data == 0) {
-       return 0;
-  } else {
-     int rank = data->getDataPointRank();
-     if (i<0 || i>=rank) {
-        return 1;
-     } else {
-        const escript::DataTypes::ShapeType& view=data->getDataPointShape();
-        return view[i];
-     }
-  }
-}
-
-int getDataPointSize(const escript::Data* data)
-{
-  return data->getDataPointSize();
-}
-
-int getLength(const escript::Data* data)
-{
-  return data->getLength();
-}
-
-int isExpanded(const escript::Data* data)
-{
-  if (data == 0) {
-       return false;
-  } else {
-     if (data->isEmpty()) {
-        return false;
-     } else {
-        return data->actsExpanded();
-     }
-  }
-}
-
-int isEmpty(const escript::Data* data) 
-{
-  if (data == 0) {
-       return true;
-  } else {
-      return data->isEmpty();
-  }
-}
-
-double const* getSampleDataRO(const escript::Data* data, int sampleNo)
-{
-  if (data == 0) {
-       return NULL;
-  } else {
-     if (data->isEmpty()) {
-        return NULL;
-     } else {
-        return data->getSampleDataRO(sampleNo);
-     }
-  }
-}
-
-double* getSampleDataRW(escript::Data* data, int sampleNo)
-{
-  if (data == 0) {
-       return NULL;
-  } else {
-     if (data->isEmpty()) {
-        return NULL;
-     } else {
-        return data->getSampleDataRW(sampleNo);
-     }
-  }
-}
-
-const double* getSampleDataROFast(const escript::Data* data, int sampleNo)
-{
-  return data->getSampleDataRO(sampleNo);
-}
-
-double* getSampleDataRWFast(escript::Data* data, int sampleNo)
-{
-  return data->getSampleDataRW(sampleNo);
-}
-
-double* getDataRW(escript::Data* data)
-{
-  
-  if (data->getNumSamples()>0)
-  {
-     requireWrite(data);
-     return getSampleDataRWFast(data,0);
-  }
-  return 0;
-}
-
-
-void requireWrite(escript::Data* data)
-{
-  if (data == 0) {
-       return;
-  } else {
-      data->requireWrite();
-  }
-}
diff --git a/escriptcore/src/DataC.h b/escriptcore/src/DataC.h
deleted file mode 100644
index 39600ef..0000000
--- a/escriptcore/src/DataC.h
+++ /dev/null
@@ -1,172 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  escript_DataC_20040611_H
-#define escript_DataC_20040611_H
-#include "system_dep.h"
-
-#include "Data.h"
-
-
-
-/**
-   \brief
-   Return the function space type code.
-   \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API int getFunctionSpaceType(const escript::Data* data);
-
-/**
-   \brief
-   sets the int variable _FS to the function space type of _DATA if the data
-   object is not empty.
-   \param _FS Input/Output - variable to be updated.
-   \param _DATA Input - C wrapper for Data.
-*/
-#define updateFunctionSpaceType(_FS,_DATA) _FS=(isEmpty(_DATA) ? _FS : getFunctionSpaceType(_DATA))
-                                                                                     
-/**
-   \brief
-   returns true if the function space type of _DATA is equal to _FS or is empty
-   \param _FS Input - function space type to checked against
-   \param _DATA Input - C wrapper for Data.
-*/
-#define functionSpaceTypeEqual(_FS,_DATA) ( (isEmpty(_DATA) || _FS==getFunctionSpaceType(_DATA)) ) ? 1 : 0
-
-/**
-   \brief
-   Returns the true if the data are empty or data is NULL.
-   \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API int isEmpty(const escript::Data* data);
-
-/**
-   \brief
-   Return true if the input shape matches the data point shape for data
-   \param data Input - C wrapper for Data.
-   \param rank Input - number of dimensions.
-   \param dimensions Input - 
-*/
-ESCRIPT_DLL_API int isDataPointShapeEqual(const escript::Data* data, int rank, const int* dimensions);
-/**
-   \brief
-   Return true if the number of data points per sample and the number 
-   of samples equal the input values. In the case that data is empty or NULL,
-   true is returned.
-   \param data Input - C wrapper for Data.
-   \param numDataPointsPerSample Input - number of data points per sample
-   \param numSamples Input - number of samples
-*/
-ESCRIPT_DLL_API int numSamplesEqual(const escript::Data* data, int numDataPointsPerSample,
-		    dim_t numSamples);
-
-/**
-   \brief
-   Returns the number of data points per sample
-   \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API int getNumDataPointsPerSample(const escript::Data* data);
-
-/**
-   \brief
-   Returns the rank of the point data for the data. 
-   \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API int getDataPointRank(const escript::Data* data);
-
-/**
-   \brief
-   Returns the value of the i-th component of the shape of the point data.
-   \param data Input - C wrapper for Data.
-   \param i Input - index of shape component.
-*/
-ESCRIPT_DLL_API int getDataPointShape(const escript::Data* data, int i);
-
-/**
-   \brief
-   Return the number of doubles needed for each data point.
-   \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API int getDataPointSize(const escript::Data* data);
-
-/**
-   \brief
-   Return true if data can be treated as expanded.
-   
-   Argument data may be NULL, in which case false is returnd.
-   \param data Input - C wrapper for Data.
-   \return true if data is expanded or the data is lazy but would resolve to expanded. False otherwise.
-*/
-ESCRIPT_DLL_API int isExpanded(const escript::Data* data);
-
-/**
-   \brief
-   Return a pointer to the data for the given sample number.
-   if data is empty NULL is returned.
-   data may be NULL, in which case NULL is returnd.
-  \param data Input - C wrapper for Data.
-  \param sampleNo Input - The sample number.
-
-*/
-ESCRIPT_DLL_API double const * getSampleDataRO(const escript::Data* data, int sampleNo);
-/* Placement of __const might be important. See .cpp */
-
-
-ESCRIPT_DLL_API double* getSampleDataRW(escript::Data* data, int sampleNo);
-
-
-/**
-   \brief
-   Return a pointer to the data for the given sample number.
-   Fast version of getSampledataRO: does no error checking.
-  \param data Input - C wrapper for Data.
-  \param sampleNo Input - The sample number.
-
-*/
-ESCRIPT_DLL_API double const* getSampleDataROFast(const escript::Data* data, int sampleNo);
-
-/**
-   \brief
-   Return a pointer to the data for the given sample number.
-   Fast version of getSampledataRW: does no error checking.
-  \param data Input - C wrapper for Data.
-  \param sampleNo Input - The sample number.
-*/
-ESCRIPT_DLL_API double* getSampleDataRWFast(escript::Data* data, int sampleNo);
-
-
-/**
-   \brief
-   Return getSampleDataRWFast(escriptDataC* data, 0) if there are samples.
-   if not, returns NULL.
-   \warning This function calls requireWrite if there are samples so do not use in parallel sections.
-   \warning Please do not use this in new code.
-  \param data Input - C wrapper for Data.
-*/
-ESCRIPT_DLL_API double* getDataRW(escript::Data* data);
-
-
-/**
-   \brief Ensure that this object is ready for writing.
-   It will be resolved and copied if it is currently shared.
-   Use only in single threaded sections of code.
-   Do not create new Data objects based on this one between this call and 
-   writing to the object.
-*/
-ESCRIPT_DLL_API void requireWrite(escript::Data* data);
-
-#endif
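
The removed C-style wrapper was typically driven as below — a hedged sketch that uses only the helpers declared in the deleted header; the surrounding Data construction is elided and zeroFirstSample is a made-up example function, not upstream code:

    #include "DataC.h"   // the removed wrapper declared these C-style helpers

    // Illustrative only: zero the first sample of an expanded Data object
    // through the (now removed) wrapper functions.
    void zeroFirstSample(escript::Data* d)
    {
        if (isEmpty(d) || !isExpanded(d))
            return;
        requireWrite(d);                              // resolve/copy before writing
        const int dpps = getNumDataPointsPerSample(d);
        const int size = getDataPointSize(d);
        double* sample = getSampleDataRW(d, 0);       // pointer to the first sample
        for (int i = 0; i < dpps * size; ++i)
            sample[i] = 0.0;
    }
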
diff --git a/escriptcore/src/DataConstant.cpp b/escriptcore/src/DataConstant.cpp
index a66c6cf..5efbbf6 100644
--- a/escriptcore/src/DataConstant.cpp
+++ b/escriptcore/src/DataConstant.cpp
@@ -14,28 +14,38 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Data.h"
 #include "DataConstant.h"
+#include "Data.h"
 #include "DataException.h"
-#include "esysUtils/EsysAssert.h"
-#include "esysUtils/Esys_MPI.h"
+#include "DataVectorOps.h"
 
 #include <iostream>
-#include <boost/python/extract.hpp>
-#include <boost/scoped_ptr.hpp>
-#ifdef USE_NETCDF
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#include "DataMaths.h"
-
-// #define CHECK_FOR_EX_WRITE if (!checkNoSharing()) {throw DataException("Attempt to modify shared object");}
-
-#define CHECK_FOR_EX_WRITE if (!checkNoSharing()) {std::ostringstream ss; ss << " Attempt to modify shared object. line " << __LINE__ << " of " << __FILE__; ss << m_owners.size(); cerr << ss << endl; /* *((int*)0)=17; */throw DataException(ss.str());}
 
+#ifdef SLOWSHARECHECK
+  #define CHECK_FOR_EX_WRITE if (isShared()) {\
+    std::ostringstream ss;\
+    ss << "Attempt to modify shared object. line " << __LINE__ << " of " << __FILE__;\
+    int nn=17;\
+    try\
+    {\
+	nn=shared_from_this().use_count();\
+	ss << "use count=" << nn << "\n";\
+    } catch (...)\
+    {\
+	ss << "Failed to get a use count\n";\
+    }\
+    std::cerr << ss.str() << std::endl;\
+    throw DataException(ss.str());}
+#else
+  #define CHECK_FOR_EX_WRITE 
+#endif
+    
+    
 using namespace std;
 using namespace boost::python;
 
@@ -45,116 +55,211 @@ DataConstant::DataConstant(const WrappedArray& value,
                            const FunctionSpace& what)
   : parent(what,value.getShape())
 {
-  m_data.copyFromArray(value,1);
+  if (value.isComplex())
+  {
+      m_data_c.copyFromArray(value,1);
+      this->m_iscompl=true;
+  }
+  else
+  {
+      m_data_r.copyFromArray(value,1);
+  }
 }
 
 DataConstant::DataConstant(const DataConstant& other)
   : parent(other.getFunctionSpace(),other.getShape())
-{ 
-  m_data=other.m_data;
+{
+  this->m_iscompl=other.m_iscompl;
+  if (other.isComplex()) 
+  {
+      m_data_c=other.m_data_c;
+  }
+  else
+  {
+      m_data_r=other.m_data_r;
+  }
 }
 
 DataConstant::DataConstant(const DataConstant& other,
                            const DataTypes::RegionType& region)
   : parent(other.getFunctionSpace(),DataTypes::getResultSliceShape(region))
 {
-  //
-  // allocate space for this new DataConstant's data
-  int len = getNoValues();
-  m_data.resize(len,0.,len);
-  //
-  // create a view of the data with the correct shape
-  DataTypes::RegionLoopRangeType region_loop_range=DataTypes::getSliceRegionLoopRange(region);
-  //
-  // load the view with the data from the slice
-  DataTypes::copySlice(m_data,getShape(),0,other.getVectorRO(),other.getShape(),0,region_loop_range);
+    // create a view of the data with the correct shape
+    DataTypes::RegionLoopRangeType region_loop_range=DataTypes::getSliceRegionLoopRange(region);
+    int len = getNoValues();
+    if (other.isComplex())
+    {
+        // allocate space for this new DataConstant's data
+        m_data_c.resize(len,0.,len);
+        // load the view with the data from the slice
+        DataTypes::copySlice(m_data_c,getShape(),0,other.getVectorROC(),other.getShape(),0,region_loop_range);
+	m_iscompl=true; 	
+    }
+    else
+    {
+        // allocate space for this new DataConstant's data
+        m_data_r.resize(len,0.,len);
+        // load the view with the data from the slice
+        DataTypes::copySlice(m_data_r,getShape(),0,other.getVectorRO(),other.getShape(),0,region_loop_range);
+	m_iscompl=false;
+    }
 }
 
 DataConstant::DataConstant(const FunctionSpace& what,
                            const DataTypes::ShapeType &shape,
-                           const DataTypes::ValueType &data)
+                           const DataTypes::RealVectorType &data)
   : parent(what,shape)
 {
-  //
-  // copy the data in the correct format
-  m_data=data;
+    // copy the data in the correct format
+    m_data_r=data;
+}
+
+DataConstant::DataConstant(const FunctionSpace& what,
+                           const DataTypes::ShapeType &shape,
+                           const DataTypes::CplxVectorType &data)
+  : parent(what,shape)
+{
+    // copy the data in the correct format
+    m_data_c=data;
+    m_iscompl=true;        
 }
 
 DataConstant::DataConstant(const FunctionSpace& what,
                            const DataTypes::ShapeType &shape,
-                           const double v)
-  : parent(what,shape), m_data(DataTypes::noValues(shape),v)
+                           const DataTypes::real_t v)
+  : parent(what,shape), m_data_r(DataTypes::noValues(shape),v)
 {
 }
 
+DataConstant::DataConstant(const FunctionSpace& what,
+                           const DataTypes::ShapeType &shape,
+                           const DataTypes::cplx_t v)
+  : parent(what,shape), m_data_c(DataTypes::noValues(shape),v)
+{
+    m_iscompl=true;
+}
 
-bool
-DataConstant::hasNaN() const
+bool DataConstant::hasNaN() const
 {
-  bool haveNaN=false;
-  #pragma omp parallel for
-	for (ValueType::size_type i=0;i<m_data.size();++i)
-	{
-		if (nancheck(m_data[i]))	
-		{
-		    #pragma omp critical 
+    bool haveNaN=false;
+    if (isComplex())
+    {
+        #pragma omp parallel for
+        for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+        {
+            if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))
+            {
+                #pragma omp critical 
+                {
+                    haveNaN=true;
+                }
+            }
+        }
+    }
+    else
+    {
+        #pragma omp parallel for
+        for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
         {
-            haveNaN=true;
+            if (std::isnan(m_data_r[i]))
+            {
+                #pragma omp critical 
+                {
+                    haveNaN=true;
+                }
+            }
         }
-		}
-	}
-	return haveNaN;
+    }
+    return haveNaN;
 }
 
 void
 DataConstant::replaceNaN(double value)
 {
-  #pragma omp parallel for
-  for (ValueType::size_type i=0;i<m_data.size();++i)
-  {
-    if (nancheck(m_data[i]))  
+    CHECK_FOR_EX_WRITE  
+    if (isComplex())
     {
-      m_data[i] = value;
-    } 
-  }
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+        if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))  
+        {
+          m_data_c[i] = value;
+        }
+      }
+    }
+    else
+    {
+      #pragma omp parallel for
+      for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
+      {
+        if (std::isnan(m_data_r[i]))  
+        {
+          m_data_r[i] = value;
+        }
+      }    
+    }
+}
+
+void
+DataConstant::replaceNaN(DataTypes::cplx_t value)
+{
+    CHECK_FOR_EX_WRITE  
+    if (isComplex())
+    {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+        if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag())) 
+        {
+          m_data_c[i] = value;
+        }
+      }
+    }
+    else
+    {
+      complicate();
+      replaceNaN(value);
+    }
 }
 
-string
-DataConstant::toString() const
+string DataConstant::toString() const
 {
-  return DataTypes::pointToString(m_data,getShape(),0,"");
+    if (isComplex())
+        return DataTypes::pointToString(m_data_c,getShape(),0,"");
+
+    return DataTypes::pointToString(m_data_r,getShape(),0,"");
 }
 
 
-DataAbstract*
-DataConstant::deepCopy()
+DataAbstract* DataConstant::deepCopy() const
 {
-  return new DataConstant(*this);
+    return new DataConstant(*this);
 }
 
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataConstant::getPointOffset(int sampleNo,
                              int dataPointNo) const
 {
 // We avoid this check for constant data due to issues manipulating 
 // data with no samples.
 
-//  EsysAssert((validSamplePointNo(dataPointNo) && validSampleNo(sampleNo)),
+//  ESYS_ASSERT((validSamplePointNo(dataPointNo) && validSampleNo(sampleNo)),
 //              "Invalid index, sampleNo: " << sampleNo << " dataPointNo: " << dataPointNo);
   //
   // Whatever the coord's always return the same value as this is constant data.
   return 0;
 }
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataConstant::getPointOffset(int sampleNo,
                              int dataPointNo)
 {
 // We avoid this check for constant data due to issues manipulating 
 // data with no samples.
  
-//  EsysAssert((validSamplePointNo(dataPointNo) && validSampleNo(sampleNo)),
+//  ESYS_ASSERT((validSamplePointNo(dataPointNo) && validSampleNo(sampleNo)),
 //              "Invalid index, sampleNo: " << sampleNo << " dataPointNo: " << dataPointNo);
   //
   // Whatever the coord's always return the same value as this is constant data.
@@ -163,10 +268,10 @@ DataConstant::getPointOffset(int sampleNo,
   return 0;
 }
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataConstant::getLength() const
 {
-  return m_data.size();
+    return std::max(m_data_c.size(), m_data_r.size());  
 }
 
 DataAbstract*
@@ -196,8 +301,14 @@ DataConstant::setSlice(const DataAbstract* value,
     throw DataException (DataTypes::createShapeErrorMessage(
                 "Error - Couldn't copy slice due to shape mismatch.",shape,value->getShape()));
   }
-  //   getPointDataView().copySliceFrom(tempDataConst->getPointDataView(),region_loop_range);
-  DataTypes::copySliceFrom(m_data,getShape(),0,tempDataConst->getVectorRO(), tempDataConst->getShape(),0,region_loop_range);
+  if (value->isComplex())
+  {
+      DataTypes::copySliceFrom(m_data_c,getShape(),0,tempDataConst->getVectorROC(), tempDataConst->getShape(),0,region_loop_range);
+  }
+  else
+  {
+      DataTypes::copySliceFrom(m_data_r,getShape(),0,tempDataConst->getVectorRO(), tempDataConst->getShape(),0,region_loop_range);
+  }
 }
 
 
@@ -209,29 +320,78 @@ DataConstant::symmetric(DataAbstract* ev)
   if (temp_ev==0) {
     throw DataException("Error - DataConstant::symmetric: casting to DataConstant failed (probably a programming error).");
   }
-  DataMaths::symmetric(m_data,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  if (isComplex())
+  {
+      escript::symmetric(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0);
+  }
+  else
+  {
+      escript::symmetric(m_data_r,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  }
 }
 
 void
-DataConstant::nonsymmetric(DataAbstract* ev)
+DataConstant::antisymmetric(DataAbstract* ev)
 {
   DataConstant* temp_ev=dynamic_cast<DataConstant*>(ev);
   if (temp_ev==0) {
-    throw DataException("Error - DataConstant::nonsymmetric: casting to DataConstant failed (probably a programming error).");
+    throw DataException("Error - DataConstant::antisymmetric: casting to DataConstant failed (probably a programming error).");
+  }
+  if (isComplex())
+  {
+      escript::antisymmetric(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0);
   }
-  DataMaths::nonsymmetric(m_data,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  else
+  {
+      escript::antisymmetric(m_data_r,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  }  
 }
 
 void
+DataConstant::hermitian(DataAbstract* ev)
+{
+  DataConstant* temp_ev=dynamic_cast<DataConstant*>(ev);
+  if (temp_ev==0) {
+    throw DataException("Error - DataConstant::hermitian: casting to DataConstant failed (probably a programming error).");
+  }
+  if (!isComplex() || !temp_ev->isComplex())
+  {
+      throw DataException("DataTagged::hermitian: do not call this method with real data");
+  }  
+  escript::hermitian(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0);
+}
+
+void
+DataConstant::antihermitian(DataAbstract* ev)
+{
+  DataConstant* temp_ev=dynamic_cast<DataConstant*>(ev);
+  if (temp_ev==0) {
+    throw DataException("Error - DataConstant::antihermitian: casting to DataConstant failed (probably a programming error).");
+  }
+  if (!isComplex() || !temp_ev->isComplex())
+  {
+      throw DataException("DataTagged::antihermitian: do not call this method with real data");
+  }  
+  escript::antihermitian(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0);
+}
+
+
+void
 DataConstant::trace(DataAbstract* ev, int axis_offset)
 {
   DataConstant* temp_ev=dynamic_cast<DataConstant*>(ev);
   if (temp_ev==0) {
     throw DataException("Error - DataConstant::trace: casting to DataConstant failed (probably a programming error).");
   }
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  DataMaths::trace(m_data,getShape(),0,evVec,evShape,0,axis_offset);
+  if (isComplex())
+  {
+      escript::trace(m_data_c,getShape(),0,temp_ev->getVectorRWC(),evShape,0,axis_offset);
+  }
+  else
+  {
+      escript::trace(m_data_r,getShape(),0,temp_ev->getVectorRW(),evShape,0,axis_offset);    
+  }
 }
 
 void
@@ -241,7 +401,14 @@ DataConstant::swapaxes(DataAbstract* ev, int axis0, int axis1)
   if (temp_ev==0) {
     throw DataException("Error - DataConstant::swapaxes: casting to DataConstant failed (probably a programming error).");
   }
-  DataMaths::swapaxes(m_data,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0,axis0,axis1);
+  if (isComplex())
+  {
+      escript::swapaxes(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0,axis0,axis1);
+  }
+  else
+  {
+      escript::swapaxes(m_data_r,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0,axis0,axis1);
+  }
 }
 
 void
@@ -251,7 +418,14 @@ DataConstant::transpose(DataAbstract* ev, int axis_offset)
   if (temp_ev==0) {
     throw DataException("Error - DataConstant::transpose: casting to DataConstant failed (probably a programming error).");
   }
-  DataMaths::transpose(m_data, getShape(),0, temp_ev->getVectorRW(),temp_ev->getShape(),0,axis_offset);
+  if (isComplex())
+  {
+      escript::transpose(m_data_c, getShape(),0, temp_ev->getVectorRWC(),temp_ev->getShape(),0,axis_offset);
+  }
+  else
+  {
+      escript::transpose(m_data_r, getShape(),0, temp_ev->getVectorRW(),temp_ev->getShape(),0,axis_offset);
+  }
 }
 
 void
@@ -261,8 +435,16 @@ DataConstant::eigenvalues(DataAbstract* ev)
   if (temp_ev==0) {
     throw DataException("Error - DataConstant::eigenvalues: casting to DataConstant failed (probably a programming error).");
   }
-  DataMaths::eigenvalues(m_data,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  if (isComplex())
+  {
+      escript::eigenvalues(m_data_c,getShape(),0,temp_ev->getVectorRWC(), temp_ev->getShape(),0);    
+  }
+  else
+  {
+      escript::eigenvalues(m_data_r,getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0);
+  }
 }
+
 void
 DataConstant::eigenvalues_and_eigenvectors(DataAbstract* ev,DataAbstract* V,const double tol)
 {
@@ -274,7 +456,7 @@ DataConstant::eigenvalues_and_eigenvectors(DataAbstract* ev,DataAbstract* V,cons
   if (temp_V==0) {
     throw DataException("Error - DataConstant::eigenvalues_and_eigenvectors: casting to DataConstant failed (probably a programming error).");
   }
-  DataMaths::eigenvalues_and_eigenvectors(m_data, getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0,temp_V->getVectorRW(), temp_V->getShape(),0,tol);
+  escript::eigenvalues_and_eigenvectors(m_data_r, getShape(),0,temp_ev->getVectorRW(), temp_ev->getShape(),0,temp_V->getVectorRW(), temp_V->getShape(),0,tol);
 }
 
 
@@ -285,14 +467,14 @@ DataConstant::matrixInverse(DataAbstract* out) const
   DataConstant* temp=dynamic_cast<DataConstant*>(out);
   if (temp==0)
   {
-	throw DataException("Error - DataConstant::matrixInverse: casting to DataConstant failed (probably a programming error).");
+        throw DataException("Error - DataConstant::matrixInverse: casting to DataConstant failed (probably a programming error).");
   }
   if (getRank()!=2)
   {
-	throw DataException("Error - DataExpanded::matrixInverse: input must be rank 2.");
+        throw DataException("Error - DataExpanded::matrixInverse: input must be rank 2.");
   }
   LapackInverseHelper h(getShape()[0]);
-  int res=DataMaths::matrix_inverse(m_data, getShape(), 0, temp->getVectorRW(), temp->getShape(), 0, 1, h);
+  int res=escript::matrix_inverse(m_data_r, getShape(), 0, temp->getVectorRW(), temp->getShape(), 0, 1, h);
   return res;
 }
 
@@ -300,78 +482,77 @@ void
 DataConstant::setToZero()
 {
     CHECK_FOR_EX_WRITE
-    DataTypes::ValueType::size_type n=m_data.size();
-    for (int i=0; i<n ;++i) m_data[i]=0.;
+    DataTypes::RealVectorType::size_type n=m_data_r.size();
+    for (int i=0; i<n ;++i) m_data_r[i]=0.;
 }
 
 void
 DataConstant::dump(const std::string fileName) const
 {
-   #ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
    const NcDim* ncdims[DataTypes::maxRank];
    NcVar* var;
    int rank = getRank();
    int type=  getFunctionSpace().getTypeCode();
    int ndims =0;
    long dims[DataTypes::maxRank];
-   const double* d_ptr=&(m_data[0]);
+   const double* d_ptr=&(m_data_r[0]);
    DataTypes::ShapeType shape = getShape();
-   int mpi_iam=getFunctionSpace().getDomain()->getMPIRank();
-   int mpi_num=getFunctionSpace().getDomain()->getMPISize();
+   JMPI mpiInfo(getFunctionSpace().getDomain()->getMPI());
 #ifdef ESYS_MPI
+   const int mpi_iam = mpiInfo->rank;
+   const int mpi_num = mpiInfo->size;
    MPI_Status status;
-#endif
 
-#ifdef ESYS_MPI
    /* Serialize NetCDF I/O */
-   if (mpi_iam>0) MPI_Recv(&ndims, 0, MPI_INT, mpi_iam-1, 81802, MPI_COMM_WORLD, &status);
+   if (mpi_iam > 0)
+       MPI_Recv(&ndims, 0, MPI_INT, mpi_iam-1, 81802, mpiInfo->comm, &status);
 #endif
 
    // netCDF error handler
    NcError err(NcError::verbose_nonfatal);
    // Create the file.
-   const std::string newFileName(esysUtils::appendRankToFileName(fileName,
-                                                            mpi_num, mpi_iam));
+   const std::string newFileName(mpiInfo->appendRankToFileName(fileName));
    NcFile dataFile(newFileName.c_str(), NcFile::Replace);
    // check if writing was successful
    if (!dataFile.is_valid())
-	throw DataException("Error - DataConstant:: opening of netCDF file for output failed.");
+        throw DataException("Error - DataConstant:: opening of netCDF file for output failed.");
    if (!dataFile.add_att("type_id",0) )
-	throw DataException("Error - DataConstant:: appending data type to netCDF file failed.");
+        throw DataException("Error - DataConstant:: appending data type to netCDF file failed.");
    if (!dataFile.add_att("rank",rank) )
-	throw DataException("Error - DataConstant:: appending rank attribute to netCDF file failed.");
+        throw DataException("Error - DataConstant:: appending rank attribute to netCDF file failed.");
    if (!dataFile.add_att("function_space_type",type))
-	throw DataException("Error - DataConstant:: appending function space attribute to netCDF file failed.");
+        throw DataException("Error - DataConstant:: appending function space attribute to netCDF file failed.");
 
    if (rank == 0) {
       if( ! (ncdims[0] = dataFile.add_dim("l", 1)) )
-		throw DataException("Error - DataConstant:: appending ncdimension 0 to netCDF file failed.");
+                throw DataException("Error - DataConstant:: appending ncdimension 0 to netCDF file failed.");
       dims[0]=1,
       ndims=1;
    } else {
        ndims=rank;
        dims[0]=shape[0];
        if (! (ncdims[0] = dataFile.add_dim("d0",shape[0])) )
-		throw DataException("Error - DataConstant:: appending ncdimension 0 to netCDF file failed.");
+                throw DataException("Error - DataConstant:: appending ncdimension 0 to netCDF file failed.");
        if ( rank >1 ) {
            dims[1]=shape[1];
            if (! (ncdims[1] = dataFile.add_dim("d1",shape[1])) )
-		throw DataException("Error - DataConstant:: appending ncdimension 1 to netCDF file failed.");
+                throw DataException("Error - DataConstant:: appending ncdimension 1 to netCDF file failed.");
        }
        if ( rank >2 ) {
            dims[2]=shape[2];
            if (! (ncdims[2] = dataFile.add_dim("d2", shape[2])) )
-		throw DataException("Error - DataConstant:: appending ncdimension 2 to netCDF file failed.");
+                throw DataException("Error - DataConstant:: appending ncdimension 2 to netCDF file failed.");
        }
        if ( rank >3 ) {
            dims[3]=shape[3];
            if (! (ncdims[3] = dataFile.add_dim("d3", shape[3])) )
-		throw DataException("Error - DataConstant:: appending ncdimension 3 to netCDF file failed.");
+                throw DataException("Error - DataConstant:: appending ncdimension 3 to netCDF file failed.");
        }
    }
 
    if (! ( var = dataFile.add_var("data", ncDouble, ndims, ncdims)) )
-	throw DataException("Error - DataConstant:: appending variable to netCDF file failed.");
+        throw DataException("Error - DataConstant:: appending variable to netCDF file failed.");
    if (! (var->put(d_ptr,dims)) )
          throw DataException("Error - DataConstant:: copy data to netCDF buffer failed.");
 #ifdef ESYS_MPI
@@ -384,17 +565,67 @@ DataConstant::dump(const std::string fileName) const
 
 // These used to be marked as inline in DataConstant.
 // But they are marked virtual in DataReady
-DataTypes::ValueType&
+DataTypes::RealVectorType&
 DataConstant::getVectorRW()
 {
-  CHECK_FOR_EX_WRITE
-  return m_data;
+    CHECK_FOR_EX_WRITE
+    return m_data_r;
 }
 
-const DataTypes::ValueType&
+const DataTypes::RealVectorType&
 DataConstant::getVectorRO() const
 {
-  return m_data;
+    return m_data_r;
+}
+
+DataTypes::CplxVectorType&
+DataConstant::getVectorRWC()
+{
+    CHECK_FOR_EX_WRITE
+    return m_data_c;
+}
+
+const DataTypes::CplxVectorType&
+DataConstant::getVectorROC() const
+{
+    return m_data_c;
+}
+
+DataTypes::RealVectorType&
+DataConstant::getTypedVectorRW(DataTypes::real_t dummy)
+{
+    CHECK_FOR_EX_WRITE
+    return m_data_r;
+}
+
+const DataTypes::RealVectorType&
+DataConstant::getTypedVectorRO(DataTypes::real_t dummy) const
+{
+    return m_data_r;
+}
+
+DataTypes::CplxVectorType&
+DataConstant::getTypedVectorRW(DataTypes::cplx_t dummy)
+{
+    CHECK_FOR_EX_WRITE
+    return m_data_c;
+}
+
+const DataTypes::CplxVectorType&
+DataConstant::getTypedVectorRO(DataTypes::cplx_t dummy) const
+{
+    return m_data_c;
+}
+
+void DataConstant::complicate()
+{
+    if (!isComplex())
+    {
+        fillComplexFromReal(m_data_r, m_data_c);
+        this->m_iscompl=true;
+        m_data_r.resize(0,0,1);
+    }
 }
 
 }  // end of namespace
+
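
DataConstant now keeps separate real and complex payloads (m_data_r / m_data_c), and complicate() promotes a real object in place before releasing the real vector. A hedged sketch of that promotion step, assuming fillComplexFromReal is an element-wise widening copy (fillComplexFromRealSketch below is illustrative, not the upstream helper):

    #include <complex>
    #include <cstddef>
    #include <vector>

    // Illustrative element-wise promotion; the upstream fillComplexFromReal is
    // assumed to behave like this.
    void fillComplexFromRealSketch(const std::vector<double>& re,
                                   std::vector< std::complex<double> >& out)
    {
        out.resize(re.size());
        for (std::size_t i = 0; i < re.size(); ++i)
            out[i] = std::complex<double>(re[i], 0.0);   // imaginary part starts at zero
    }
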
diff --git a/escriptcore/src/DataConstant.h b/escriptcore/src/DataConstant.h
index e39fc1b..95305dc 100644
--- a/escriptcore/src/DataConstant.h
+++ b/escriptcore/src/DataConstant.h
@@ -49,7 +49,7 @@ typedef DataReady parent;
      \param what - Input - A description of what this data object represents.
   */
   ESCRIPT_DLL_API
-  DataConstant(const WrappedArray& value,
+  explicit DataConstant(const WrappedArray& value,
                const FunctionSpace& what);
 
 
@@ -71,7 +71,7 @@ typedef DataReady parent;
      \param region - Input - region to copy.
   */
   ESCRIPT_DLL_API
-  DataConstant(const DataConstant& other,
+  explicit DataConstant(const DataConstant& other,
                const DataTypes::RegionType& region);
 
   /**
@@ -85,16 +85,23 @@ typedef DataReady parent;
      \param data - the data values for each data-point.
   */
   ESCRIPT_DLL_API
-  DataConstant(const FunctionSpace& what,
+  explicit DataConstant(const FunctionSpace& what,
                const DataTypes::ShapeType &shape,
-               const DataTypes::ValueType &data);
+               const DataTypes::RealVectorType &data);
+  
+  explicit DataConstant(const FunctionSpace& what,
+               const DataTypes::ShapeType &shape,
+               const DataTypes::CplxVectorType &data);    
 
   ESCRIPT_DLL_API
-  DataConstant(const FunctionSpace& what,
+  explicit DataConstant(const FunctionSpace& what,
+                           const DataTypes::ShapeType &shape,
+                           const DataTypes::real_t v);
+               
+  explicit DataConstant(const FunctionSpace& what,
                            const DataTypes::ShapeType &shape,
-                           const double v);
-	       
-	       
+                           const DataTypes::cplx_t v);
+               
   ESCRIPT_DLL_API
   bool
   isConstant() const 
@@ -114,7 +121,11 @@ typedef DataReady parent;
   */
   ESCRIPT_DLL_API
   void
-  replaceNaN(double value);
+  replaceNaN(DataTypes::real_t value);
+
+  ESCRIPT_DLL_API
+  void
+  replaceNaN(DataTypes::cplx_t value);
 
   /**
      \brief
@@ -130,7 +141,7 @@ typedef DataReady parent;
   ESCRIPT_DLL_API
   virtual
   DataAbstract*
-  deepCopy();
+  deepCopy() const;
 
 
  /**
@@ -162,13 +173,13 @@ typedef DataReady parent;
    */
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo) const;
 
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo);
 
@@ -178,7 +189,7 @@ typedef DataReady parent;
   */
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
   /**
@@ -226,7 +237,29 @@ typedef DataReady parent;
   */
   ESCRIPT_DLL_API
   virtual void
-  nonsymmetric(DataAbstract* ev);
+  antisymmetric(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an hermitian matrix (A + A*) / 2
+
+     \param ev - Output - hermitian matrix
+
+  */
+  ESCRIPT_DLL_API
+  virtual void
+  hermitian(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an anti-hermitian matrix (A - A*) / 2
+
+     \param ev - Output - antihermitian matrix
+
+  */
+  ESCRIPT_DLL_API
+  virtual void
+  antihermitian(DataAbstract* ev);
 
   /**
      \brief
@@ -305,20 +338,57 @@ typedef DataReady parent;
      Return a reference to the underlying DataVector.
   */
   ESCRIPT_DLL_API
-  DataTypes::ValueType&
+  DataTypes::RealVectorType&
   getVectorRW();
 
 
   ESCRIPT_DLL_API
-  const DataTypes::ValueType&
+  const DataTypes::RealVectorType&
   getVectorRO() const;
 
+  ESCRIPT_DLL_API
+  DataTypes::CplxVectorType&
+  getVectorRWC();
+
+
+  ESCRIPT_DLL_API
+  const DataTypes::CplxVectorType&
+  getVectorROC() const;  
+
+
+
+  ESCRIPT_DLL_API
+  virtual DataTypes::RealVectorType&
+  getTypedVectorRW(DataTypes::real_t dummy);  
+  
+  ESCRIPT_DLL_API
+  virtual const DataTypes::RealVectorType&
+  getTypedVectorRO(DataTypes::real_t dummy) const;
+
+  ESCRIPT_DLL_API
+  virtual DataTypes::CplxVectorType&
+  getTypedVectorRW(DataTypes::cplx_t dummy);
+  
+  ESCRIPT_DLL_API
+  virtual const DataTypes::CplxVectorType&
+  getTypedVectorRO(DataTypes::cplx_t dummy) const;  
+
+
+
+  
+  /**
+   * \brief Convert from real data to complex data.
+  */ 
+  ESCRIPT_DLL_API
+  void complicate();
+
  protected:
 
  private:
   //
   // the actual data
-  DataTypes::ValueType m_data;
+  DataTypes::RealVectorType m_data_r;
+  DataTypes::CplxVectorType m_data_c;
 
 };
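
The new getTypedVectorRW/getTypedVectorRO overloads select real or complex storage by the type of an otherwise unused dummy argument. A small illustrative sketch of that dispatch pattern (Holder is hypothetical, not an escript class):

    #include <complex>
    #include <vector>

    struct Holder {                                   // hypothetical, for illustration
        std::vector<double> real_v;
        std::vector< std::complex<double> > cplx_v;

        // The dummy value is ignored; only its type selects the overload.
        std::vector<double>& typedVector(double)                                { return real_v; }
        std::vector< std::complex<double> >& typedVector(std::complex<double>)  { return cplx_v; }
    };

    // Callers pick storage by passing a zero of the desired type:
    //   Holder h;
    //   auto& r = h.typedVector(double(0));                   // real storage
    //   auto& c = h.typedVector(std::complex<double>(0));     // complex storage
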
 
diff --git a/escriptcore/src/DataEmpty.cpp b/escriptcore/src/DataEmpty.cpp
index d032e6b..8e4b144 100644
--- a/escriptcore/src/DataEmpty.cpp
+++ b/escriptcore/src/DataEmpty.cpp
@@ -14,16 +14,12 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "DataEmpty.h"
 #include "DataException.h"
 
 
 namespace {
 
-
   inline
   void
   throwStandardException(const std::string& functionName)
@@ -31,9 +27,8 @@ namespace {
     throw escript::DataException("Error - "+functionName+" function call invalid for DataEmpty.");
   }
 
-
-  escript::DataTypes::ValueType dummy;	
-
+  escript::DataTypes::RealVectorType dummy;	
+  escript::DataTypes::CplxVectorType dummyc;	
 }
 
 namespace escript {
@@ -56,12 +51,12 @@ DataEmpty::toString() const
 
 
 DataAbstract*
-DataEmpty::deepCopy()
+DataEmpty::deepCopy() const
 {
   return new DataEmpty();
 }
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataEmpty::getPointOffset(int sampleNo,
                           int dataPointNo) const 
 {
@@ -69,7 +64,7 @@ DataEmpty::getPointOffset(int sampleNo,
   return 0;
 }
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataEmpty::getPointOffset(int sampleNo,
                           int dataPointNo)
 {
@@ -77,7 +72,7 @@ DataEmpty::getPointOffset(int sampleNo,
   return 0;
 }
 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataEmpty::getLength() const
 {
   return 0;
@@ -105,14 +100,14 @@ DataEmpty::matrixInverse(DataAbstract* out) const
 }
 
 
-DataTypes::ValueType&
+DataTypes::RealVectorType&
 DataEmpty::getVectorRW()
 {
   throwStandardException("getVector");	// always throws but the compiler doesn't know that.
   return dummy;			// dead code to stop the compiler complaining
 }
 
-const DataTypes::ValueType&
+const DataTypes::RealVectorType&
 DataEmpty::getVectorRO() const
 {
   throwStandardException("getVector");	// always throws but the compiler doesn't know that.
@@ -120,6 +115,52 @@ DataEmpty::getVectorRO() const
 }
 
 
+DataTypes::CplxVectorType&
+DataEmpty::getVectorRWC()
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummyc;			// dead code to stop the compiler complaining
+}
+
+const DataTypes::CplxVectorType&
+DataEmpty::getVectorROC() const
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummyc;			// dead code to stop the compiler complaining
+}
+
+
+DataTypes::RealVectorType&
+DataEmpty::getTypedVectorRW(DataTypes::real_t dummypar)
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummy;			// dead code to stop the compiler complaining
+}
+
+const DataTypes::RealVectorType&
+DataEmpty::getTypedVectorRO(DataTypes::real_t dummypar) const
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummy;			// dead code to stop the compiler complaining
+}
+
+
+DataTypes::CplxVectorType&
+DataEmpty::getTypedVectorRW(DataTypes::cplx_t dummypar)
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummyc;			// dead code to stop the compiler complaining
+}
+
+const DataTypes::CplxVectorType&
+DataEmpty::getTypedVectorRO(DataTypes::cplx_t dummypar) const
+{
+  throwStandardException("getVector");	// always throws but the compiler doesn't know that.
+  return dummyc;			// dead code to stop the compiler complaining
+}
+
+
+
 void
 DataEmpty::dump(const std::string fileName) const
 {
@@ -127,3 +168,4 @@ DataEmpty::dump(const std::string fileName) const
 }
 
 }  // end of namespace
+
diff --git a/escriptcore/src/DataEmpty.h b/escriptcore/src/DataEmpty.h
index 9a3b7b0..b1981bc 100644
--- a/escriptcore/src/DataEmpty.h
+++ b/escriptcore/src/DataEmpty.h
@@ -14,11 +14,10 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_DATAEMPTY_H__
+#define __ESCRIPT_DATAEMPTY_H__
 
-#if !defined escript_DataEmpty_20040726_H
-#define escript_DataEmpty_20040726_H
 #include "system_dep.h"
-
 #include "DataReady.h"
 
 namespace escript {
@@ -69,7 +68,7 @@ typedef DataReady parent;
   ESCRIPT_DLL_API
   virtual
   DataAbstract*
-  deepCopy();
+  deepCopy() const;
 
 
   /**
@@ -82,13 +81,13 @@ typedef DataReady parent;
    */
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo) const;
 
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo);
 
@@ -99,7 +98,7 @@ typedef DataReady parent;
   */
   ESCRIPT_DLL_API
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
   /**
@@ -150,10 +149,17 @@ typedef DataReady parent;
 
   ESCRIPT_DLL_API
   void
-  replaceNaN(double value)
+  replaceNaN(DataTypes::real_t value)
   {
   
   }
+  
+  ESCRIPT_DLL_API
+  void
+  replaceNaN(DataTypes::cplx_t value)
+  {
+  
+  }  
 
  protected:
 
@@ -161,15 +167,36 @@ typedef DataReady parent;
 	\brief Provide access to underlying storage. Internal use only!
   */
   ESCRIPT_DLL_API
-  virtual DataTypes::ValueType&
+  virtual DataTypes::RealVectorType&
   getVectorRW();
 
 
   ESCRIPT_DLL_API
-  virtual const DataTypes::ValueType&
+  virtual const DataTypes::RealVectorType&
   getVectorRO() const;
 
+  ESCRIPT_DLL_API
+  virtual DataTypes::CplxVectorType&
+  getVectorRWC();
+
 
+  ESCRIPT_DLL_API
+  virtual const DataTypes::CplxVectorType&
+  getVectorROC() const;
+  
+  virtual DataTypes::RealVectorType&
+  getTypedVectorRW(DataTypes::real_t dummy);  
+  
+  virtual const DataTypes::RealVectorType&
+  getTypedVectorRO(DataTypes::real_t dummy) const;
+
+  virtual DataTypes::CplxVectorType&
+  getTypedVectorRW(DataTypes::cplx_t dummy);
+  
+  virtual const DataTypes::CplxVectorType&
+  getTypedVectorRO(DataTypes::cplx_t dummy) const;      
+  
+  
  private:
 
 //  /**
@@ -184,4 +211,5 @@ typedef DataReady parent;
 
 } // end of namespace
 
-#endif
+#endif // __ESCRIPT_DATAEMPTY_H__
+
diff --git a/escriptcore/src/DataException.cpp b/escriptcore/src/DataException.cpp
deleted file mode 100644
index 9ffeef1..0000000
--- a/escriptcore/src/DataException.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "DataException.h"
-
-using namespace escript;
-
-const std::string 
-DataException::exceptionNameValue("DataException");
-
-const std::string &
-DataException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
diff --git a/escriptcore/src/DataException.h b/escriptcore/src/DataException.h
index 5a1b877..d39011c 100644
--- a/escriptcore/src/DataException.h
+++ b/escriptcore/src/DataException.h
@@ -17,90 +17,20 @@
 
 #if !defined  escript_DataException_20040324_H
 #define escript_DataException_20040324_H
-#include "system_dep.h"
 
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
 namespace escript
 {
 
-  /**
-  \brief
-  DataException exception class.
-
-  Description:
-  DataException exception class.
-  The class provides a public function returning the exception name
-  */
-  class DataException : public esysUtils::EsysException 
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DataException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DataException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DataException(const std::string &str) : Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DataException(const DataException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESCRIPT_DLL_API
-    inline DataException &
-    operator=(const DataException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-
-    /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~DataException() THROW(NO_ARG) {}
-
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-
-
-  };
+class DataException : public EsysException 
+{
+public:
+    DataException(const std::string& str) : EsysException(str) {}
+    virtual ~DataException() throw() {}
+};
 
 } // end of namespace
 
-
 #endif
+
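
DataException collapses to a thin subclass of EsysException that simply forwards a message string. A hedged usage sketch, assuming EsysException ultimately derives from std::exception so that what() is available:

    #include <exception>
    #include <iostream>
    #include "DataException.h"

    void demo()
    {
        try {
            throw escript::DataException("Attempt to modify shared object");
        } catch (const std::exception& e) {
            std::cerr << e.what() << std::endl;
        }
    }
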
diff --git a/escriptcore/src/DataExpanded.cpp b/escriptcore/src/DataExpanded.cpp
index 8540d94..dfb15e8 100644
--- a/escriptcore/src/DataExpanded.cpp
+++ b/escriptcore/src/DataExpanded.cpp
@@ -14,35 +14,35 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-#include <esysUtils/Esys_MPI.h>
-
 #include "Data.h"
 #include "DataConstant.h"
 #include "DataException.h"
 #include "DataExpanded.h"
-#include "DataMaths.h"
+#include "DataVectorOps.h"
 #include "DataTagged.h"
 
 #include <limits>
-#ifdef USE_NETCDF
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
 using namespace std;
 using namespace escript::DataTypes;
 
-#define CHECK_FOR_EX_WRITE do {\
-    if (!checkNoSharing()) {\
+#ifdef SLOWSHARECHECK
+  #define CHECK_FOR_EX_WRITE do {\
+    if (isShared()) {\
         std::ostringstream ss;\
         ss << "Attempt to modify shared object. Line " << __LINE__ << " in "\
            << __FILE__;\
         abort();\
         throw DataException(ss.str());\
     }\
-} while(0)
-
+  } while(0);
+#else
+  #define CHECK_FOR_EX_WRITE
+#endif
 
 namespace escript {
 
@@ -51,22 +51,23 @@ DataExpanded::DataExpanded(const WrappedArray& value,
   : parent(what,value.getShape())
 {
     // initialise the data array for this object
-    initialise(what.getNumSamples(),what.getNumDPPSample());
+    initialise(what.getNumSamples(),what.getNumDPPSample(), value.isComplex());
     // copy the given value to every data point
     copy(value);
 }
 
 DataExpanded::DataExpanded(const DataExpanded& other)
   : parent(other.getFunctionSpace(), other.getShape()),
-    m_data(other.m_data)
+    m_data_r(other.m_data_r), m_data_c(other.m_data_c)
 {
+    m_iscompl=other.m_iscompl;
 }
 
 DataExpanded::DataExpanded(const DataConstant& other)
   : parent(other.getFunctionSpace(), other.getShape())
 {
     // initialise the data array for this object
-    initialise(other.getNumSamples(),other.getNumDPPSample());
+    initialise(other.getNumSamples(),other.getNumDPPSample(), other.isComplex());
     // DataConstant only has one value, copy this to every data point
     copy(other);
 }
@@ -75,22 +76,42 @@ DataExpanded::DataExpanded(const DataTagged& other)
   : parent(other.getFunctionSpace(), other.getShape())
 {
     // initialise the data array for this object
-    initialise(other.getNumSamples(),other.getNumDPPSample());
+    initialise(other.getNumSamples(),other.getNumDPPSample(), other.isComplex());
     // for each data point in this object, extract and copy the corresponding
     // data value from the given DataTagged object
-    DataTypes::ValueType::size_type numRows=m_data.getNumRows();
-    DataTypes::ValueType::size_type numCols=m_data.getNumCols();
-#pragma omp parallel for
-    for (int i=0; i<numRows; i++) {
-        for (int j=0; j<numCols; j++) {
-            try {
-                DataTypes::copyPoint(getVectorRW(), getPointOffset(i,j),
-                                     getNoValues(), other.getVectorRO(),
-                                     other.getPointOffset(i,j));
-            } catch (std::exception& e) {
-                cerr << e.what() << endl;
-            }
-        }
+    if (isComplex())
+    {
+	DataTypes::cplx_t dummy=0;
+	#pragma omp parallel for
+	for (int i=0; i<m_noSamples; i++) {
+	    for (int j=0; j<m_noDataPointsPerSample; j++) {
+		try {
+		    DataTypes::copyPoint(getTypedVectorRW(dummy), getPointOffset(i,j),
+					getNoValues(), other.getTypedVectorRO(dummy),
+					other.getPointOffset(i,j));
+		} catch (std::exception& e) {
+		    cerr << e.what() << endl;
+		}
+	    }
+	}      
+      
+      
+    }
+    else
+    {
+	DataTypes::real_t dummy=0;
+	#pragma omp parallel for
+	for (int i=0; i<m_noSamples; i++) {
+	    for (int j=0; j<m_noDataPointsPerSample; j++) {
+		try {
+		    DataTypes::copyPoint(getTypedVectorRW(dummy), getPointOffset(i,j),
+					getNoValues(), other.getTypedVectorRO(dummy),
+					other.getPointOffset(i,j));
+		} catch (std::exception& e) {
+		    cerr << e.what() << endl;
+		}
+	    }
+	}
     }
 }
 
@@ -99,40 +120,84 @@ DataExpanded::DataExpanded(const DataExpanded& other,
   : parent(other.getFunctionSpace(),DataTypes::getResultSliceShape(region))
 {
     // initialise this Data object to the shape of the slice
-    initialise(other.getNumSamples(),other.getNumDPPSample());
+    initialise(other.getNumSamples(),other.getNumDPPSample(), other.isComplex());
     // copy the data
     DataTypes::RegionLoopRangeType region_loop_range =
                                     DataTypes::getSliceRegionLoopRange(region);
-    DataTypes::ValueType::size_type numRows=m_data.getNumRows();
-    DataTypes::ValueType::size_type numCols=m_data.getNumCols();
-#pragma omp parallel for
-    for (int i=0; i<numRows; i++) {
-        for (int j=0; j<numCols; j++) {
-            try {
-                DataTypes::copySlice(getVectorRW(), getShape(),
-                                     getPointOffset(i,j), other.getVectorRO(),
-                                     other.getShape(),
-                                     other.getPointOffset(i,j),
-                                     region_loop_range);
-            } catch (std::exception& e) {
-                cerr << e.what() << endl;
+    if (isComplex())
+    {
+	DataTypes::cplx_t dummy=0;
+	#pragma omp parallel for
+	for (int i=0; i<m_noSamples; i++) {
+	    for (int j=0; j<m_noDataPointsPerSample; j++) {
+		try {
+		    DataTypes::copySlice(getTypedVectorRW(dummy), getShape(),
+					getPointOffset(i,j), other.getTypedVectorRO(dummy),
+					other.getShape(),
+					other.getPointOffset(i,j),
+					region_loop_range);
+		} catch (std::exception& e) {
+		    cerr << e.what() << endl;
+		}
+	    }
+	}      
+    }
+    else
+    {
+	DataTypes::real_t dummy=0;
+	#pragma omp parallel for
+	for (int i=0; i<m_noSamples; i++) {
+	    for (int j=0; j<m_noDataPointsPerSample; j++) {
+		try {
+		    DataTypes::copySlice(getTypedVectorRW(dummy), getShape(),
+					getPointOffset(i,j), other.getTypedVectorRO(dummy),
+					other.getShape(),
+					other.getPointOffset(i,j),
+					region_loop_range);
+		} catch (std::exception& e) {
+		    cerr << e.what() << endl;
+		}
+	    }
+	}
+    }
+}
+
+DataExpanded::DataExpanded(const FunctionSpace& what,
+                           const DataTypes::ShapeType &shape,
+                           const DataTypes::RealVectorType &data)
+  : parent(what,shape)
+{
+    ESYS_ASSERT(data.size()%getNoValues()==0,
+                 "DataExpanded Constructor - size of supplied data is not a multiple of shape size.");
+
+    if (data.size() == getNoValues()) {
+        RealVectorType& vec=m_data_r;
+        // create the view of the data
+        initialise(what.getNumSamples(),what.getNumDPPSample(), false);
+        // now we copy this value to all elements
+        for (int i=0; i<getLength();) {
+            for (unsigned int j=0;j<getNoValues();++j,++i) {
+                vec[i]=data[j];
             }
         }
+    } else {
+        // copy the data in the correct format
+        m_data_r = data;
     }
 }
 
 DataExpanded::DataExpanded(const FunctionSpace& what,
                            const DataTypes::ShapeType &shape,
-                           const DataTypes::ValueType &data)
+                           const DataTypes::CplxVectorType &data)
   : parent(what,shape)
 {
-    EsysAssert(data.size()%getNoValues()==0,
+    ESYS_ASSERT(data.size()%getNoValues()==0,
                  "DataExpanded Constructor - size of supplied data is not a multiple of shape size.");
 
     if (data.size() == getNoValues()) {
-        ValueType& vec=m_data.getData();
+        CplxVectorType& vec=m_data_c;
         // create the view of the data
-        initialise(what.getNumSamples(),what.getNumDPPSample());
+        initialise(what.getNumSamples(),what.getNumDPPSample(), true);
         // now we copy this value to all elements
         for (int i=0; i<getLength();) {
             for (unsigned int j=0;j<getNoValues();++j,++i) {
@@ -141,18 +206,34 @@ DataExpanded::DataExpanded(const FunctionSpace& what,
         }
     } else {
         // copy the data in the correct format
-        m_data.getData() = data;
+        m_data_c = data;
+    }
+}
+
+
+DataExpanded::DataExpanded(const FunctionSpace& what,
+                           const DataTypes::ShapeType &shape,
+                           const DataTypes::real_t v)
+  : parent(what,shape)
+{
+    initialise(what.getNumSamples(),what.getNumDPPSample(), false);
+    DataTypes::RealVectorType& vec=m_data_r;
+    // now we copy this value to all elements
+    const int L=getLength();
+#pragma omp parallel for
+    for (int i=0; i<L; ++i) {
+        vec[i]=v;
     }
 }
 
 DataExpanded::DataExpanded(const FunctionSpace& what,
                            const DataTypes::ShapeType &shape,
-                           const double v)
+                           const DataTypes::cplx_t v)
   : parent(what,shape)
 {
-    ValueType& vec=m_data.getData();
-    // create the view of the data
-    initialise(what.getNumSamples(),what.getNumDPPSample());
+    initialise(what.getNumSamples(),what.getNumDPPSample(), true);
+    DataTypes::CplxVectorType& vec=m_data_c;
+    
     // now we copy this value to all elements
     const int L=getLength();
 #pragma omp parallel for
@@ -161,11 +242,12 @@ DataExpanded::DataExpanded(const FunctionSpace& what,
     }
 }
 
+
 DataExpanded::~DataExpanded()
 {
 }
 
-DataAbstract* DataExpanded::deepCopy()
+DataAbstract* DataExpanded::deepCopy() const
 {
     return new DataExpanded(*this);
 }
@@ -198,15 +280,13 @@ void DataExpanded::setSlice(const DataAbstract* value,
                 shape, value->getShape()));
 
     // copy the data from the slice into this object
-    DataTypes::ValueType::size_type numRows = m_data.getNumRows();
-    DataTypes::ValueType::size_type numCols = m_data.getNumCols();
-    ValueType& vec=getVectorRW();
+    DataTypes::RealVectorType& vec=getVectorRW();
     const ShapeType& mshape=getShape();
-    const ValueType& tVec=tempDataExp->getVectorRO();
+    const DataTypes::RealVectorType& tVec=tempDataExp->getVectorRO();
     const ShapeType& tShape=tempDataExp->getShape();
 #pragma omp parallel for
-    for (int i=0; i<numRows; i++) {
-        for (int j=0; j<numCols; j++) {
+    for (int i=0; i<m_noSamples; i++) {
+        for (int j=0; j<m_noDataPointsPerSample; j++) {
             DataTypes::copySliceFrom(vec, mshape, getPointOffset(i,j), tVec,
                                      tShape, tempDataExp->getPointOffset(i,j),
                                      region_loop_range);
@@ -216,19 +296,46 @@ void DataExpanded::setSlice(const DataAbstract* value,
 
 void DataExpanded::copy(const DataConstant& value)
 {
-    EsysAssert((checkShape(getShape(), value.getShape())),
+    ESYS_ASSERT(checkShape(getShape(), value.getShape()),
                  createShapeErrorMessage("Error - Couldn't copy due to shape mismatch.", value.getShape(), getShape()));
-
-    // copy a single value to every data point in this object
-    int nRows=m_data.getNumRows();
-    int nCols=m_data.getNumCols();
-#pragma omp parallel for
-    for (int i=0; i<nRows; i++) {
-        for (int j=0; j<nCols; j++) {
-            DataTypes::copyPoint(getVectorRW(), getPointOffset(i,j),
-                                 getNoValues(), value.getVectorRO(), 0);
-        }
+    if (isComplex())
+    {
+	if (value.isComplex())
+	{
+	    // copy a single value to every data point in this object
+	    #pragma omp parallel for
+	    for (int i=0; i<m_noSamples; i++) {
+		for (int j=0; j<m_noDataPointsPerSample; j++) {
+		    DataTypes::copyPoint(getTypedVectorRW((cplx_t)(0)), getPointOffset(i,j),
+					getNoValues(), value.getTypedVectorRO((cplx_t)(0)), 0);
+		}
+	    }	    
+	}
+	else	// value is real
+	{
+	    throw DataException("Programming error - DataExpanded::copy source and target must be the same complexity.");	  
+	}
+    }
+    else
+    {
+	if (value.isComplex())
+	{
+	    throw DataException("Programming error - DataExpanded::copy source and target must be the same complexity.");	  	  
+	}
+	else
+	{
+	    real_t dummy=0;
+	    // copy a single value to every data point in this object
+	    #pragma omp parallel for
+	    for (int i=0; i<m_noSamples; i++) {
+		for (int j=0; j<m_noDataPointsPerSample; j++) {
+		    DataTypes::copyPoint(getTypedVectorRW(dummy), getPointOffset(i,j),
+					getNoValues(), value.getTypedVectorRO(dummy), 0);
+		}
+	    }
+	}
     }
+    
 }
 
 void DataExpanded::copy(const WrappedArray& value)
@@ -241,39 +348,106 @@ void DataExpanded::copy(const WrappedArray& value)
     getVectorRW().copyFromArray(value, getNumDPPSample()*getNumSamples());
 }
 
-void DataExpanded::initialise(int noSamples, int noDataPointsPerSample)
+void DataExpanded::initialise(int noSamples, int noDataPointsPerSample, bool cplx)
 {
+    this->m_iscompl=cplx;
     if (noSamples==0) //retain the default empty object
         return;
 
-    // resize data array to the required size
-    m_data.resize(noSamples, noDataPointsPerSample, getNoValues());
+    if (cplx)
+    {
+	// resize data array to the required size
+	m_data_c.resize(noSamples*noDataPointsPerSample*getNoValues(), 0.0, noDataPointsPerSample*getNoValues());      
+    }
+    else
+    {
+	// resize data array to the required size
+	m_data_r.resize(noSamples*noDataPointsPerSample*getNoValues(), 0.0, noDataPointsPerSample*getNoValues());
+    }
 }
 
-bool DataExpanded::hasNaN() const
+bool
+DataExpanded::hasNaN() const
 {
-    bool haveNaN = false;
-    const ValueType& v = m_data.getData();
-#pragma omp parallel for
-    for (ValueType::size_type i=0; i<v.size(); ++i) {
-        if (nancheck(v[i])) {
-#pragma omp critical
-            {
-                haveNaN=true;
-            }
-        }
-    }
-    return haveNaN;
+  bool haveNaN=false;
+  if (isComplex())
+  {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+	  if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))
+	  {
+	      #pragma omp critical 
+	      {
+		  haveNaN=true;
+	      }
+	  }
+      }
+  }
+  else
+  {
+      #pragma omp parallel for
+      for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
+      {
+	  if (std::isnan(m_data_r[i]))
+	  {
+	      #pragma omp critical 
+	      {
+		  haveNaN=true;
+	      }
+	  }
+      }
+  }
+  return haveNaN;
 }
 
-void DataExpanded::replaceNaN(double value)
-{
-#pragma omp parallel for
-    for (ValueType::size_type i=0; i<m_data.size(); ++i) {
-        if (nancheck(m_data[i])) {
-            m_data[i] = value;
-        }
-    }
+
+void
+DataExpanded::replaceNaN(DataTypes::real_t value) {
+  CHECK_FOR_EX_WRITE  
+  if (isComplex())
+  {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+	if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))  
+	{
+	  m_data_c[i] = value;
+	}
+      }
+  }
+  else
+  {
+      #pragma omp parallel for
+      for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
+      {
+	if (std::isnan(m_data_r[i]))  
+	{
+	  m_data_r[i] = value;
+	}
+      }    
+  }
+}
+
+void
+DataExpanded::replaceNaN(DataTypes::cplx_t value) {
+  CHECK_FOR_EX_WRITE  
+  if (isComplex())
+  {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+	if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag())) 
+	{
+	  m_data_c[i] = value;
+	}
+      }
+  }
+  else
+  {
+      complicate();
+      replaceNaN(value);
+  }
 }
 
 string DataExpanded::toString() const
@@ -282,15 +456,17 @@ string DataExpanded::toString() const
     FunctionSpace fs=getFunctionSpace();
 
     int offset=0;
-    for (int i=0; i<m_data.getNumRows(); i++) {
-        for (int j=0; j<m_data.getNumCols(); j++) {
+    for (int i=0; i<m_noSamples; i++) {
+        for (int j=0; j<m_noDataPointsPerSample; j++) {
             offset = getPointOffset(i,j);
             stringstream suffix;
             suffix << "( id: " << i << ", ref: "
                    << fs.getReferenceIDOfSample(i) << ", pnt: " << j << ")";
-            ss << DataTypes::pointToString(getVectorRO(), getShape(), offset,
-                                           suffix.str());
-            if (!(i==(m_data.getNumRows()-1) && j==(m_data.getNumCols()-1))) {
+            ss << (isComplex()?
+		    DataTypes::pointToString(getTypedVectorRO((cplx_t)0), getShape(), offset, suffix.str())
+		   :
+		    DataTypes::pointToString(getTypedVectorRO((real_t)0), getShape(), offset, suffix.str()));
+            if (!(i==(m_noSamples-1) && j==(m_noDataPointsPerSample-1))) {
                 ss << endl;
             }
         }
@@ -302,25 +478,47 @@ string DataExpanded::toString() const
     return result;
 }
 
-DataTypes::ValueType::size_type DataExpanded::getPointOffset(int sampleNo,
+DataTypes::RealVectorType::size_type DataExpanded::getPointOffset(int sampleNo,
                                                         int dataPointNo) const
 {
-    return m_data.index(sampleNo,dataPointNo);
+    DataTypes::RealVectorType::size_type blockSize=getNoValues();
+    ESYS_ASSERT((isComplex()?
+		  ((sampleNo >= 0) && (dataPointNo >= 0) && (m_data_c.size() > 0))
+		:
+		  ((sampleNo >= 0) && (dataPointNo >= 0) && (m_data_r.size() > 0))), 
+	       "(DataBlocks2D) Index value out of range.");
+    DataTypes::RealVectorType::size_type temp=(sampleNo*m_noDataPointsPerSample+dataPointNo)*blockSize;
+    ESYS_ASSERT((isComplex()?
+		  (temp <= (m_data_c.size()-blockSize))
+		:
+		  (temp <= (m_data_r.size()-blockSize))), "Index value out of range.");
+
+    return temp;
 }
 
-DataTypes::ValueType::size_type DataExpanded::getPointOffset(int sampleNo,
-                                                             int dataPointNo)
+
+void DataExpanded::complicate()
 {
-    return m_data.index(sampleNo,dataPointNo);
+    if (!isComplex())
+    {
+        fillComplexFromReal(m_data_r, m_data_c);
+        this->m_iscompl=true;
+        m_data_r.resize(0,0,1);
+    }
 }
 
-DataTypes::ValueType::size_type DataExpanded::getLength() const
+
+DataTypes::RealVectorType::size_type DataExpanded::getLength() const
 {
-    return m_data.size();
+    return std::max(m_data_c.size(), m_data_r.size());  
 }
 
-void DataExpanded::copyToDataPoint(int sampleNo, int dataPointNo, double value)
+void DataExpanded::copyToDataPoint(int sampleNo, int dataPointNo, const DataTypes::cplx_t value)
 {
+    if (!isComplex())
+    {
+	throw DataException("Programming error - attempt to set complex value on real data.");
+    }
     CHECK_FOR_EX_WRITE;
     // Get the number of samples and data-points per sample.
     int numSamples = getNumSamples();
@@ -334,8 +532,65 @@ void DataExpanded::copyToDataPoint(int sampleNo, int dataPointNo, double value)
         if (dataPointNo >= numDataPointsPerSample || dataPointNo < 0)
             throw DataException("DataExpanded::copyDataPoint: invalid dataPointNo.");
 
-        ValueType::size_type offset = getPointOffset(sampleNo, dataPointNo);
-        ValueType& vec = getVectorRW();
+        DataTypes::CplxVectorType::size_type offset = getPointOffset(sampleNo, dataPointNo);
+        DataTypes::CplxVectorType& vec = getTypedVectorRW(cplx_t(0));
+        if (dataPointRank==0) {
+            vec[offset] = value;
+        } else if (dataPointRank==1) {
+            for (int i=0; i<dataPointShape[0]; i++) {
+                vec[offset+i] = value;
+            }
+        } else if (dataPointRank==2) {
+            for (int i=0; i<dataPointShape[0]; i++) {
+                for (int j=0; j<dataPointShape[1]; j++) {
+                    vec[offset+getRelIndex(dataPointShape,i,j)] = value;
+                }
+            }
+        } else if (dataPointRank==3) {
+            for (int i=0; i<dataPointShape[0]; i++) {
+                for (int j=0; j<dataPointShape[1]; j++) {
+                    for (int k=0; k<dataPointShape[2]; k++) {
+                        vec[offset+getRelIndex(dataPointShape,i,j,k)] = value;
+                    }
+                }
+            }
+        } else if (dataPointRank==4) {
+            for (int i=0; i<dataPointShape[0]; i++) {
+                for (int j=0; j<dataPointShape[1]; j++) {
+                    for (int k=0; k<dataPointShape[2]; k++) {
+                        for (int l=0; l<dataPointShape[3]; l++) {
+                            vec[offset+getRelIndex(dataPointShape,i,j,k,l)] = value;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+void DataExpanded::copyToDataPoint(int sampleNo, int dataPointNo, const DataTypes::real_t value)
+{
+    if (isComplex())
+    {
+	copyToDataPoint(sampleNo, dataPointNo, cplx_t(value));
+	return;
+    }
+    CHECK_FOR_EX_WRITE;
+    // Get the number of samples and data-points per sample.
+    int numSamples = getNumSamples();
+    int numDataPointsPerSample = getNumDPPSample();
+    int dataPointRank = getRank();
+    ShapeType dataPointShape = getShape();
+    if (numSamples*numDataPointsPerSample > 0) {
+        //TODO: global error handling
+        if (sampleNo >= numSamples || sampleNo < 0)
+            throw DataException("DataExpanded::copyDataPoint: invalid sampleNo.");
+        if (dataPointNo >= numDataPointsPerSample || dataPointNo < 0)
+            throw DataException("DataExpanded::copyDataPoint: invalid dataPointNo.");
+
+        DataTypes::RealVectorType::size_type offset = getPointOffset(sampleNo, dataPointNo);
+        DataTypes::RealVectorType& vec = getVectorRW();
         if (dataPointRank==0) {
             vec[offset] = value;
         } else if (dataPointRank==1) {
@@ -388,9 +643,18 @@ void DataExpanded::copyToDataPoint(int sampleNo, int dataPointNo,
         if (dataPointNo >= numDataPointsPerSample || dataPointNo < 0)
             throw DataException("DataExpanded::copyDataPoint: invalid dataPointNoInSample.");
 
-        ValueType::size_type offset = getPointOffset(sampleNo, dataPointNo);
-        ValueType& vec = getVectorRW();
-        vec.copyFromArrayToOffset(value, offset, 1);
+	if (isComplex())
+	{
+	    DataTypes::CplxVectorType::size_type offset = getPointOffset(sampleNo, dataPointNo);
+	    DataTypes::CplxVectorType& vec = getTypedVectorRW(cplx_t(0));
+	    vec.copyFromArrayToOffset(value, offset, 1);
+	}
+	else
+	{
+	    DataTypes::RealVectorType::size_type offset = getPointOffset(sampleNo, dataPointNo);
+	    DataTypes::RealVectorType& vec = getTypedVectorRW(real_t(0));
+	    vec.copyFromArrayToOffset(value, offset, 1);
+	}
     }
 }
 
@@ -402,42 +666,132 @@ void DataExpanded::symmetric(DataAbstract* ev)
     if (!temp_ev)
         throw DataException("DataExpanded::symmetric: casting to DataExpanded failed (probably a programming error).");
 
-    const ValueType& vec = getVectorRO();
     const ShapeType& shape = getShape();
-    ValueType& evVec = temp_ev->getVectorRW();
     const ShapeType& evShape = temp_ev->getShape();
-#pragma omp parallel for
-    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::symmetric(vec, shape,
-                    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
-                    ev->getPointOffset(sampleNo,dataPointNo));
-        }
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec = getTypedVectorRO((DataTypes::cplx_t)0);
+	DataTypes::CplxVectorType& evVec = temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::symmetric(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec = getTypedVectorRO(0.0);
+	DataTypes::RealVectorType& evVec = temp_ev->getTypedVectorRW(0.0);
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::symmetric(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
     }
 }
 
-void DataExpanded::nonsymmetric(DataAbstract* ev)
+void DataExpanded::antisymmetric(DataAbstract* ev)
 {
     const int numSamples = getNumSamples();
     const int numDataPointsPerSample = getNumDPPSample();
     DataExpanded* temp_ev=dynamic_cast<DataExpanded*>(ev);
     if (!temp_ev)
-        throw DataException("DataExpanded::nonsymmetric: casting to DataExpanded failed (probably a programming error).");
+        throw DataException("DataExpanded::antisymmetric: casting to DataExpanded failed (probably a programming error).");
 
-    const ValueType& vec = getVectorRO();
     const ShapeType& shape = getShape();
-    ValueType& evVec = temp_ev->getVectorRW();
     const ShapeType& evShape = temp_ev->getShape();
-#pragma omp parallel for
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec = getTypedVectorRO((DataTypes::cplx_t)0);
+	DataTypes::CplxVectorType& evVec = temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) 
+	{
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++)
+	    {
+		escript::antisymmetric(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec = getTypedVectorRO(0.0);
+	DataTypes::RealVectorType& evVec = temp_ev->getTypedVectorRW(0.0);
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++)
+	    {
+		escript::antisymmetric(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
+    }    
+}
+
+
+void DataExpanded::hermitian(DataAbstract* ev)
+{
+    const int numSamples = getNumSamples();
+    const int numDataPointsPerSample = getNumDPPSample();
+    DataExpanded* temp_ev = dynamic_cast<DataExpanded*>(ev);
+    if (!temp_ev)
+        throw DataException("DataExpanded::hermitian: casting to DataExpanded failed (probably a programming error).");
+    if (!isComplex() || !temp_ev->isComplex())
+    {
+	throw DataException("DataExpanded::hermitian: do not call this method with real data");
+    }
+    const ShapeType& shape = getShape();
+    const ShapeType& evShape = temp_ev->getShape();
+    const DataTypes::CplxVectorType& vec = getTypedVectorRO((DataTypes::cplx_t)0);
+    DataTypes::CplxVectorType& evVec = temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+    #pragma omp parallel for
     for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::nonsymmetric(vec, shape,
-                    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
-                    ev->getPointOffset(sampleNo,dataPointNo));
-        }
+	for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+	    escript::hermitian(vec, shape,
+		    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+		    ev->getPointOffset(sampleNo,dataPointNo));
+	}
     }
 }
 
+void DataExpanded::antihermitian(DataAbstract* ev)
+{
+    const int numSamples = getNumSamples();
+    const int numDataPointsPerSample = getNumDPPSample();
+    DataExpanded* temp_ev=dynamic_cast<DataExpanded*>(ev);
+    if (!temp_ev)
+        throw DataException("DataExpanded::antihermitian: casting to DataExpanded failed (probably a programming error).");
+    if (!isComplex() || !temp_ev->isComplex())
+    {
+	throw DataException("DataExpanded::antihermitian: do not call this method with real data");
+    }
+    const ShapeType& shape = getShape();
+    const ShapeType& evShape = temp_ev->getShape();
+    const DataTypes::CplxVectorType& vec = getTypedVectorRO((DataTypes::cplx_t)0);
+    DataTypes::CplxVectorType& evVec = temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+    #pragma omp parallel for
+    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) 
+    {
+	for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++)
+	{
+	    escript::antihermitian(vec, shape,
+		    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+		    ev->getPointOffset(sampleNo,dataPointNo));
+	}
+    }
+}
+
+
+
 void DataExpanded::trace(DataAbstract* ev, int axis_offset)
 {
     const int numSamples = getNumSamples();
@@ -445,18 +799,35 @@ void DataExpanded::trace(DataAbstract* ev, int axis_offset)
     DataExpanded* temp_ev = dynamic_cast<DataExpanded*>(ev);
     if (!temp_ev)
         throw DataException("DataExpanded::trace: casting to DataExpanded failed (probably a programming error).");
-
-    const ValueType& vec=getVectorRO();
     const ShapeType& shape=getShape();
-    ValueType& evVec=temp_ev->getVectorRW();
-    const ShapeType& evShape=temp_ev->getShape();
-#pragma omp parallel for
-    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::trace(vec, shape, getPointOffset(sampleNo,dataPointNo),
-                     evVec, evShape, ev->getPointOffset(sampleNo,dataPointNo),
-                     axis_offset);
-        }
+    const ShapeType& evShape=temp_ev->getShape(); 
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec=getVectorROC();
+	DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::trace(vec, shape, getPointOffset(sampleNo,dataPointNo),
+			evVec, evShape, ev->getPointOffset(sampleNo,dataPointNo),
+			axis_offset);
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec=getVectorRO();
+	DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::trace(vec, shape, getPointOffset(sampleNo,dataPointNo),
+			evVec, evShape, ev->getPointOffset(sampleNo,dataPointNo),
+			axis_offset);
+	    }
+	}
     }
 }
 
@@ -467,18 +838,34 @@ void DataExpanded::transpose(DataAbstract* ev, int axis_offset)
     DataExpanded* temp_ev=dynamic_cast<DataExpanded*>(ev);
     if (!temp_ev)
         throw DataException("DataExpanded::transpose: casting to DataExpanded failed (probably a programming error).");
-
-    const ValueType& vec = getVectorRO();
     const ShapeType& shape = getShape();
-    ValueType& evVec = temp_ev->getVectorRW();
-    const ShapeType& evShape = temp_ev->getShape();
-#pragma omp parallel for
-    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::transpose(vec, shape,
-                    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
-                    ev->getPointOffset(sampleNo,dataPointNo), axis_offset);
-        }
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec = getVectorROC();
+	DataTypes::CplxVectorType& evVec = temp_ev->getVectorRWC();
+	const ShapeType& evShape = temp_ev->getShape();
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::transpose(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo), axis_offset);
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec = getVectorRO();
+	DataTypes::RealVectorType& evVec = temp_ev->getVectorRW();
+	const ShapeType& evShape = temp_ev->getShape();
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::transpose(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo), axis_offset);
+	    }
+	}
     }
 }
 
@@ -489,18 +876,35 @@ void DataExpanded::swapaxes(DataAbstract* ev, int axis0, int axis1)
     DataExpanded* temp_ev=dynamic_cast<DataExpanded*>(ev);
     if (!temp_ev)
         throw DataException("Error - DataExpanded::swapaxes: casting to DataExpanded failed (probably a programming error).");
-
-    const ValueType& vec=getVectorRO();
-    const ShapeType& shape=getShape();
-    ValueType& evVec=temp_ev->getVectorRW();
+    const ShapeType& shape=getShape();    
     const ShapeType& evShape=temp_ev->getShape();
-#pragma omp parallel for
-    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::swapaxes(vec, shape,
-                    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
-                    ev->getPointOffset(sampleNo,dataPointNo), axis0, axis1);
-        }
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec=getVectorROC();
+	DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::swapaxes(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo), axis0, axis1);
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec=getVectorRO();
+	DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::swapaxes(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo), axis0, axis1);
+	    }
+	}
     }
 }
 
@@ -511,18 +915,35 @@ void DataExpanded::eigenvalues(DataAbstract* ev)
     DataExpanded* temp_ev=dynamic_cast<DataExpanded*>(ev);
     if (!temp_ev)
         throw DataException("DataExpanded::eigenvalues: casting to DataExpanded failed (probably a programming error).");
-
-    const ValueType& vec=getVectorRO();
-    const ShapeType& shape=getShape();
-    ValueType& evVec=temp_ev->getVectorRW();
     const ShapeType& evShape=temp_ev->getShape();
-#pragma omp parallel for
-    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
-        for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::eigenvalues(vec, shape,
-                    getPointOffset(sampleNo,dataPointNo), evVec, evShape,
-                    ev->getPointOffset(sampleNo,dataPointNo));
-        }
+    const ShapeType& shape=getShape();
+    if (isComplex())
+    {
+	const DataTypes::CplxVectorType& vec=getVectorROC();
+	DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::eigenvalues(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
+    }
+    else
+    {
+	const DataTypes::RealVectorType& vec=getVectorRO();
+	DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();
+
+    #pragma omp parallel for
+	for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+	    for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+		escript::eigenvalues(vec, shape,
+			getPointOffset(sampleNo,dataPointNo), evVec, evShape,
+			ev->getPointOffset(sampleNo,dataPointNo));
+	    }
+	}
     }
 }
 
@@ -539,16 +960,16 @@ void DataExpanded::eigenvalues_and_eigenvectors(DataAbstract* ev,
     if (!temp_V)
         throw DataException("DataExpanded::eigenvalues_and_eigenvectors: casting to DataExpanded failed (probably a programming error).");
 
-    const ValueType& vec = getVectorRO();
+    const DataTypes::RealVectorType& vec = getVectorRO();
     const ShapeType& shape = getShape();
-    ValueType& evVec = temp_ev->getVectorRW();
+    DataTypes::RealVectorType& evVec = temp_ev->getVectorRW();
     const ShapeType& evShape = temp_ev->getShape();
-    ValueType& VVec = temp_V->getVectorRW();
+    DataTypes::RealVectorType& VVec = temp_V->getVectorRW();
     const ShapeType& VShape = temp_V->getShape();
 #pragma omp parallel for
     for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
         for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            DataMaths::eigenvalues_and_eigenvectors(vec, shape,
+            escript::eigenvalues_and_eigenvectors(vec, shape,
                     getPointOffset(sampleNo,dataPointNo), evVec, evShape,
                     ev->getPointOffset(sampleNo,dataPointNo), VVec, VShape,
                     V->getPointOffset(sampleNo,dataPointNo), tol);
@@ -568,7 +989,7 @@ int DataExpanded::matrixInverse(DataAbstract* out) const
 
     const int numdpps=getNumDPPSample();
     const int numSamples = getNumSamples();
-    const ValueType& vec=m_data.getData();
+    const DataTypes::RealVectorType& vec=m_data_r;
     int errcode=0;
 #pragma omp parallel
     {
@@ -578,8 +999,8 @@ int DataExpanded::matrixInverse(DataAbstract* out) const
         for (int sampleNo = 0; sampleNo < numSamples; sampleNo++)
         {
             // not sure I like all those virtual calls to getPointOffset
-            DataTypes::ValueType::size_type offset=getPointOffset(sampleNo,0);
-            int res=DataMaths::matrix_inverse(vec, getShape(), offset,
+            DataTypes::RealVectorType::size_type offset=getPointOffset(sampleNo,0);
+            int res=escript::matrix_inverse(vec, getShape(), offset,
                     temp->getVectorRW(), temp->getShape(), offset, numdpps, h);
             if (res > errorcode) {
                 errorcode=res;
@@ -603,11 +1024,11 @@ void DataExpanded::setToZero()
     CHECK_FOR_EX_WRITE;
     const int numSamples = getNumSamples();
     const int numDataPointsPerSample = getNumDPPSample();
-    const DataTypes::ValueType::size_type n = getNoValues();
+    const DataTypes::RealVectorType::size_type n = getNoValues();
 #pragma omp parallel for
     for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
         for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-            double* p = &m_data[getPointOffset(sampleNo,dataPointNo)];
+            double* p = &m_data_r[getPointOffset(sampleNo,dataPointNo)];
             for (int i=0; i<n; ++i)
                 p[i] = 0.;
         }
@@ -616,7 +1037,7 @@ void DataExpanded::setToZero()
 
 void DataExpanded::dump(const std::string fileName) const
 {
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
     const int ldims=2+DataTypes::maxRank;
     const NcDim* ncdims[ldims];
     NcVar *var, *ids;
@@ -624,15 +1045,12 @@ void DataExpanded::dump(const std::string fileName) const
     int type=  getFunctionSpace().getTypeCode();
     int ndims =0;
     long dims[ldims];
-    const double* d_ptr=&(m_data[0]);
+    const double* d_ptr=&(m_data_r[0]);
     const DataTypes::ShapeType& shape = getShape();
-    int mpi_iam=getFunctionSpace().getDomain()->getMPIRank();
-    int mpi_num=getFunctionSpace().getDomain()->getMPISize();
-
+    JMPI mpiInfo(getFunctionSpace().getDomain()->getMPI());
+    const std::string newFileName(mpiInfo->appendRankToFileName(fileName));
     // netCDF error handler
     NcError err(NcError::verbose_nonfatal);
-    std::string newFileName(esysUtils::appendRankToFileName(fileName,
-                                                            mpi_num, mpi_iam));
     NcFile dataFile(newFileName.c_str(), NcFile::Replace);
     if (!dataFile.is_valid())
         throw DataException("DataExpanded::dump: opening of netCDF file for output failed.");
@@ -683,19 +1101,57 @@ void DataExpanded::dump(const std::string fileName) const
     }
 #else
     throw DataException("DataExpanded::dump: not configured with netCDF. Please contact your installation manager.");
-#endif // USE_NETCDF
+#endif // ESYS_HAVE_NETCDF
+}
+
+void DataExpanded::setTaggedValue(int tagKey,
+                                  const DataTypes::ShapeType& pointshape,
+                                  const DataTypes::RealVectorType& value,
+                                  int dataOffset)
+{
+    CHECK_FOR_EX_WRITE;
+    if (isComplex())
+    {
+        CplxVectorType tv;
+	fillComplexFromReal(value, tv);
+	setTaggedValue(tagKey, pointshape, tv, dataOffset);
+        return;
+    }
+    const int numSamples = getNumSamples();
+    const int numDataPointsPerSample = getNumDPPSample();
+    const DataTypes::RealVectorType::size_type n = getNoValues();
+    const real_t* in = &value[0+dataOffset];
+
+    if (value.size() != n)
+        throw DataException("DataExpanded::setTaggedValue: number of input values does not match number of values per data points.");
+
+#pragma omp parallel for
+    for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
+        if (getFunctionSpace().getTagFromSampleNo(sampleNo) == tagKey) {
+            for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
+                real_t* p = &m_data_r[getPointOffset(sampleNo,dataPointNo)];
+                for (int i=0; i<n; ++i)
+                    p[i] = in[i];
+            }
+        }
+    }
 }
 
+
 void DataExpanded::setTaggedValue(int tagKey,
                                   const DataTypes::ShapeType& pointshape,
-                                  const DataTypes::ValueType& value,
+                                  const DataTypes::CplxVectorType& value,
                                   int dataOffset)
 {
     CHECK_FOR_EX_WRITE;
+    if (!isComplex())
+    {
+	throw DataException("Programming Error - Attempt to set a complex value on a real object.");
+    }
     const int numSamples = getNumSamples();
     const int numDataPointsPerSample = getNumDPPSample();
-    const DataTypes::ValueType::size_type n = getNoValues();
-    const double* in = &value[0+dataOffset];
+    const DataTypes::CplxVectorType::size_type n = getNoValues();
+    const DataTypes::cplx_t* in = &value[0+dataOffset];
 
     if (value.size() != n)
         throw DataException("DataExpanded::setTaggedValue: number of input values does not match number of values per data points.");
@@ -704,7 +1160,7 @@ void DataExpanded::setTaggedValue(int tagKey,
     for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
         if (getFunctionSpace().getTagFromSampleNo(sampleNo) == tagKey) {
             for (int dataPointNo = 0; dataPointNo < numDataPointsPerSample; dataPointNo++) {
-                double* p = &m_data[getPointOffset(sampleNo,dataPointNo)];
+                cplx_t* p = &m_data_c[getPointOffset(sampleNo,dataPointNo)];
                 for (int i=0; i<n; ++i)
                     p[i] = in[i];
             }
@@ -717,7 +1173,7 @@ void DataExpanded::reorderByReferenceIDs(dim_t *reference_ids)
 {
     CHECK_FOR_EX_WRITE;
     const int numSamples = getNumSamples();
-    const DataTypes::ValueType::size_type n = getNoValues() * getNumDPPSample();
+    const DataTypes::RealVectorType::size_type n = getNoValues() * getNumDPPSample();
     FunctionSpace fs=getFunctionSpace();
 
     for (int sampleNo = 0; sampleNo < numSamples; sampleNo++) {
@@ -727,8 +1183,8 @@ void DataExpanded::reorderByReferenceIDs(dim_t *reference_ids)
             bool matched=false;
             for (int sampleNo2 = sampleNo+1; sampleNo2 < numSamples; sampleNo2++) {
                 if (id == reference_ids[sampleNo2]) {
-                    double* p = &m_data[getPointOffset(sampleNo,0)];
-                    double* p2 = &m_data[getPointOffset(sampleNo2,0)];
+                    double* p = &m_data_r[getPointOffset(sampleNo,0)];
+                    double* p2 = &m_data_r[getPointOffset(sampleNo2,0)];
                     for (int i=0; i<n; i++) {
                         const double rtmp=p[i];
                         p[i] = p2[i];
@@ -746,24 +1202,58 @@ void DataExpanded::reorderByReferenceIDs(dim_t *reference_ids)
     }
 }
 
-DataTypes::ValueType& DataExpanded::getVectorRW()
+DataTypes::RealVectorType& DataExpanded::getVectorRW()
+{
+    CHECK_FOR_EX_WRITE;
+    return m_data_r;
+}
+
+const DataTypes::RealVectorType& DataExpanded::getVectorRO() const
+{
+    return m_data_r;
+}
+
+DataTypes::CplxVectorType& DataExpanded::getVectorRWC()
+{
+    CHECK_FOR_EX_WRITE;
+    return m_data_c;
+}
+
+const DataTypes::CplxVectorType& DataExpanded::getVectorROC() const
+{
+    return m_data_c;
+}
+
+DataTypes::RealVectorType& DataExpanded::getTypedVectorRW(DataTypes::real_t dummypar)
 {
     CHECK_FOR_EX_WRITE;
-    return m_data.getData();
+    return m_data_r;
 }
 
-const DataTypes::ValueType& DataExpanded::getVectorRO() const
+const DataTypes::RealVectorType& DataExpanded::getTypedVectorRO(DataTypes::real_t dummypar) const
 {
-    return m_data.getData();
+    return m_data_r;
 }
 
+DataTypes::CplxVectorType& DataExpanded::getTypedVectorRW(DataTypes::cplx_t dummypar)
+{
+    CHECK_FOR_EX_WRITE;
+    return m_data_c;
+}
+
+const DataTypes::CplxVectorType& DataExpanded::getTypedVectorRO(DataTypes::cplx_t dummypar) const
+{
+    return m_data_c;
+}
+
+
 //void DataExpanded::randomFill(long seed)
 //{
 //    CHECK_FOR_EX_WRITE;
 //
 //    DataVector&  dv=getVectorRW();
 //    const size_t dvsize=dv.size();
-//    esysUtils::randomFillArray(seed, &(dv[0]), dvsize);
+//    randomFillArray(seed, &(dv[0]), dvsize);
 //}
 
 }  // end of namespace
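
Many of the rewritten methods above pick the real or complex backing store by handing a dummy value of the desired type to getTypedVectorRW/getTypedVectorRO, so the same loop body serves both cases. A reduced, self-contained sketch of that overload-by-dummy-argument pattern (class and member names below are illustrative stand-ins, not the escript API beyond what the diff itself shows):

#include <complex>
#include <iostream>
#include <vector>

typedef double real_t;
typedef std::complex<double> cplx_t;

// Reduced stand-in for DataExpanded's dual storage: one vector per scalar
// type; the dummy parameter's type selects the overload at compile time.
class Storage {
public:
    Storage(std::size_t n, bool cplx)
      : m_iscompl(cplx),
        m_data_r(cplx ? 0 : n, 0.0),
        m_data_c(cplx ? n : 0, cplx_t(0.0, 0.0)) {}

    std::vector<real_t>& getTypedVectorRW(real_t) { return m_data_r; }
    std::vector<cplx_t>& getTypedVectorRW(cplx_t) { return m_data_c; }

    bool isComplex() const { return m_iscompl; }

private:
    bool m_iscompl;
    std::vector<real_t> m_data_r;
    std::vector<cplx_t> m_data_c;
};

int main()
{
    Storage s(4, true);
    if (s.isComplex()) {
        cplx_t dummy = 0;                 // the dummy only selects the overload
        s.getTypedVectorRW(dummy)[0] = cplx_t(1.0, 2.0);
    } else {
        real_t dummy = 0;
        s.getTypedVectorRW(dummy)[0] = 1.0;
    }
    std::cout << s.getTypedVectorRW(cplx_t(0))[0] << std::endl;  // prints (1,2)
    return 0;
}
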
diff --git a/escriptcore/src/DataExpanded.h b/escriptcore/src/DataExpanded.h
index ef10a2f..ff12be2 100644
--- a/escriptcore/src/DataExpanded.h
+++ b/escriptcore/src/DataExpanded.h
@@ -20,7 +20,6 @@
 #include "system_dep.h"
 
 #include "DataReady.h"
-#include "DataBlocks2D.h"
 
 namespace escript {
 
@@ -62,7 +61,7 @@ typedef DataReady parent;
      \param what - Input - A description of what this data represents.
   */
   ESCRIPT_DLL_API
-  DataExpanded(const WrappedArray& value,
+  explicit DataExpanded(const WrappedArray& value,
                const FunctionSpace& what);
 
   /**
@@ -74,7 +73,7 @@ typedef DataReady parent;
      \param region - Input - region to copy.
   */
   ESCRIPT_DLL_API
-  DataExpanded(const DataExpanded& other,
+  explicit DataExpanded(const DataExpanded& other,
                const DataTypes::RegionType& region);
 
   /**
@@ -90,15 +89,28 @@ typedef DataReady parent;
 TODO Note that this constructor will also copy data to all points if it only contains enough elements to hold a single point.  ie this is the merge of two separate constructors.
   */
   ESCRIPT_DLL_API
-  DataExpanded(const FunctionSpace& what,
+  explicit DataExpanded(const FunctionSpace& what,
                const DataTypes::ShapeType &shape,
-               const DataTypes::ValueType &data);
+               const DataTypes::RealVectorType &data);
+  
+  
+  ESCRIPT_DLL_API
+  explicit DataExpanded(const FunctionSpace& what,
+               const DataTypes::ShapeType &shape,
+               const DataTypes::CplxVectorType &data);
+  
 
 	       
   ESCRIPT_DLL_API
-  DataExpanded(const FunctionSpace& what,
+  explicit DataExpanded(const FunctionSpace& what,
+               const DataTypes::ShapeType &shape,
+               const DataTypes::real_t data);	       
+  
+  ESCRIPT_DLL_API
+  explicit DataExpanded(const FunctionSpace& what,
                const DataTypes::ShapeType &shape,
-               const double data);	       
+               const DataTypes::cplx_t data);	       
+  
 	       
   /**
      \brief
@@ -114,7 +126,7 @@ TODO Note that this constructor will also copy data to all points if it only con
      Construct a DataExpanded from a DataConstant.
   */
   ESCRIPT_DLL_API
-  DataExpanded(const DataConstant& other);
+  explicit DataExpanded(const DataConstant& other);
 
   /**
      \brief
@@ -122,7 +134,7 @@ TODO Note that this constructor will also copy data to all points if it only con
      Construct a DataExpanded from a DataTagged.
   */
   ESCRIPT_DLL_API
-  DataExpanded(const DataTagged& other);
+  explicit DataExpanded(const DataTagged& other);
 
   /**
      \brief
@@ -158,7 +170,11 @@ TODO Note that this constructor will also copy data to all points if it only con
   */
   ESCRIPT_DLL_API
   void
-  replaceNaN(double value);
+  replaceNaN(DataTypes::real_t value);
+
+  ESCRIPT_DLL_API
+  void
+  replaceNaN(DataTypes::cplx_t value);
     
   /**
      \brief
@@ -175,7 +191,7 @@ TODO Note that this constructor will also copy data to all points if it only con
   ESCRIPT_DLL_API
   virtual
   DataAbstract*
-  deepCopy();
+  deepCopy() const;
 
 
  /**
@@ -217,15 +233,15 @@ TODO Note that this constructor will also copy data to all points if it only con
   */
   ESCRIPT_DLL_API
   virtual
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo) const;
 
-  ESCRIPT_DLL_API
-  virtual
-  DataTypes::ValueType::size_type
-  getPointOffset(int sampleNo,
-                 int dataPointNo);
+//   ESCRIPT_DLL_API
+//   virtual
+//   DataTypes::RealVectorType::size_type
+//   getPointOffset(int sampleNo,
+//                  int dataPointNo);
 
   /**
      \brief
@@ -233,14 +249,32 @@ TODO Note that this constructor will also copy data to all points if it only con
   */
 
   ESCRIPT_DLL_API
-  DataTypes::ValueType&
+  DataTypes::RealVectorType&
   getVectorRW();
 
   ESCRIPT_DLL_API
-  const DataTypes::ValueType&
+  const DataTypes::RealVectorType&
   getVectorRO() const;
 
+  ESCRIPT_DLL_API
+  DataTypes::CplxVectorType&
+  getVectorRWC();
 
+  ESCRIPT_DLL_API
+  const DataTypes::CplxVectorType&
+  getVectorROC() const;
+  
+  virtual DataTypes::RealVectorType&
+  getTypedVectorRW(DataTypes::real_t dummy);  
+  
+  virtual const DataTypes::RealVectorType&
+  getTypedVectorRO(DataTypes::real_t dummy) const;
+
+  virtual DataTypes::CplxVectorType&
+  getTypedVectorRW(DataTypes::cplx_t dummy);
+  
+  virtual const DataTypes::CplxVectorType&
+  getTypedVectorRO(DataTypes::cplx_t dummy) const;    
 
   /**
      \brief
@@ -248,7 +282,7 @@ TODO Note that this constructor will also copy data to all points if it only con
   */
   ESCRIPT_DLL_API
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
   /**
@@ -291,10 +325,14 @@ TODO Note that this constructor will also copy data to all points if it only con
   void  
   setTaggedValue(int tagKey,
  	         const DataTypes::ShapeType& pointshape,
-                 const DataTypes::ValueType& value,
+                 const DataTypes::RealVectorType& value,
 		 int dataOffset=0);
 
-
+  void  
+  setTaggedValue(int tagKey,
+ 	         const DataTypes::ShapeType& pointshape,
+                 const DataTypes::CplxVectorType& value,
+		 int dataOffset=0);
 
   /**
      \brief
@@ -309,14 +347,38 @@ TODO Note that this constructor will also copy data to all points if it only con
 
   /**
      \brief
-     Computes a nonsymmetric matrix (A - AT) / 2
+     Computes an antisymmetric matrix (A - AT) / 2
 
      \param ev - Output - nonsymmetric matrix
 
   */
   ESCRIPT_DLL_API
   virtual void
-  nonsymmetric(DataAbstract* ev);
+  antisymmetric(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an hermitian matrix (A + A*) / 2
+
+     \param ev - Output - hermitian matrix
+
+  */
+  ESCRIPT_DLL_API
+  virtual void
+  hermitian(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an antihermitian matrix (A - A*) / 2
+
+     \param ev - Output - antihermitian matrix
+
+  */
+  ESCRIPT_DLL_API
+  virtual void
+  antihermitian(DataAbstract* ev);
+
+
 
   /**
      \brief
@@ -388,8 +450,11 @@ TODO Note that this constructor will also copy data to all points if it only con
 */
   ESCRIPT_DLL_API
   virtual void
-  reorderByReferenceIDs(dim_t *reference_ids);
+  reorderByReferenceIDs(DataTypes::dim_t *reference_ids);
 
+  ESCRIPT_DLL_API
+  void
+  complicate();
  protected:
 
  private:
@@ -407,10 +472,13 @@ TODO Note that this constructor will also copy data to all points if it only con
 
      \param noSamples - Input - number of samples.
      \param noDataPointsPerSample - Input - number of data points per sample.
+     \param cplx - Input - is this data complex?
   */
   void
   initialise(int noSamples,
-             int noDataPointsPerSample);
+             int noDataPointsPerSample,
+	     bool cplx
+	    );
 
   /**
      \brief
@@ -450,8 +518,12 @@ TODO Note that this constructor will also copy data to all points if it only con
   */
   ESCRIPT_DLL_API
   virtual void
-  copyToDataPoint(const int sampleNo, const int dataPointNo, const double value);
+  copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::real_t value);
 
+  ESCRIPT_DLL_API
+  virtual void
+  copyToDataPoint(const int sampleNo, const int dataPointNo, const DataTypes::cplx_t value);  
+  
 
   /**
      \brief
@@ -468,8 +540,8 @@ TODO Note that this constructor will also copy data to all points if it only con
   //
   // The main data storage array, a 2D array of data blocks.
   // noSamples * noDataPointsPerSample
-  DataBlocks2D m_data;
-
+  DataTypes::RealVectorType m_data_r;
+  DataTypes::CplxVectorType m_data_c;
 };
 
 } // end of namespace
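
The header hunks above also mark the converting constructors explicit, so a DataConstant or DataTagged can no longer be turned into a DataExpanded implicitly. A minimal illustration of what explicit buys here, using placeholder types rather than the escript classes:

#include <iostream>

struct Widget {
    explicit Widget(double v) : value(v) {}  // explicit: no silent conversion from double
    double value;
};

double twice(const Widget& w) { return 2.0 * w.value; }

int main()
{
    // twice(3.5);                    // rejected: would need an implicit Widget(3.5)
    std::cout << twice(Widget(3.5)) << std::endl;  // caller constructs deliberately
    return 0;
}
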
diff --git a/escriptcore/src/DataFactory.cpp b/escriptcore/src/DataFactory.cpp
index bce295d..da11fb4 100644
--- a/escriptcore/src/DataFactory.cpp
+++ b/escriptcore/src/DataFactory.cpp
@@ -14,440 +14,359 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataFactory.h"
-#include "esysUtils/Esys_MPI.h"
 
 #include <boost/python/extract.hpp>
 #include <boost/scoped_array.hpp>
 
-#include <iostream>
 #include <exception>
-#ifdef USE_NETCDF
+#include <iostream>
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-using namespace boost::python;
+namespace bp = boost::python;
 
 namespace escript {
 
-Data
-Scalar(double value,
-       const FunctionSpace& what,
-       bool expanded)
+Data Scalar(double value, const FunctionSpace& what, bool expanded)
 {
-    //
     // an empty shape is a scalar
     DataTypes::ShapeType shape;
-    return Data(value,shape,what,expanded);
+    return Data(value, shape, what, expanded);
 }
 
-Data
-Vector(double value,
-       const FunctionSpace& what,
-       bool expanded)
+Data Vector(double value, const FunctionSpace& what, bool expanded)
 {
-    DataTypes::ShapeType shape(1,what.getDomain()->getDim());
-    return Data(value,shape,what,expanded);
+    DataTypes::ShapeType shape(1, what.getDomain()->getDim());
+    return Data(value, shape, what, expanded);
 }
 
-Data
-VectorFromObj(boost::python::object o,
-	const FunctionSpace& what,
-	bool expanded)
+Data VectorFromObj(bp::object o, const FunctionSpace& what, bool expanded)
 {    
-    double v;
-    try			// first try to get a double and route it to the other method
-    {
-	v=boost::python::extract<double>(o);
-	return Vector(v,what,expanded);
+    // first try to get a double and route it to the other method
+    try {
+        double v = bp::extract<double>(o);
+        return Vector(v, what, expanded);
+    } catch(...) {
+        PyErr_Clear();
     }
-    catch(...)
-    {
-	PyErr_Clear();
-    }
-    DataTypes::ShapeType shape(1,what.getDomain()->getDim());
-    Data d(o,what,expanded);
-    if (d.getDataPointShape()!=shape)
-    {
-	throw DataException("VectorFromObj: Shape of vector passed to function does not match the dimension of the domain. ");
+    DataTypes::ShapeType shape(1, what.getDomain()->getDim());
+    Data d(o, what, expanded);
+    if (d.getDataPointShape() != shape) {
+        throw DataException("VectorFromObj: Shape of vector passed to function"
+               " does not match the dimension of the domain. ");
     }
     return d;
 }
 
-Data
-Tensor(double value,
-       const FunctionSpace& what,
-       bool expanded)
+Data Tensor(double value, const FunctionSpace& what, bool expanded)
 {
-    DataTypes::ShapeType shape(2,what.getDomain()->getDim());
-    return Data(value,shape,what,expanded);
+    DataTypes::ShapeType shape(2, what.getDomain()->getDim());
+    return Data(value, shape, what, expanded);
 }
 
 
 // We need to take some care here because this signature trumps the other one from boost's point of view
-Data
-TensorFromObj(boost::python::object o,
-	const FunctionSpace& what,
-	bool expanded)
+Data TensorFromObj(bp::object o, const FunctionSpace& what, bool expanded)
 {
-    double v;
-    try			// first try to get a double and route it to the other method
-    {
-	v=boost::python::extract<double>(o);
-	return Tensor(v,what,expanded);
-    }
-    catch(...)
-    {
-	PyErr_Clear();
+    // first try to get a double and route it to the other method
+    try {
+        double v = bp::extract<double>(o);
+        return Tensor(v, what, expanded);
+    } catch(...) {
+        PyErr_Clear();
     }
-    DataTypes::ShapeType shape(2,what.getDomain()->getDim());
-    Data d(o,what,expanded);
-    if (d.getDataPointShape()!=shape)
-    {
-	throw DataException("TensorFromObj: Shape of tensor passed to function does not match the dimension of the domain. ");
+    DataTypes::ShapeType shape(2, what.getDomain()->getDim());
+    Data d(o, what, expanded);
+    if (d.getDataPointShape() != shape) {
+        throw DataException("TensorFromObj: Shape of tensor passed to function"
+               " does not match the dimension of the domain.");
     }
     return d;
 }
 
-Data
-Tensor3(double value,
-        const FunctionSpace& what,
-        bool expanded)
+Data Tensor3(double value, const FunctionSpace& what, bool expanded)
 {
-    DataTypes::ShapeType shape(3,what.getDomain()->getDim());
-    return Data(value,shape,what,expanded);
+    DataTypes::ShapeType shape(3, what.getDomain()->getDim());
+    return Data(value, shape, what, expanded);
 }
 
-Data
-Tensor3FromObj(boost::python::object o,
-	const FunctionSpace& what,
-	bool expanded)
+Data Tensor3FromObj(bp::object o, const FunctionSpace& what, bool expanded)
 {
-    double v;
-    try			// first try to get a double and route it to the other method
-    {
-	v=boost::python::extract<double>(o);
-	return Tensor3(v,what,expanded);
-    }
-    catch(...)
-    {
-	PyErr_Clear();
+    // first try to get a double and route it to the other method
+    try {
+        double v = bp::extract<double>(o);
+        return Tensor3(v, what, expanded);
+    } catch(...) {
+        PyErr_Clear();
     }
-    DataTypes::ShapeType shape(3,what.getDomain()->getDim());
-    Data d(o,what,expanded);
-    if (d.getDataPointShape()!=shape)
-    {
-	throw DataException("Tensor3FromObj: Shape of tensor passed to function does not match the dimension of the domain. ");
+    DataTypes::ShapeType shape(3, what.getDomain()->getDim());
+    Data d(o, what, expanded);
+    if (d.getDataPointShape() != shape) {
+        throw DataException("Tensor3FromObj: Shape of tensor passed to "
+                "function does not match the dimension of the domain.");
     }
     return d;
 }
 
-Data
-Tensor4(double value,
-        const FunctionSpace& what,
-        bool expanded)
+Data Tensor4(double value, const FunctionSpace& what, bool expanded)
 {
-    DataTypes::ShapeType shape(4,what.getDomain()->getDim());
-    return Data(value,shape,what,expanded);
+    DataTypes::ShapeType shape(4, what.getDomain()->getDim());
+    return Data(value, shape, what, expanded);
 }
 
-Data
-Tensor4FromObj(boost::python::object o,
-	const FunctionSpace& what,
-	bool expanded)
+Data Tensor4FromObj(bp::object o, const FunctionSpace& what, bool expanded)
 {
-    double v;
-    try			// first try to get a double and route it to the other method
-    {
-	v=boost::python::extract<double>(o);
-	return Tensor4(v,what,expanded);
+    // first try to get a double and route it to the other method
+    try {
+        double v = bp::extract<double>(o);
+        return Tensor4(v, what, expanded);
+    } catch(...) {
+        PyErr_Clear();
     }
-    catch(...)
-    {
-	PyErr_Clear();
-    }
-    DataTypes::ShapeType shape(4,what.getDomain()->getDim());
-    Data d(o,what,expanded);
-    if (d.getDataPointShape()!=shape)
-    {
-	throw DataException("VectorFromObj: Shape of tensor passed to function does not match the dimension of the domain. ");
+    DataTypes::ShapeType shape(4, what.getDomain()->getDim());
+    Data d(o, what, expanded);
+    if (d.getDataPointShape() != shape) {
+        throw DataException("VectorFromObj: Shape of tensor passed to function"
+               " does not match the dimension of the domain.");
     }
     return d;
 }
 
 
-Data 
-load(const std::string fileName,
-     const AbstractDomain& domain)
+Data load(const std::string fileName, const AbstractDomain& domain)
 {
-   #ifdef USE_NETCDF
-   NcAtt *type_att, *rank_att, *function_space_type_att;
-   // netCDF error handler
-   NcError err(NcError::silent_nonfatal);
-   int mpi_iam=0, mpi_num=1;
-   // Create the file.
-#ifdef ESYS_MPI
-   MPI_Comm_rank(MPI_COMM_WORLD, &mpi_iam);
-   MPI_Comm_size(MPI_COMM_WORLD, &mpi_num);
-#endif
-   const std::string newFileName(esysUtils::appendRankToFileName(fileName,
-                                                            mpi_num, mpi_iam));
-   NcFile dataFile(newFileName.c_str(), NcFile::ReadOnly);
-   if (!dataFile.is_valid())
-        throw DataException("Error - load:: opening of netCDF file for input failed.");
-   /* recover function space */
-   if (! (function_space_type_att=dataFile.get_att("function_space_type")) )
-        throw DataException("Error - load:: cannot recover function_space_type attribute from escript netCDF file.");
-   int function_space_type = function_space_type_att->as_int(0);
-   delete function_space_type_att;
-   /* test if function space id is valid and create function space instance */
-   if (! domain.isValidFunctionSpaceType(function_space_type) ) 
-        throw DataException("Error - load:: function space type code in netCDF file is invalid for given domain.");
-   FunctionSpace function_space=FunctionSpace(domain.getPtr(), function_space_type);
-   /* recover rank */
-   if (! (rank_att=dataFile.get_att("rank")) )
-        throw DataException("Error - load:: cannot recover rank attribute from escript netCDF file.");
-   int rank = rank_att->as_int(0);
-   delete rank_att;
-   if (rank<0 || rank>DataTypes::maxRank)
-        throw DataException("Error - load:: rank in escript netCDF file is greater than maximum rank.");
-   /* recover type attribute */
-   int type=-1;
-   if ((type_att=dataFile.get_att("type")) ) {
-       boost::scoped_array<char> type_str(type_att->as_string(0));
-       if (strncmp(type_str.get(), "constant", strlen("constant")) == 0 ) {
-          type = 0;
-       } else if (strncmp(type_str.get(), "tagged", strlen("tagged")) == 0 ) {
-           type = 1;
-       } else if (strncmp(type_str.get(), "expanded", strlen("expanded")) == 0 ) {
-           type = 2;
-       }
-   } else {
-      if (! (type_att=dataFile.get_att("type_id")) )
-  	throw DataException("Error - load:: cannot recover type attribute from escript netCDF file.");
-      type=type_att->as_int(0);
-   }
-   delete type_att;
+#ifdef ESYS_HAVE_NETCDF
+    NcAtt *type_att, *rank_att, *function_space_type_att;
+    // netCDF error handler
+    NcError err(NcError::silent_nonfatal);
+    JMPI mpiInfo(domain.getMPI());
+    const std::string newFileName(mpiInfo->appendRankToFileName(fileName));
+    NcFile dataFile(newFileName.c_str(), NcFile::ReadOnly);
+    Data out;
+    int error = 0;
+    std::string msg;
+    try {
+        if (!dataFile.is_valid())
+            throw DataException("load: opening of netCDF file for input failed.");
+       // recover function space
+        if (! (function_space_type_att=dataFile.get_att("function_space_type")) )
+            throw DataException("load: cannot recover function_space_type attribute from escript netCDF file.");
+        int function_space_type = function_space_type_att->as_int(0);
+        delete function_space_type_att;
+        // test if function space id is valid and create function space instance
+        if (!domain.isValidFunctionSpaceType(function_space_type)) 
+            throw DataException("load: function space type code in netCDF file is invalid for given domain.");
+        FunctionSpace function_space=FunctionSpace(domain.getPtr(), function_space_type);
+        // recover rank
+        if (! (rank_att=dataFile.get_att("rank")) )
+            throw DataException("load: cannot recover rank attribute from escript netCDF file.");
+        int rank = rank_att->as_int(0);
+        delete rank_att;
+        if (rank<0 || rank>DataTypes::maxRank)
+            throw DataException("load: rank in escript netCDF file is greater than maximum rank.");
+        // recover type attribute
+        int type=-1;
+        if ((type_att=dataFile.get_att("type")) ) {
+            boost::scoped_array<char> type_str(type_att->as_string(0));
+            if (strncmp(type_str.get(), "constant", strlen("constant")) == 0 ) {
+                type = 0;
+            } else if (strncmp(type_str.get(), "tagged", strlen("tagged")) == 0 ) {
+                type = 1;
+            } else if (strncmp(type_str.get(), "expanded", strlen("expanded")) == 0 ) {
+                type = 2;
+            }
+        } else {
+            if (! (type_att=dataFile.get_att("type_id")) )
+                throw DataException("load: cannot recover type attribute from escript netCDF file.");
+            type=type_att->as_int(0);
+        }
+        delete type_att;
 
-   /* recover dimension */
-   int ndims=dataFile.num_dims();
-   int ntags =0 , num_samples =0 , num_data_points_per_sample =0, d=0, len_data_point=1;
-   NcDim *d_dim, *tags_dim, *num_samples_dim, *num_data_points_per_sample_dim;
-   /* recover shape */
-   DataTypes::ShapeType shape;
-   long dims[DataTypes::maxRank+2];
-   if (rank>0) {
-     if (! (d_dim=dataFile.get_dim("d0")) )
-          throw DataException("Error - load:: unable to recover d0 from netCDF file.");
-      d=d_dim->size();
-      shape.push_back(d);
-      dims[0]=d;
-      len_data_point*=d;
-   }
-   if (rank>1) {
-     if (! (d_dim=dataFile.get_dim("d1")) )
-          throw DataException("Error - load:: unable to recover d1 from netCDF file.");
-      d=d_dim->size();
-      shape.push_back(d);
-      dims[1]=d;
-      len_data_point*=d;
-   }
-   if (rank>2) {
-     if (! (d_dim=dataFile.get_dim("d2")) )
-          throw DataException("Error - load:: unable to recover d2 from netCDF file.");
-      d=d_dim->size();
-      shape.push_back(d);
-      dims[2]=d;
-      len_data_point*=d;
-   }
-   if (rank>3) {
-     if (! (d_dim=dataFile.get_dim("d3")) )
-          throw DataException("Error - load:: unable to recover d3 from netCDF file.");
-      d=d_dim->size();
-      shape.push_back(d);
-      dims[3]=d;
-      len_data_point*=d;
-   }
-   /* recover stuff */
-   Data out;
-   NcVar *var, *ids_var, *tags_var;
-   if (type == 0) {
-      /* constant data */
-      if ( ! ( (ndims == rank && rank >0) || ( ndims ==1 && rank == 0 ) ) )
-          throw DataException("Error - load:: illegal number of dimensions for constant data in netCDF file.");
-      if (rank == 0) {
-          if (! (d_dim=dataFile.get_dim("l")) )
-              throw DataException("Error - load:: unable to recover d0 for scalar constant data in netCDF file.");
-          int d0 = d_dim->size();
-          if (d0 != 1)
-              throw DataException("Error - load:: d0 is expected to be one for scalar constant data in netCDF file.");
-          dims[0]=1;
-      }
-      out=Data(0,shape,function_space);
-      if (!(var = dataFile.get_var("data")))
-              throw DataException("Error - load:: unable to find data in netCDF file.");
-      if (! var->get(&(out.getDataAtOffsetRW(out.getDataOffset(0,0))), dims) ) 
-              throw DataException("Error - load:: unable to recover data from netCDF file.");
-   } else if (type == 1) { 
-      /* tagged data */
-      if ( ! (ndims == rank + 1) )
-         throw DataException("Error - load:: illegal number of dimensions for tagged data in netCDF file.");
-      if (! (tags_dim=dataFile.get_dim("num_tags")) )
-         throw DataException("Error - load:: unable to recover number of tags from netCDF file.");
-      ntags=tags_dim->size();
-      dims[rank]=ntags;
-      std::vector<int> tags(ntags);
-      if (! ( tags_var = dataFile.get_var("tags")) )
-      {
-         throw DataException("Error - load:: unable to find tags in netCDF file.");
-      }
-      if (! tags_var->get(&tags[0], ntags) ) 
-      {
-         throw DataException("Error - load:: unable to recover tags from netCDF file.");
-      }
+        // recover dimension
+        int ndims=dataFile.num_dims();
+        int ntags =0 , num_samples =0 , num_data_points_per_sample =0, d=0, len_data_point=1;
+        NcDim *d_dim, *tags_dim, *num_samples_dim, *num_data_points_per_sample_dim;
+        /* recover shape */
+        DataTypes::ShapeType shape;
+        long dims[DataTypes::maxRank+2];
+        if (rank>0) {
+            if (! (d_dim=dataFile.get_dim("d0")) )
+                throw DataException("load: unable to recover d0 from netCDF file.");
+            d=d_dim->size();
+            shape.push_back(d);
+            dims[0]=d;
+            len_data_point*=d;
+        }
+        if (rank>1) {
+            if (! (d_dim=dataFile.get_dim("d1")) )
+                throw DataException("load: unable to recover d1 from netCDF file.");
+            d=d_dim->size();
+            shape.push_back(d);
+            dims[1]=d;
+            len_data_point*=d;
+        }
+        if (rank>2) {
+            if (! (d_dim=dataFile.get_dim("d2")) )
+                throw DataException("load: unable to recover d2 from netCDF file.");
+            d=d_dim->size();
+            shape.push_back(d);
+            dims[2]=d;
+            len_data_point*=d;
+        }
+        if (rank>3) {
+            if (! (d_dim=dataFile.get_dim("d3")) )
+                throw DataException("load: unable to recover d3 from netCDF file.");
+            d=d_dim->size();
+            shape.push_back(d);
+            dims[3]=d;
+            len_data_point*=d;
+        }
 
-// Current Version
-/*      DataVector data(len_data_point * ntags, 0., len_data_point * ntags);
-      if (!(var = dataFile.get_var("data")))
-      {
-         esysUtils::free(tags);
-         throw DataException("Error - load:: unable to find data in netCDF file.");
-      }
-      if (! var->get(&(data[0]), dims) ) 
-      {
-         esysUtils::free(tags);
-         throw DataException("Error - load:: unable to recover data from netCDF file.");
-      }
-      out=Data(DataArrayView(data,shape,0),function_space);
-      for (int t=1; t<ntags; ++t) {
-	 out.setTaggedValueFromCPP(tags[t],shape, data, t*len_data_point);
-//         out.setTaggedValueFromCPP(tags[t],DataArrayView(data,shape,t*len_data_point));
-      }*/
-// End current version
-	
-// New version
+        NcVar *var, *ids_var, *tags_var;
+        if (type == 0) {
+            // constant data
+            if ( ! ( (ndims == rank && rank >0) || ( ndims ==1 && rank == 0 ) ) )
+                throw DataException("load: illegal number of dimensions for constant data in netCDF file.");
+            if (rank == 0) {
+                if (! (d_dim=dataFile.get_dim("l")) )
+                    throw DataException("load: unable to recover d0 for scalar constant data in netCDF file.");
+                int d0 = d_dim->size();
+                if (d0 != 1)
+                    throw DataException("load: d0 is expected to be one for scalar constant data in netCDF file.");
+                dims[0]=1;
+            }
+            out=Data(0,shape,function_space,false);
+            if (!(var = dataFile.get_var("data")))
+                throw DataException("load: unable to find data in netCDF file.");
+            if (! var->get(&(out.getDataAtOffsetRW(out.getDataOffset(0,0))), dims) ) 
+                throw DataException("load: unable to recover data from netCDF file.");
+        } else if (type == 1) { 
+            // tagged data
+            if ( ! (ndims == rank + 1) )
+                throw DataException("load: illegal number of dimensions for tagged data in netCDF file.");
+            if (! (tags_dim=dataFile.get_dim("num_tags")) )
+                throw DataException("load: unable to recover number of tags from netCDF file.");
+            ntags=tags_dim->size();
+            dims[rank]=ntags;
+            std::vector<int> tags(ntags);
+            if (! ( tags_var = dataFile.get_var("tags")) )
+                throw DataException("load: unable to find tags in netCDF file.");
+            if (! tags_var->get(&tags[0], ntags) ) 
+                throw DataException("load: unable to recover tags from netCDF file.");
 
-	// A) create a DataTagged dt
-	// B) Read data from file
-	// C) copy default value into dt
-	// D) copy tagged values into dt
-	// E) create a new Data based on dt
+            // A) create a DataTagged dt
+            // B) Read data from file
+            // C) copy default value into dt
+            // D) copy tagged values into dt
+            // E) create a new Data based on dt
 
-      NcVar* var1;
-      DataVector data1(len_data_point * ntags, 0., len_data_point * ntags);
-      if (!(var1 = dataFile.get_var("data")))
-      {
-         throw DataException("Error - load:: unable to find data in netCDF file.");
-      }
-      if (! var1->get(&(data1[0]), dims) ) 
-      {
-         throw DataException("Error - load:: unable to recover data from netCDF file.");
-      }
-      DataTagged* dt=new DataTagged(function_space, shape, &tags[0], data1);
-      out=Data(dt);
-   } else if (type == 2) {
-      /* expanded data */
-      if ( ! (ndims == rank + 2) )
-          throw DataException("Error - load:: illegal number of dimensions for expanded data in netCDF file.");
-      if ( ! (num_samples_dim = dataFile.get_dim("num_samples") ) )
-          throw DataException("Error - load:: unable to recover number of samples from netCDF file.");
-      num_samples = num_samples_dim->size();
-      if ( ! (num_data_points_per_sample_dim = dataFile.get_dim("num_data_points_per_sample") ) )
-          throw DataException("Error - load:: unable to recover number of data points per sample from netCDF file.");
-      num_data_points_per_sample=num_data_points_per_sample_dim->size();
-      // check shape:
-      if ( ! (num_samples == function_space.getNumSamples() && num_data_points_per_sample == function_space.getNumDataPointsPerSample()) )
-          throw DataException("Error - load:: data sample layout of file does not match data layout of function space.");
-      if (num_samples==0) {
-	out = Data(0,shape,function_space,true);
-      }
-      else {
-	// get ids
-	if (! ( ids_var = dataFile.get_var("id")) )
-		throw DataException("Error - load:: unable to find reference ids in netCDF file.");
-	const dim_t* ids_p=function_space.borrowSampleReferenceIDs();
-    std::vector<dim_t> ids_of_nc(num_samples);
-	if (! ids_var->get(&ids_of_nc[0], (long) num_samples) ) 
-	{
-		throw DataException("Error - load:: unable to recover ids from netCDF file.");
-	}
-	// check order:
-	int failed=-1, local_failed=-1, i;
-	#pragma omp parallel private(local_failed)
-	{
-		local_failed=-1;
-		#pragma omp for private(i) schedule(static)
-		for (i=0;i < num_samples; ++i) {
-		if (ids_of_nc[i]!=ids_p[i]) local_failed=i;
-		}
-		#pragma omp critical
-		if (local_failed>=0) failed = local_failed;
-	}
-	/* if (failed>=0) 
-	{
-		throw DataException("Error - load:: data ordering in netCDF file does not match ordering of FunctionSpace.");
-	} */
-	// get the data:
-	dims[rank]=num_data_points_per_sample;
-	dims[rank+1]=num_samples;
-	out=Data(0,shape,function_space,true);
-	if (!(var = dataFile.get_var("data")))
-	{
-		throw DataException("Error - load:: unable to find data in netCDF file.");
-	}
-	if (! var->get(&(out.getDataAtOffsetRW(out.getDataOffset(0,0))), dims) ) 
-	{
-		throw DataException("Error - load:: unable to recover data from netCDF file.");
-	}
-	if (failed>=0) {
-		try {
-		std::cout << "Information - load: start reordering data from netCDF file " << fileName << std::endl;
-		out.borrowData()->reorderByReferenceIDs(&ids_of_nc[0]);
-		} 
-		catch (std::exception&) {
-		throw DataException("Error - load:: unable to reorder data in netCDF file.");
-		}
-	}
-      }
-   } else {
-       throw DataException("Error - load:: unknown escript data type in netCDF file.");
-   }
-   return out;
-   #else
-   throw DataException("Error - load:: is not compiled with netCDF. Please contact your installation manager.");
-   #endif
+            NcVar* var1;
+            DataTypes::RealVectorType data1(len_data_point * ntags, 0., len_data_point * ntags);
+            if (!(var1 = dataFile.get_var("data")))
+                throw DataException("load: unable to find data in netCDF file.");
+            if (! var1->get(&(data1[0]), dims) ) 
+                throw DataException("load: unable to recover data from netCDF file.");
+            DataTagged* dt=new DataTagged(function_space, shape, &tags[0], data1);
+            out=Data(dt);
+        } else if (type == 2) {
+            // expanded data
+            if ( ! (ndims == rank + 2) )
+                throw DataException("load: illegal number of dimensions for expanded data in netCDF file.");
+            if ( ! (num_samples_dim = dataFile.get_dim("num_samples") ) )
+                throw DataException("load: unable to recover number of samples from netCDF file.");
+            num_samples = num_samples_dim->size();
+            if ( ! (num_data_points_per_sample_dim = dataFile.get_dim("num_data_points_per_sample") ) )
+                throw DataException("load: unable to recover number of data points per sample from netCDF file.");
+            num_data_points_per_sample=num_data_points_per_sample_dim->size();
+            // check shape:
+            if ( ! (num_samples == function_space.getNumSamples() && num_data_points_per_sample == function_space.getNumDataPointsPerSample()) )
+                throw DataException("load: data sample layout of file does not match data layout of function space.");
+            if (num_samples==0) {
+                out = Data(0,shape,function_space,true);
+            } else {
+                // get ids
+                if (! ( ids_var = dataFile.get_var("id")) )
+                    throw DataException("load: unable to find reference ids in netCDF file.");
+                const DataTypes::dim_t* ids_p=function_space.borrowSampleReferenceIDs();
+                std::vector<DataTypes::dim_t> ids_of_nc(num_samples);
+                if (! ids_var->get(&ids_of_nc[0], (long) num_samples) ) 
+                    throw DataException("load: unable to recover ids from netCDF file.");
+                // check order:
+                int failed=-1, local_failed=-1, i;
+#pragma omp parallel private(local_failed)
+                {
+                    local_failed=-1;
+#pragma omp for private(i) schedule(static)
+                    for (i=0; i < num_samples; ++i) {
+                        if (ids_of_nc[i]!=ids_p[i]) local_failed=i;
+                    }
+#pragma omp critical
+                    if (local_failed>=0) failed = local_failed;
+                }
+                // get the data:
+                dims[rank]=num_data_points_per_sample;
+                dims[rank+1]=num_samples;
+                out=Data(0,shape,function_space,true);
+                if (!(var = dataFile.get_var("data")))
+                    throw DataException("load: unable to find data in netCDF file.");
+                if (! var->get(&(out.getDataAtOffsetRW(out.getDataOffset(0,0))), dims) ) 
+                    throw DataException("load: unable to recover data from netCDF file.");
+                if (failed >= 0) {
+                    try {
+                        std::cout << "Information - load: start reordering data from netCDF file " << fileName << std::endl;
+                        out.borrowData()->reorderByReferenceIDs(&ids_of_nc[0]);
+                    } catch (std::exception&) {
+                        throw DataException("load: unable to reorder data in netCDF file.");
+                    }
+                }
+            }
+        } else {
+            throw DataException("load: unknown escript data type in netCDF file.");
+        }
+    } catch (DataException& e) {
+        error=1;
+        msg=e.what();
+    }
+    int gerror = error;
+    checkResult(error, gerror, mpiInfo);
+    if (gerror > 0) {
+        char* gmsg;
+        shipString(msg.c_str(), &gmsg, mpiInfo->comm);
+        throw DataException(gmsg);
+    }
+    return out;
+#else
+    throw DataException("load: not compiled with netCDF. Please contact your"
+                        " installation manager.");
+#endif // ESYS_HAVE_NETCDF
 }
 
-bool 
-loadConfigured()
+bool loadConfigured()
 {
-   #ifdef USE_NETCDF
-   return true;
-   #else
-   return false;
-   #endif
+#ifdef ESYS_HAVE_NETCDF
+    return true;
+#else
+    return false;
+#endif
 }
 
-Data
-convertToData(const boost::python::object& value,
-              const FunctionSpace& what) 
+Data convertToData(const bp::object& value, const FunctionSpace& what) 
 {
-     // first we try to extract a Data object from value 
-     extract<Data> value_data(value);
-     if (value_data.check()) {
-         Data extracted_data=value_data();
-         if (extracted_data.isEmpty()) {
+    // first we try to extract a Data object from value 
+    bp::extract<Data> value_data(value);
+    if (value_data.check()) {
+        Data extracted_data=value_data();
+        if (extracted_data.isEmpty()) {
             return extracted_data;
-         } else {
+        } else {
             return Data(extracted_data,what);
-         }
-     } else {
-        return Data(value,what);
-     }
+        }
+    } else {
+        return Data(value,what,false);
+    }
 }
 
 }  // end of namespace
+
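
One pattern worth calling out from the rewritten factory functions above is the "try a plain double first, fall back to an array-like object" dispatch in VectorFromObj, TensorFromObj and friends. Below is a self-contained sketch of that probe using boost::python directly; the helper name tryAsDouble is made up for illustration, while escript itself uses extract plus PyErr_Clear as shown in the diff.

    #include <boost/python.hpp>

    namespace bp = boost::python;

    // Returns true and fills 'out' when the Python object converts cleanly to a
    // double; extract<T>::check() lets us probe without relying on exceptions.
    bool tryAsDouble(const bp::object& o, double& out)
    {
        bp::extract<double> ex(o);
        if (!ex.check())
            return false;
        out = ex();
        return true;
    }

Using check() first is an alternative to the catch(...) / PyErr_Clear() route taken in the commit; either way scalars are routed to the double overload and everything else to the Data(object, ...) constructor.
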
diff --git a/escriptcore/src/DataLazy.cpp b/escriptcore/src/DataLazy.cpp
index 794dbc7..e468cce 100644
--- a/escriptcore/src/DataLazy.cpp
+++ b/escriptcore/src/DataLazy.cpp
@@ -14,27 +14,19 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "DataLazy.h"
-#include "esysUtils/Esys_MPI.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-#include "FunctionSpace.h"
-#include "DataTypes.h"
 #include "Data.h"
-#include "UnaryFuncs.h"		// for escript::fsign
+#include "DataTypes.h"
+#include "EscriptParams.h"
+#include "FunctionSpace.h"
 #include "Utils.h"
+#include "DataVectorOps.h"
 
-#include "EscriptParams.h"
+#include <iomanip> // for some fancy formatting in debug
 
-#ifdef USE_NETCDF
-#include <netcdfcpp.h>
-#endif
+using namespace escript::DataTypes;
 
-#include <iomanip>		// for some fancy formatting in debug
+#define NO_ARG
 
 // #define LAZYDEBUG(X) if (privdebug){X;} 
 #define LAZYDEBUG(X)
@@ -46,12 +38,18 @@ bool privdebug=false;
 #define DISABLEDEBUG privdebug=false;
 }
 
-// #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
+//#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
 
-// #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
+//#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
 
 
-#define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
+#define SIZELIMIT \
+    if (m_height > escript::escriptParams.getTooManyLevels()) {\
+        if (escript::escriptParams.getLazyVerbose()) {\
+            cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
+        }\
+        resolveToIdentity();\
+    }
 
 /*
 How does DataLazy work?
@@ -72,9 +70,9 @@ I will refer to individual DataLazy objects with the structure as nodes.
 
 Each node also stores:
 - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
-	evaluated.
+        evaluated.
 - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
-	evaluate the expression.
+        evaluate the expression.
 - m_samplesize ~ the number of doubles stored in a sample.
 
 When a new node is created, the above values are computed based on the values in the child nodes.
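
To make the m_readytype bookkeeping described in the comment above concrete: the binary constructors later in this file combine the children's types with an "expanded beats tagged beats constant" rule. The tiny helper below restates the if/else chain visible in those constructors; it does not exist in escript and is only an illustration.

    // 'E' = expanded, 'T' = tagged, 'C' = constant, the values stored in
    // DataLazy::m_readytype. A node is as "wide" as its widest child.
    inline char combineReadyType(char lt, char rt)
    {
        if (lt == 'E' || rt == 'E') return 'E';
        if (lt == 'T' || rt == 'T') return 'T';
        return 'C';
    }
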
@@ -135,64 +133,15 @@ std::vector<void*> stackend(getNumberOfThreads());
 size_t maxstackuse=0;
 #endif
 
-enum ES_opgroup
-{
-   G_UNKNOWN,
-   G_IDENTITY,
-   G_BINARY,		// pointwise operations with two arguments
-   G_UNARY,		// pointwise operations with one argument
-   G_UNARY_P,		// pointwise operations with one argument, requiring a parameter
-   G_NP1OUT,		// non-pointwise op with one output
-   G_NP1OUT_P,		// non-pointwise op with one output requiring a parameter
-   G_TENSORPROD,	// general tensor product
-   G_NP1OUT_2P,		// non-pointwise op with one output requiring two params
-   G_REDUCTION,		// non-pointwise unary op with a scalar output
-   G_CONDEVAL
-};
-
-
-
-
-string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
-			"sin","cos","tan",
-			"asin","acos","atan","sinh","cosh","tanh","erf",
-			"asinh","acosh","atanh",
-			"log10","log","sign","abs","neg","pos","exp","sqrt",
-			"1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
-			"symmetric","nonsymmetric",
-			"prod",
-			"transpose", "trace",
-			"swapaxes",
-			"minval", "maxval",
-			"condEval"};
-int ES_opcount=44;
-ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
-			G_UNARY,G_UNARY,G_UNARY, //10
-			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,	// 17
-			G_UNARY,G_UNARY,G_UNARY,					// 20
-			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,	// 28
-			G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,		// 35
-			G_NP1OUT,G_NP1OUT,
-			G_TENSORPROD,
-			G_NP1OUT_P, G_NP1OUT_P,
-			G_NP1OUT_2P,
-			G_REDUCTION, G_REDUCTION,
-			G_CONDEVAL};
-inline
-ES_opgroup
-getOpgroup(ES_optype op)
-{
-  return opgroups[op];
-}
 
 // return the FunctionSpace of the result of "left op right"
 FunctionSpace
 resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
 {
-	// perhaps this should call interpolate and throw or something?
-	// maybe we need an interpolate node -
-	// that way, if interpolate is required in any other op we can just throw a 
-	// programming error exception.
+        // perhaps this should call interpolate and throw or something?
+        // maybe we need an interpolate node -
+        // that way, if interpolate is required in any other op we can just throw a 
+        // programming error exception.
 
   FunctionSpace l=left->getFunctionSpace();
   FunctionSpace r=right->getFunctionSpace();
@@ -201,11 +150,11 @@ resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
     signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
     if (res==1)
     {
-	return l;
+        return l;
     }
     if (res==-1)
     {
-	return r;
+        return r;
     }
     throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
   }
@@ -217,24 +166,24 @@ resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
 DataTypes::ShapeType
 resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
 {
-	if (left->getShape()!=right->getShape())
-	{
-	  if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
-	  {
-		throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
-	  }
-
-	  if (left->getRank()==0)	// we need to allow scalar * anything
-	  {
-		return right->getShape();
-	  }
-	  if (right->getRank()==0)
-	  {
-		return left->getShape();
-	  }
-	  throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
-	}
-	return left->getShape();
+        if (left->getShape()!=right->getShape())
+        {
+          if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
+          {
+                throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
+          }
+
+          if (left->getRank()==0)       // we need to allow scalar * anything
+          {
+                return right->getShape();
+          }
+          if (right->getRank()==0)
+          {
+                return left->getShape();
+          }
+          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
+        }
+        return left->getShape();
 }
 
 // return the shape for "op left"
@@ -242,85 +191,85 @@ resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
 DataTypes::ShapeType
 resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
 {
-	switch(op)
-	{
-    	case TRANS:
-	   {			// for the scoping of variables
-		const DataTypes::ShapeType& s=left->getShape();
-		DataTypes::ShapeType sh;
-		int rank=left->getRank();
-		if (axis_offset<0 || axis_offset>rank)
-		{
+        switch(op)
+        {
+        case TRANS:
+           {                    // for the scoping of variables
+                const DataTypes::ShapeType& s=left->getShape();
+                DataTypes::ShapeType sh;
+                int rank=left->getRank();
+                if (axis_offset<0 || axis_offset>rank)
+                {
             stringstream e;
             e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
             throw DataException(e.str());
-     	}
-     	for (int i=0; i<rank; i++)
-		{
-		   int index = (axis_offset+i)%rank;
-       	   sh.push_back(s[index]); // Append to new shape
-     	}
-		return sh;
-	   }
-	break;
-   	case TRACE:
-	   {
-		int rank=left->getRank();
-		if (rank<2)
-		{
-		   throw DataException("Trace can only be computed for objects with rank 2 or greater.");
-		}
-		if ((axis_offset>rank-2) || (axis_offset<0))
-		{
-		   throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
-		}
-		if (rank==2)
-		{
-		   return DataTypes::scalarShape;
-		}
-		else if (rank==3)
-		{
-		   DataTypes::ShapeType sh;
-        	   if (axis_offset==0)
-		   {
-          		sh.push_back(left->getShape()[2]);
-        	   }
-        	   else 	// offset==1
-		   {
-			sh.push_back(left->getShape()[0]);
-        	   }
-		   return sh;
-		}
-		else if (rank==4)
-		{
-		   DataTypes::ShapeType sh;
-		   const DataTypes::ShapeType& s=left->getShape();
-        	   if (axis_offset==0)
-		   {
-          		sh.push_back(s[2]);
-          		sh.push_back(s[3]);
-        	   }
-        	   else if (axis_offset==1)
-		   {
-          		sh.push_back(s[0]);
-          		sh.push_back(s[3]);
-        	   }
-		   else 	// offset==2
-		   {
-	  		sh.push_back(s[0]);
-	  		sh.push_back(s[1]);
-		   }
-		   return sh;
-		}
-		else		// unknown rank
-		{
-		   throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
-		}
-	   }
-	break;
-    	default:
-	throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
-	}
+                }
+                for (int i=0; i<rank; i++)
+                {
+                    int index = (axis_offset+i)%rank;
+                    sh.push_back(s[index]); // Append to new shape
+                }
+                return sh;
+           }
+        break;
+        case TRACE:
+           {
+                int rank=left->getRank();
+                if (rank<2)
+                {
+                   throw DataException("Trace can only be computed for objects with rank 2 or greater.");
+                }
+                if ((axis_offset>rank-2) || (axis_offset<0))
+                {
+                   throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
+                }
+                if (rank==2)
+                {
+                   return DataTypes::scalarShape;
+                }
+                else if (rank==3)
+                {
+                   DataTypes::ShapeType sh;
+                   if (axis_offset==0)
+                   {
+                        sh.push_back(left->getShape()[2]);
+                   }
+                   else         // offset==1
+                   {
+                        sh.push_back(left->getShape()[0]);
+                   }
+                   return sh;
+                }
+                else if (rank==4)
+                {
+                   DataTypes::ShapeType sh;
+                   const DataTypes::ShapeType& s=left->getShape();
+                   if (axis_offset==0)
+                   {
+                        sh.push_back(s[2]);
+                        sh.push_back(s[3]);
+                   }
+                   else if (axis_offset==1)
+                   {
+                        sh.push_back(s[0]);
+                        sh.push_back(s[3]);
+                   }
+                   else         // offset==2
+                   {
+                        sh.push_back(s[0]);
+                        sh.push_back(s[1]);
+                   }
+                   return sh;
+                }
+                else            // unknown rank
+                {
+                   throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
+                }
+           }
+        break;
+        default:
+        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
+        }
 }
 
 DataTypes::ShapeType
@@ -376,7 +325,7 @@ SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 DataTypes::ShapeType
 GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
 {
-	
+        
   // Get rank and shape of inputs
   int rank0 = left->getRank();
   int rank1 = right->getRank();
@@ -385,40 +334,40 @@ GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int tra
 
   // Prepare for the loops of the product and verify compatibility of shapes
   int start0=0, start1=0;
-  if (transpose == 0)		{}
-  else if (transpose == 1)	{ start0 = axis_offset; }
-  else if (transpose == 2)	{ start1 = rank1-axis_offset; }
-  else				{ throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
+  if (transpose == 0)           {}
+  else if (transpose == 1)      { start0 = axis_offset; }
+  else if (transpose == 2)      { start1 = rank1-axis_offset; }
+  else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
 
   if (rank0<axis_offset)
   {
-	throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
+        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
   }
 
   // Adjust the shapes for transpose
-  DataTypes::ShapeType tmpShape0(rank0);	// pre-sizing the vectors rather
-  DataTypes::ShapeType tmpShape1(rank1);	// than using push_back
-  for (int i=0; i<rank0; i++)	{ tmpShape0[i]=shape0[(i+start0)%rank0]; }
-  for (int i=0; i<rank1; i++)	{ tmpShape1[i]=shape1[(i+start1)%rank1]; }
+  DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
+  DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
+  for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
+  for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
 
   // Prepare for the loops of the product
   SL=1, SM=1, SR=1;
-  for (int i=0; i<rank0-axis_offset; i++)	{
+  for (int i=0; i<rank0-axis_offset; i++)       {
     SL *= tmpShape0[i];
   }
-  for (int i=rank0-axis_offset; i<rank0; i++)	{
+  for (int i=rank0-axis_offset; i<rank0; i++)   {
     if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
       throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
     }
     SM *= tmpShape0[i];
   }
-  for (int i=axis_offset; i<rank1; i++)		{
+  for (int i=axis_offset; i<rank1; i++)         {
     SR *= tmpShape1[i];
   }
 
   // Define the shape of the output (rank of shape is the sum of the loop ranges below)
-  DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);	
-  {			// block to limit the scope of out_index
+  DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);       
+  {                     // block to limit the scope of out_index
      int out_index=0;
      for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
      for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
@@ -434,20 +383,7 @@ GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int tra
   return shape2;
 }
 
-}	// end anonymous namespace
-
-
-
-// Return a string representing the operation
-const std::string&
-opToString(ES_optype op)
-{
-  if (op<0 || op>=ES_opcount) 
-  {
-    op=UNKNOWNOP;
-  }
-  return ES_opstrings[op];
-}
+}       // end anonymous namespace
 
 void DataLazy::LazyNodeSetup()
 {
@@ -469,47 +405,46 @@ void DataLazy::LazyNodeSetup()
 
 // Creates an identity node
 DataLazy::DataLazy(DataAbstract_ptr p)
-	: parent(p->getFunctionSpace(),p->getShape())
-	,m_sampleids(0),
-	m_samples(1)
+        : parent(p->getFunctionSpace(),p->getShape())
+        ,m_sampleids(0),
+        m_samples(1)
 {
    if (p->isLazy())
    {
-	// I don't want identity of Lazy.
-	// Question: Why would that be so bad?
-	// Answer: We assume that the child of ID is something we can call getVector on
-	throw DataException("Programmer error - attempt to create identity from a DataLazy.");
+        // I don't want identity of Lazy.
+        // Question: Why would that be so bad?
+        // Answer: We assume that the child of ID is something we can call getVector on
+        throw DataException("Programmer error - attempt to create identity from a DataLazy.");
    }
    else
    {
-	p->makeLazyShared();
-	DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
-	makeIdentity(dr);
+        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
+        makeIdentity(dr);
 LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
    }
 LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
 }
 
 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
-	: parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
-	m_op(op),
-	m_axis_offset(0),
-	m_transpose(0),
-	m_SL(0), m_SM(0), m_SR(0)
+        : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
+        m_op(op),
+        m_axis_offset(0),
+        m_transpose(0),
+        m_SL(0), m_SM(0), m_SR(0)
 {
    if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
+        throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
    }
 
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
-	lleft=DataLazy_ptr(new DataLazy(left));
+        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
-	lleft=dynamic_pointer_cast<DataLazy>(left);
+        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
@@ -523,64 +458,64 @@ DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
 
 // In this constructor we need to consider interpolation
 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
-	: parent(resultFS(left,right,op), resultShape(left,right,op)),
-	m_op(op),
-	m_SL(0), m_SM(0), m_SR(0)
+        : parent(resultFS(left,right,op), resultShape(left,right,op)),
+        m_op(op),
+        m_SL(0), m_SM(0), m_SR(0)
 {
 LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
    if ((getOpgroup(op)!=G_BINARY))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
+        throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
    }
 
-   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
+   if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
    {
-	FunctionSpace fs=getFunctionSpace();
-	Data ltemp(left);
-	Data tmp(ltemp,fs);
-	left=tmp.borrowDataPtr();
+        FunctionSpace fs=getFunctionSpace();
+        Data ltemp(left);
+        Data tmp(ltemp,fs);
+        left=tmp.borrowDataPtr();
    }
-   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
+   if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
    {
-	Data tmp(Data(right),getFunctionSpace());
-	right=tmp.borrowDataPtr();
+        Data tmp(Data(right),getFunctionSpace());
+        right=tmp.borrowDataPtr();
 LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
    }
    left->operandCheck(*right);
 
-   if (left->isLazy())			// the children need to be DataLazy. Wrap them in IDENTITY if required
+   if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
    {
-	m_left=dynamic_pointer_cast<DataLazy>(left);
+        m_left=dynamic_pointer_cast<DataLazy>(left);
 LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
    }
    else
    {
-	m_left=DataLazy_ptr(new DataLazy(left));
+        m_left=DataLazy_ptr(new DataLazy(left));
 LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
    }
    if (right->isLazy())
    {
-	m_right=dynamic_pointer_cast<DataLazy>(right);
+        m_right=dynamic_pointer_cast<DataLazy>(right);
 LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
    }
    else
    {
-	m_right=DataLazy_ptr(new DataLazy(right));
+        m_right=DataLazy_ptr(new DataLazy(right));
 LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
    }
    char lt=m_left->m_readytype;
    char rt=m_right->m_readytype;
    if (lt=='E' || rt=='E')
    {
-	m_readytype='E';
+        m_readytype='E';
    }
    else if (lt=='T' || rt=='T')
    {
-	m_readytype='T';
+        m_readytype='T';
    }
    else
    {
-	m_readytype='C';
+        m_readytype='C';
    }
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+m_right->m_children+2;
@@ -591,62 +526,62 @@ LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
 }
 
 DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
-	: parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
-	m_op(op),
-	m_axis_offset(axis_offset),
-	m_transpose(transpose)
+        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
+        m_op(op),
+        m_axis_offset(axis_offset),
+        m_transpose(transpose)
 {
    if ((getOpgroup(op)!=G_TENSORPROD))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
+        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
    }
    if ((transpose>2) || (transpose<0))
    {
-	throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
+        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
    }
-   if (getFunctionSpace()!=left->getFunctionSpace())	// left needs to be interpolated
+   if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
    {
-	FunctionSpace fs=getFunctionSpace();
-	Data ltemp(left);
-	Data tmp(ltemp,fs);
-	left=tmp.borrowDataPtr();
+        FunctionSpace fs=getFunctionSpace();
+        Data ltemp(left);
+        Data tmp(ltemp,fs);
+        left=tmp.borrowDataPtr();
    }
-   if (getFunctionSpace()!=right->getFunctionSpace())	// right needs to be interpolated
+   if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
    {
-	Data tmp(Data(right),getFunctionSpace());
-	right=tmp.borrowDataPtr();
+        Data tmp(Data(right),getFunctionSpace());
+        right=tmp.borrowDataPtr();
    }
 //    left->operandCheck(*right);
 
-   if (left->isLazy())			// the children need to be DataLazy. Wrap them in IDENTITY if required
+   if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
    {
-	m_left=dynamic_pointer_cast<DataLazy>(left);
+        m_left=dynamic_pointer_cast<DataLazy>(left);
    }
    else
    {
-	m_left=DataLazy_ptr(new DataLazy(left));
+        m_left=DataLazy_ptr(new DataLazy(left));
    }
    if (right->isLazy())
    {
-	m_right=dynamic_pointer_cast<DataLazy>(right);
+        m_right=dynamic_pointer_cast<DataLazy>(right);
    }
    else
    {
-	m_right=DataLazy_ptr(new DataLazy(right));
+        m_right=DataLazy_ptr(new DataLazy(right));
    }
    char lt=m_left->m_readytype;
    char rt=m_right->m_readytype;
    if (lt=='E' || rt=='E')
    {
-	m_readytype='E';
+        m_readytype='E';
    }
    else if (lt=='T' || rt=='T')
    {
-	m_readytype='T';
+        m_readytype='T';
    }
    else
    {
-	m_readytype='C';
+        m_readytype='C';
    }
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+m_right->m_children+2;
@@ -658,24 +593,24 @@ LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
 
 
 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
-	: parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
-	m_op(op),
-	m_axis_offset(axis_offset),
-	m_transpose(0),
-	m_tol(0)
+        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
+        m_op(op),
+        m_axis_offset(axis_offset),
+        m_transpose(0),
+        m_tol(0)
 {
    if ((getOpgroup(op)!=G_NP1OUT_P))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
+        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
    }
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
-	lleft=DataLazy_ptr(new DataLazy(left));
+        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
-	lleft=dynamic_pointer_cast<DataLazy>(left);
+        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
@@ -688,24 +623,24 @@ LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
 }
 
 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
-	: parent(left->getFunctionSpace(), left->getShape()),
-	m_op(op),
-	m_axis_offset(0),
-	m_transpose(0),
-	m_tol(tol)
+        : parent(left->getFunctionSpace(), left->getShape()),
+        m_op(op),
+        m_axis_offset(0),
+        m_transpose(0),
+        m_tol(tol)
 {
    if ((getOpgroup(op)!=G_UNARY_P))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
+        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
    }
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
-	lleft=DataLazy_ptr(new DataLazy(left));
+        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
-	lleft=dynamic_pointer_cast<DataLazy>(left);
+        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
@@ -719,24 +654,24 @@ LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
 
 
 DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
-	: parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
-	m_op(op),
-	m_axis_offset(axis0),
-	m_transpose(axis1),
-	m_tol(0)
+        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
+        m_op(op),
+        m_axis_offset(axis0),
+        m_transpose(axis1),
+        m_tol(0)
 {
    if ((getOpgroup(op)!=G_NP1OUT_2P))
    {
-	throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
+        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
    }
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
-	lleft=DataLazy_ptr(new DataLazy(left));
+        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
-	lleft=dynamic_pointer_cast<DataLazy>(left);
+        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
@@ -754,18 +689,18 @@ namespace
 
     inline int max3(int a, int b, int c)
     {
-	int t=(a>b?a:b);
-	return (t>c?t:c);
+        int t=(a>b?a:b);
+        return (t>c?t:c);
 
     }
 }
 
 DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
-	: parent(left->getFunctionSpace(), left->getShape()),
-	m_op(CONDEVAL),
-	m_axis_offset(0),
-	m_transpose(0),
-	m_tol(0)
+        : parent(left->getFunctionSpace(), left->getShape()),
+        m_op(CONDEVAL),
+        m_axis_offset(0),
+        m_transpose(0),
+        m_tol(0)
 {
 
    DataLazy_ptr lmask;
@@ -773,32 +708,32 @@ DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_pt
    DataLazy_ptr lright;
    if (!mask->isLazy())
    {
-	lmask=DataLazy_ptr(new DataLazy(mask));
+        lmask=DataLazy_ptr(new DataLazy(mask));
    }
    else
    {
-	lmask=dynamic_pointer_cast<DataLazy>(mask);
+        lmask=dynamic_pointer_cast<DataLazy>(mask);
    }
    if (!left->isLazy())
    {
-	lleft=DataLazy_ptr(new DataLazy(left));
+        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
-	lleft=dynamic_pointer_cast<DataLazy>(left);
+        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    if (!right->isLazy())
    {
-	lright=DataLazy_ptr(new DataLazy(right));
+        lright=DataLazy_ptr(new DataLazy(right));
    }
    else
    {
-	lright=dynamic_pointer_cast<DataLazy>(right);
+        lright=dynamic_pointer_cast<DataLazy>(right);
    }
    m_readytype=lmask->m_readytype;
    if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
    {
-	throw DataException("Programmer Error - condEval arguments must have the same readytype");
+        throw DataException("Programmer Error - condEval arguments must have the same readytype");
    }
    m_left=lleft;
    m_right=lright;
@@ -828,7 +763,7 @@ DataReady_ptr
 DataLazy::collapseToReady() const
 {
   if (m_readytype=='E')
-  {	// this is more an efficiency concern than anything else
+  {     // this is more an efficiency concern than anything else
     throw DataException("Programmer Error - do not use collapse on Expanded data.");
   }
   if (m_op==IDENTITY)
@@ -846,129 +781,132 @@ DataLazy::collapseToReady() const
   switch(m_op)
   {
     case ADD:
-	result=left+right;
-	break;
-    case SUB:		
-	result=left-right;
-	break;
-    case MUL:		
-	result=left*right;
-	break;
-    case DIV:		
-	result=left/right;
-	break;
+        result=left+right;
+        break;
+    case SUB:           
+        result=left-right;
+        break;
+    case MUL:           
+        result=left*right;
+        break;
+    case DIV:           
+        result=left/right;
+        break;
     case SIN:
-	result=left.sin();	
-	break;
+        result=left.sin();      
+        break;
     case COS:
-	result=left.cos();
-	break;
+        result=left.cos();
+        break;
     case TAN:
-	result=left.tan();
-	break;
+        result=left.tan();
+        break;
     case ASIN:
-	result=left.asin();
-	break;
+        result=left.asin();
+        break;
     case ACOS:
-	result=left.acos();
-	break;
+        result=left.acos();
+        break;
     case ATAN:
-	result=left.atan();
-	break;
+        result=left.atan();
+        break;
     case SINH:
-	result=left.sinh();
-	break;
+        result=left.sinh();
+        break;
     case COSH:
-	result=left.cosh();
-	break;
+        result=left.cosh();
+        break;
     case TANH:
-	result=left.tanh();
-	break;
+        result=left.tanh();
+        break;
     case ERF:
-	result=left.erf();
-	break;
+        result=left.erf();
+        break;
    case ASINH:
-	result=left.asinh();
-	break;
+        result=left.asinh();
+        break;
    case ACOSH:
-	result=left.acosh();
-	break;
+        result=left.acosh();
+        break;
    case ATANH:
-	result=left.atanh();
-	break;
+        result=left.atanh();
+        break;
     case LOG10:
-	result=left.log10();
-	break;
+        result=left.log10();
+        break;
     case LOG:
-	result=left.log();
-	break;
+        result=left.log();
+        break;
     case SIGN:
-	result=left.sign();
-	break;
+        result=left.sign();
+        break;
     case ABS:
-	result=left.abs();
-	break;
+        result=left.abs();
+        break;
     case NEG:
-	result=left.neg();
-	break;
+        result=left.neg();
+        break;
     case POS:
-	// it doesn't mean anything for delayed.
-	// it will just trigger a deep copy of the lazy object
-	throw DataException("Programmer error - POS not supported for lazy data.");
-	break;
+        // POS doesn't mean anything for delayed (lazy) data;
+        // it would just trigger a deep copy of the lazy object.
+        throw DataException("Programmer error - POS not supported for lazy data.");
+        break;
     case EXP:
-	result=left.exp();
-	break;
+        result=left.exp();
+        break;
     case SQRT:
-	result=left.sqrt();
-	break;
+        result=left.sqrt();
+        break;
     case RECIP:
-	result=left.oneOver();
-	break;
+        result=left.oneOver();
+        break;
     case GZ:
-	result=left.wherePositive();
-	break;
+        result=left.wherePositive();
+        break;
     case LZ:
-	result=left.whereNegative();
-	break;
+        result=left.whereNegative();
+        break;
     case GEZ:
-	result=left.whereNonNegative();
-	break;
+        result=left.whereNonNegative();
+        break;
     case LEZ:
-	result=left.whereNonPositive();
-	break;
+        result=left.whereNonPositive();
+        break;
     case NEZ:
-	result=left.whereNonZero(m_tol);
-	break;
+        result=left.whereNonZero(m_tol);
+        break;
     case EZ:
-	result=left.whereZero(m_tol);
-	break;
+        result=left.whereZero(m_tol);
+        break;
     case SYM:
-	result=left.symmetric();
-	break;
+        result=left.symmetric();
+        break;
     case NSYM:
-	result=left.nonsymmetric();
-	break;
+        result=left.antisymmetric();
+        break;
     case PROD:
-	result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
-	break;
+        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
+        break;
     case TRANS:
-	result=left.transpose(m_axis_offset);
-	break;
+        result=left.transpose(m_axis_offset);
+        break;
     case TRACE:
-	result=left.trace(m_axis_offset);
-	break;
+        result=left.trace(m_axis_offset);
+        break;
     case SWAP:
-	result=left.swapaxes(m_axis_offset, m_transpose);
-	break;
+        result=left.swapaxes(m_axis_offset, m_transpose);
+        break;
     case MINVAL:
-	result=left.minval();
-	break;
+        result=left.minval();
+        break;
     case MAXVAL:
-	result=left.minval();
+        result=left.maxval();
+        break;
+    case HER:
+        result=left.hermitian();
 	break;
     default:
-	throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
   }
   return result.borrowReadyPtr();
 }
@@ -984,52 +922,30 @@ DataLazy::collapse() const
 {
   if (m_op==IDENTITY)
   {
-	return;
+        return;
   }
   if (m_readytype=='E')
-  {	// this is more an efficiency concern than anything else
+  {     // this is more an efficiency concern than anything else
     throw DataException("Programmer Error - do not use collapse on Expanded data.");
   }
   m_id=collapseToReady();
   m_op=IDENTITY;
 }
 
-
-
-
-
-
-#define PROC_OP(TYPE,X)                               \
-	for (int j=0;j<onumsteps;++j)\
-	{\
-	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
-	  { \
-LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
-LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
-	     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
-LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
-	     lroffset+=leftstep; \
-	     rroffset+=rightstep; \
-	  }\
-	  lroffset+=oleftstep;\
-	  rroffset+=orightstep;\
-	}
-
-
 // The result will be stored in m_samples
 // The return value is a pointer to the DataVector, offset is the offset within the return value
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
 {
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
-	// collapse so we have a 'E' node or an IDENTITY for some other type
+        // collapse so we have a 'E' node or an IDENTITY for some other type
   if (m_readytype!='E' && m_op!=IDENTITY)
   {
-	collapse();
+        collapse();
   }
-  if (m_op==IDENTITY)	
+  if (m_op==IDENTITY)   
   {
-    const ValueType& vec=m_id->getVectorRO();
+    const RealVectorType& vec=m_id->getVectorRO();
     roffset=m_id->getPointOffset(sampleNo, 0);
 #ifdef LAZY_STACK_PROF
 int x;
@@ -1046,8 +962,8 @@ if (&x<stackend[omp_get_thread_num()])
   }
   if (m_sampleids[tid]==sampleNo)
   {
-	roffset=tid*m_samplesize;
-	return &(m_samples);		// sample is already resolved
+        roffset=tid*m_samplesize;
+        return &(m_samples);            // sample is already resolved
   }
   m_sampleids[tid]=sampleNo;
 
@@ -1067,13 +983,13 @@ if (&x<stackend[omp_get_thread_num()])
   }
 }
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
 {
-	// we assume that any collapsing has been done before we get here
-	// since we only have one argument we don't need to think about only
-	// processing single points.
-	// we will also know we won't get identity nodes
+        // we assume that any collapsing has been done before we get here
+        // since we only have one argument we don't need to think about only
+        // processing single points.
+        // we will also know we won't get identity nodes
   if (m_readytype!='E')
   {
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
@@ -1082,130 +998,32 @@ DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
   {
     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
   }
-  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
+  const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
   const double* left=&((*leftres)[roffset]);
   roffset=m_samplesize*tid;
   double* result=&(m_samples[roffset]);
-  switch (m_op)
+  if (m_op==POS)
   {
-    case SIN:	
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
-	break;
-    case COS:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
-	break;
-    case TAN:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
-	break;
-    case ASIN:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
-	break;
-    case ACOS:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
-	break;
-    case ATAN:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
-	break;
-    case SINH:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
-	break;
-    case COSH:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
-	break;
-    case TANH:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
-	break;
-    case ERF:
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-	throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
-#else
-	tensor_unary_operation(m_samplesize, left, result, ::erf);
-	break;
-#endif
-   case ASINH:
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-	tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
-#else
-	tensor_unary_operation(m_samplesize, left, result, ::asinh);
-#endif   
-	break;
-   case ACOSH:
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-	tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
-#else
-	tensor_unary_operation(m_samplesize, left, result, ::acosh);
-#endif   
-	break;
-   case ATANH:
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-	tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
-#else
-	tensor_unary_operation(m_samplesize, left, result, ::atanh);
-#endif   
-	break;
-    case LOG10:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
-	break;
-    case LOG:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
-	break;
-    case SIGN:
-	tensor_unary_operation(m_samplesize, left, result, escript::fsign);
-	break;
-    case ABS:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
-	break;
-    case NEG:
-	tensor_unary_operation(m_samplesize, left, result, negate<double>());
-	break;
-    case POS:
-	// it doesn't mean anything for delayed.
-	// it will just trigger a deep copy of the lazy object
-	throw DataException("Programmer error - POS not supported for lazy data.");
-	break;
-    case EXP:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
-	break;
-    case SQRT:
-	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
-	break;
-    case RECIP:
-	tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
-	break;
-    case GZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
-	break;
-    case LZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
-	break;
-    case GEZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
-	break;
-    case LEZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
-	break;
-// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
-    case NEZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
-	break;
-    case EZ:
-	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
-	break;
-
-    default:
-	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
-  }
+        // This should have been caught earlier: POS is meaningless for
+        // lazy data and would only trigger a deep copy of the object.
+        throw DataException("Programmer error - POS not supported for lazy data.");
+  }
+  tensor_unary_array_operation(m_samplesize,
+                             left,
+                             result,
+                             m_op,
+                             m_tol);  
   return &(m_samples);
 }
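
Note on the hunk above: the long per-operator switch formerly in resolveNodeUnary is collapsed into one call to the new ArrayOps.h helper, with the operator and tolerance passed through. The real tensor_unary_array_operation is defined upstream and is not shown in this patch; the fragment below is only a minimal sketch of the kind of element-wise dispatch such a helper performs, using an invented name and a small illustrative operator subset rather than the escript API.

    #include <cmath>
    #include <cstddef>
    #include <stdexcept>

    // Illustrative stand-in for a few of the unary operators handled upstream.
    enum class UnaryOp { SIN, ABS, RECIP, EZ };

    // Sketch: apply one scalar operation to every double in a sample block.
    // 'tol' is only consulted by the tolerance-based tests (EZ/NEZ upstream).
    inline void unary_array_op_sketch(std::size_t n, const double* in,
                                      double* out, UnaryOp op, double tol)
    {
        for (std::size_t i = 0; i < n; ++i)
        {
            switch (op)
            {
            case UnaryOp::SIN:   out[i] = std::sin(in[i]);  break;
            case UnaryOp::ABS:   out[i] = std::fabs(in[i]); break;
            case UnaryOp::RECIP: out[i] = 1.0 / in[i];      break;
            case UnaryOp::EZ:    out[i] = (std::fabs(in[i]) <= tol) ? 1.0 : 0.0; break;
            default: throw std::runtime_error("unsupported operator");
            }
        }
    }
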
 
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
 {
-	// we assume that any collapsing has been done before we get here
-	// since we only have one argument we don't need to think about only
-	// processing single points.
-	// we will also know we won't get identity nodes
+        // we assume that any collapsing has been done before we get here
+        // since we only have one argument we don't need to think about only
+        // processing single points.
+        // we will also know we won't get identity nodes
   if (m_readytype!='E')
   {
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
@@ -1215,7 +1033,7 @@ DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
     throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
   }
   size_t loffset=0;
-  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
+  const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
 
   roffset=m_samplesize*tid;
   unsigned int ndpps=getNumDPPSample();
@@ -1224,39 +1042,39 @@ DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   switch (m_op)
   {
     case MINVAL:
-	{
-	  for (unsigned int z=0;z<ndpps;++z)
-	  {
-	    FMin op;
-	    *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
-	    loffset+=psize;
-	    result++;
-	  }
-	}
-	break;
+        {
+          for (unsigned int z=0;z<ndpps;++z)
+          {
+            FMin op;
+            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
+            loffset+=psize;
+            result++;
+          }
+        }
+        break;
     case MAXVAL:
-	{
-	  for (unsigned int z=0;z<ndpps;++z)
-	  {
-	  FMax op;
-	  *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
-	  loffset+=psize;
-	  result++;
-	  }
-	}
-	break;
+        {
+          for (unsigned int z=0;z<ndpps;++z)
+          {
+          FMax op;
+          *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
+          loffset+=psize;
+          result++;
+          }
+        }
+        break;
     default:
-	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
   }
   return &(m_samples);
 }
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 {
-	// we assume that any collapsing has been done before we get here
-	// since we only have one argument we don't need to think about only
-	// processing single points.
+        // we assume that any collapsing has been done before we get here
+        // since we only have one argument we don't need to think about only
+        // processing single points.
   if (m_readytype!='E')
   {
     throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
@@ -1266,7 +1084,7 @@ DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
     throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
   }
   size_t subroffset;
-  const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
+  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
   roffset=m_samplesize*tid;
   size_t loop=0;
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
@@ -1275,33 +1093,33 @@ DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
   switch (m_op)
   {
     case SYM:
-	for (loop=0;loop<numsteps;++loop)
-	{
-	    DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
-	    subroffset+=step;
-	    offset+=step;
-	}
-	break;
+        for (loop=0;loop<numsteps;++loop)
+        {
+            escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
+            subroffset+=step;
+            offset+=step;
+        }
+        break;
     case NSYM:
-	for (loop=0;loop<numsteps;++loop)
-	{
-	    DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
-	    subroffset+=step;
-	    offset+=step;
-	}
-	break;
+        for (loop=0;loop<numsteps;++loop)
+        {
+            escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
+            subroffset+=step;
+            offset+=step;
+        }
+        break;
     default:
-	throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
   }
   return &m_samples;
 }
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
 {
-	// we assume that any collapsing has been done before we get here
-	// since we only have one argument we don't need to think about only
-	// processing single points.
+        // we assume that any collapsing has been done before we get here
+        // since we only have one argument we don't need to think about only
+        // processing single points.
   if (m_readytype!='E')
   {
     throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
@@ -1312,7 +1130,7 @@ DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   }
   size_t subroffset;
   size_t offset;
-  const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
+  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
   roffset=m_samplesize*tid;
   offset=roffset;
   size_t loop=0;
@@ -1322,29 +1140,29 @@ DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   switch (m_op)
   {
     case TRACE:
-	for (loop=0;loop<numsteps;++loop)
-	{
-            DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
-	    subroffset+=instep;
-	    offset+=outstep;
-	}
-	break;
+        for (loop=0;loop<numsteps;++loop)
+        {
+            escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
+            subroffset+=instep;
+            offset+=outstep;
+        }
+        break;
     case TRANS:
-	for (loop=0;loop<numsteps;++loop)
-	{
-            DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
-	    subroffset+=instep;
-	    offset+=outstep;
-	}
-	break;
+        for (loop=0;loop<numsteps;++loop)
+        {
+            escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
+            subroffset+=instep;
+            offset+=outstep;
+        }
+        break;
     default:
-	throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
   }
   return &m_samples;
 }
 
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
 {
   if (m_readytype!='E')
@@ -1357,7 +1175,7 @@ DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   }
   size_t subroffset;
   size_t offset;
-  const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
+  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
   roffset=m_samplesize*tid;
   offset=roffset;
   size_t loop=0;
@@ -1367,20 +1185,20 @@ DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   switch (m_op)
   {
     case SWAP:
-	for (loop=0;loop<numsteps;++loop)
-	{
-            DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
-	    subroffset+=instep;
-	    offset+=outstep;
-	}
-	break;
+        for (loop=0;loop<numsteps;++loop)
+        {
+            escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
+            subroffset+=instep;
+            offset+=outstep;
+        }
+        break;
     default:
-	throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
   }
   return &m_samples;
 }
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
 {
   if (m_readytype!='E')
@@ -1393,15 +1211,15 @@ DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
   }
   size_t subroffset;
 
-  const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
-  const ValueType* srcres=0;
+  const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
+  const RealVectorType* srcres=0;
   if ((*maskres)[subroffset]>0)
   {
-	srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
+        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
   }
   else
   {
-	srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
+        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
   }
 
   // Now we need to copy the result
@@ -1409,7 +1227,7 @@ DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
   roffset=m_samplesize*tid;
   for (int i=0;i<m_samplesize;++i)
   {
-	m_samples[roffset+i]=(*srcres)[subroffset+i];	
+        m_samples[roffset+i]=(*srcres)[subroffset+i];   
   }
 
   return &m_samples;
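
A convention that runs through all of the resolve methods above: each node writes its answer into m_samples at offset m_samplesize*tid, so the one vector serves as a fixed-size scratch slot per OpenMP thread and concurrent resolves never need a lock. A self-contained sketch of that layout, with illustrative names rather than escript's own types:

    #include <cstddef>
    #include <vector>

    // One flat buffer holding one fixed-size slot per thread; each thread only
    // ever touches its own slot, so no synchronisation is required.
    class PerThreadScratch
    {
    public:
        PerThreadScratch(std::size_t samplesize, int numThreads)
            : m_samplesize(samplesize), m_samples(samplesize * numThreads) {}

        // Equivalent of "roffset = m_samplesize * tid" in the code above.
        double* slot(int tid) { return &m_samples[m_samplesize * tid]; }

    private:
        std::size_t m_samplesize;
        std::vector<double> m_samples;
    };
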
@@ -1424,121 +1242,121 @@ DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
 // There is an additional complication when scalar operations are considered.
 // For example, 2+Vector.
 // In this case each double within the point is treated individually
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
 {
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
 
-  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
-	// first work out which of the children are expanded
+  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
+        // first work out which of the children are expanded
   bool leftExp=(m_left->m_readytype=='E');
   bool rightExp=(m_right->m_readytype=='E');
   if (!leftExp && !rightExp)
   {
-	throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
+        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
   }
   bool leftScalar=(m_left->getRank()==0);
   bool rightScalar=(m_right->getRank()==0);
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
   {
-	throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar."); 
+        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar."); 
   }
   size_t leftsize=m_left->getNoValues();
   size_t rightsize=m_right->getNoValues();
-  size_t chunksize=1;			// how many doubles will be processed in one go
-  int leftstep=0;		// how far should the left offset advance after each step
+  size_t chunksize=1;                   // how many doubles will be processed in one go
+  int leftstep=0;               // how far should the left offset advance after each step
   int rightstep=0;
-  int numsteps=0;		// total number of steps for the inner loop
-  int oleftstep=0;	// the o variables refer to the outer loop
-  int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
+  int numsteps=0;               // total number of steps for the inner loop
+  int oleftstep=0;      // the o variables refer to the outer loop
+  int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
   int onumsteps=1;
   
-  bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
+  bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
   bool RES=(rightExp && rightScalar);
-  bool LS=(!leftExp && leftScalar);	// left is a single scalar
+  bool LS=(!leftExp && leftScalar);     // left is a single scalar
   bool RS=(!rightExp && rightScalar);
-  bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
+  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
   bool RN=(!rightExp && !rightScalar);
-  bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
+  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
   bool REN=(rightExp && !rightScalar);
 
-  if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
+  if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
   {
-	chunksize=m_left->getNumDPPSample()*leftsize;
-	leftstep=0;
-	rightstep=0;
-	numsteps=1;
+        chunksize=m_left->getNumDPPSample()*leftsize;
+        leftstep=0;
+        rightstep=0;
+        numsteps=1;
   }
   else if (LES || RES)
   {
-	chunksize=1;
-	if (LES)		// left is an expanded scalar
-	{
-		if (RS)
-		{
-		   leftstep=1;
-		   rightstep=0;
-		   numsteps=m_left->getNumDPPSample();
-		}
-		else		// RN or REN
-		{
-		   leftstep=0;
-		   oleftstep=1;
-		   rightstep=1;
-		   orightstep=(RN ? -(int)rightsize : 0);
-		   numsteps=rightsize;
-		   onumsteps=m_left->getNumDPPSample();
-		}
-	}
-	else		// right is an expanded scalar
-	{
-		if (LS)
-		{
-		   rightstep=1;
-		   leftstep=0;
-		   numsteps=m_right->getNumDPPSample();
-		}
-		else
-		{
-		   rightstep=0;
-		   orightstep=1;
-		   leftstep=1;
-		   oleftstep=(LN ? -(int)leftsize : 0);
-		   numsteps=leftsize;
-		   onumsteps=m_right->getNumDPPSample();
-		}
-	}
-  }
-  else 	// this leaves (LEN, RS), (LEN, RN) and their transposes
+        chunksize=1;
+        if (LES)                // left is an expanded scalar
+        {
+                if (RS)
+                {
+                   leftstep=1;
+                   rightstep=0;
+                   numsteps=m_left->getNumDPPSample();
+                }
+                else            // RN or REN
+                {
+                   leftstep=0;
+                   oleftstep=1;
+                   rightstep=1;
+                   orightstep=(RN ? -(int)rightsize : 0);
+                   numsteps=rightsize;
+                   onumsteps=m_left->getNumDPPSample();
+                }
+        }
+        else            // right is an expanded scalar
+        {
+                if (LS)
+                {
+                   rightstep=1;
+                   leftstep=0;
+                   numsteps=m_right->getNumDPPSample();
+                }
+                else
+                {
+                   rightstep=0;
+                   orightstep=1;
+                   leftstep=1;
+                   oleftstep=(LN ? -(int)leftsize : 0);
+                   numsteps=leftsize;
+                   onumsteps=m_right->getNumDPPSample();
+                }
+        }
+  }
+  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
   {
-	if (LEN)	// and Right will be a single value 
-	{
-		chunksize=rightsize;
-		leftstep=rightsize;
-	   	rightstep=0;
-		numsteps=m_left->getNumDPPSample();
-		if (RS)
-		{
-		   numsteps*=leftsize;
-		}
-	}
-	else	// REN
-	{
-		chunksize=leftsize;
-		rightstep=leftsize;
-		leftstep=0;
-		numsteps=m_right->getNumDPPSample();
-		if (LS)
-		{
-		   numsteps*=rightsize;
-		}
-	}
-  }
-
-  int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
-	// Get the values of sub-expressions
-  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);	
-  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
+        if (LEN)        // and Right will be a single value 
+        {
+                chunksize=rightsize;
+                leftstep=rightsize;
+                rightstep=0;
+                numsteps=m_left->getNumDPPSample();
+                if (RS)
+                {
+                   numsteps*=leftsize;
+                }
+        }
+        else    // REN
+        {
+                chunksize=leftsize;
+                rightstep=leftsize;
+                leftstep=0;
+                numsteps=m_right->getNumDPPSample();
+                if (LS)
+                {
+                   numsteps*=rightsize;
+                }
+        }
+  }
+
+  int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
+        // Get the values of sub-expressions
+  const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);       
+  const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
@@ -1552,26 +1370,96 @@ LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
 
 
   roffset=m_samplesize*tid;
-  double* resultp=&(m_samples[roffset]);		// results are stored at the vector offset we received
+  double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
   switch(m_op)
   {
     case ADD:
-        PROC_OP(NO_ARG,plus<double>());
-	break;
+        //PROC_OP(NO_ARG,plus<double>());
+        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
+                             &(*left)[0],
+                             &(*right)[0],
+                             chunksize,
+                             onumsteps,
+                             numsteps,
+                             resultStep,
+                             leftstep,
+                             rightstep,
+                             oleftstep,
+                             orightstep,
+                             lroffset,
+                             rroffset,
+                             escript::ES_optype::ADD);
+        break;
     case SUB:
-	PROC_OP(NO_ARG,minus<double>());
-	break;
+        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
+                             &(*left)[0],
+                             &(*right)[0],
+                             chunksize,
+                             onumsteps,
+                             numsteps,
+                             resultStep,
+                             leftstep,
+                             rightstep,
+                             oleftstep,
+                             orightstep,
+                             lroffset,
+                             rroffset,
+                             escript::ES_optype::SUB);
+        //PROC_OP(NO_ARG,minus<double>());
+        break;
     case MUL:
-	PROC_OP(NO_ARG,multiplies<double>());
-	break;
+        //PROC_OP(NO_ARG,multiplies<double>());
+        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
+                             &(*left)[0],
+                             &(*right)[0],
+                             chunksize,
+                             onumsteps,
+                             numsteps,
+                             resultStep,
+                             leftstep,
+                             rightstep,
+                             oleftstep,
+                             orightstep,
+                             lroffset,
+                             rroffset,
+                             escript::ES_optype::MUL);
+        break;
     case DIV:
-	PROC_OP(NO_ARG,divides<double>());
-	break;
+        //PROC_OP(NO_ARG,divides<double>());
+        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
+                             &(*left)[0],
+                             &(*right)[0],
+                             chunksize,
+                             onumsteps,
+                             numsteps,
+                             resultStep,
+                             leftstep,
+                             rightstep,
+                             oleftstep,
+                             orightstep,
+                             lroffset,
+                             rroffset,
+                             escript::ES_optype::DIV);
+        break;
     case POW:
-       PROC_OP(double (double,double),::pow);
-	break;
+       //PROC_OP(double (double,double),::pow);
+        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
+                             &(*left)[0],
+                             &(*right)[0],
+                             chunksize,
+                             onumsteps,
+                             numsteps,
+                             resultStep,
+                             leftstep,
+                             rightstep,
+                             oleftstep,
+                             orightstep,
+                             lroffset,
+                             rroffset,
+                             escript::ES_optype::POW);
+        break;
     default:
-	throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
   }
 LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
   return &m_samples;
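
The five call sites above replace the PROC_OP macro deleted earlier in this patch. Judging from that macro's body, the new helper has to perform the same two-level strided loop: an outer loop covering the extended-scalar case and an inner loop applying the operation one chunk at a time while advancing the left and right read offsets. The sketch below restates that loop as a plain template; the real name, parameter order and ES_optype dispatch of binaryOpVectorLazyHelper are upstream details not reproduced here, and the functor-based form is an assumption.

    #include <cstddef>
    #include <functional>

    // Sketch of the loop structure the old PROC_OP macro expanded to.
    template <typename BinOp>
    void binary_lazy_loop_sketch(double* resultp,
                                 const double* left, const double* right,
                                 std::size_t chunksize,
                                 int onumsteps, int numsteps, int resultStep,
                                 int leftstep, int rightstep,
                                 int oleftstep, int orightstep,
                                 std::ptrdiff_t lroffset, std::ptrdiff_t rroffset,
                                 BinOp op)
    {
        for (int j = 0; j < onumsteps; ++j)
        {
            for (int i = 0; i < numsteps; ++i, resultp += resultStep)
            {
                for (std::size_t k = 0; k < chunksize; ++k)
                {
                    resultp[k] = op(left[lroffset + k], right[rroffset + k]);
                }
                lroffset += leftstep;      // inner strides
                rroffset += rightstep;
            }
            lroffset += oleftstep;         // outer (extended scalar) strides
            rroffset += orightstep;
        }
    }

    // e.g. for ADD:
    // binary_lazy_loop_sketch(res, l, r, chunksize, onumsteps, numsteps,
    //                         resultStep, leftstep, rightstep, oleftstep,
    //                         orightstep, lro, rro, std::plus<double>());
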
@@ -1581,26 +1469,26 @@ LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.
 // unlike the other resolve helpers, we must treat these datapoints separately.
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
 {
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
 
-  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
-	// first work out which of the children are expanded
+  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
+        // first work out which of the children are expanded
   bool leftExp=(m_left->m_readytype=='E');
   bool rightExp=(m_right->m_readytype=='E');
   int steps=getNumDPPSample();
-  int leftStep=(leftExp? m_left->getNoValues() : 0);		// do not have scalars as input to this method
+  int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
   int rightStep=(rightExp?m_right->getNoValues() : 0);
 
   int resultStep=getNoValues();
   roffset=m_samplesize*tid;
   size_t offset=roffset;
 
-  const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
+  const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
 
-  const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
+  const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
 
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
 cout << getNoValues() << endl;)
@@ -1614,55 +1502,55 @@ LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
 
-  double* resultp=&(m_samples[offset]);		// results are stored at the vector offset we received
+  double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
   switch(m_op)
   {
     case PROD:
-	for (int i=0;i<steps;++i,resultp+=resultStep)
-	{
-    	  const double *ptr_0 = &((*left)[lroffset]);
-    	  const double *ptr_1 = &((*right)[rroffset]);
+        for (int i=0;i<steps;++i,resultp+=resultStep)
+        {
+          const double *ptr_0 = &((*left)[lroffset]);
+          const double *ptr_1 = &((*right)[rroffset]);
 
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
 
-    	  matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
+          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
 
-	  lroffset+=leftStep;
-	  rroffset+=rightStep;
-	}
-	break;
+          lroffset+=leftStep;
+          rroffset+=rightStep;
+        }
+        break;
     default:
-	throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
   }
   roffset=offset;
   return &m_samples;
 }
 
 
-const DataTypes::ValueType*
+const DataTypes::RealVectorType*
 DataLazy::resolveSample(int sampleNo, size_t& roffset) const
 {
 #ifdef _OPENMP
-	int tid=omp_get_thread_num();
+        int tid=omp_get_thread_num();
 #else
-	int tid=0;
+        int tid=0;
 #endif 
 
 #ifdef LAZY_STACK_PROF
-	stackstart[tid]=&tid;
-	stackend[tid]=&tid;
-	const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
-	size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
-	#pragma omp critical
-	if (d>maxstackuse)
-	{
+        stackstart[tid]=&tid;
+        stackend[tid]=&tid;
+        const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
+        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
+        #pragma omp critical
+        if (d>maxstackuse)
+        {
 cout << "Max resolve Stack use " << d << endl;
-		maxstackuse=d;
-	}
-	return r;
+                maxstackuse=d;
+        }
+        return r;
 #else
-	return resolveNodeSample(tid, sampleNo, roffset);
+        return resolveNodeSample(tid, sampleNo, roffset);
 #endif
 }
 
@@ -1672,7 +1560,7 @@ void
 DataLazy::resolveToIdentity()
 {
    if (m_op==IDENTITY)
-	return;
+        return;
    DataReady_ptr p=resolveNodeWorker();
    makeIdentity(p);
 }
@@ -1709,75 +1597,75 @@ DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
 {
   if (dats.empty())
   {
-	return;
+        return;
   }
   vector<DataLazy*> work;
   FunctionSpace fs=dats[0]->getFunctionSpace();
   bool match=true;
   for (int i=dats.size()-1;i>=0;--i)
   {
-	if (dats[i]->m_readytype!='E')
-	{
-		dats[i]->collapse();
-	}
-	if (dats[i]->m_op!=IDENTITY)
-	{
-		work.push_back(dats[i]);
-		if (fs!=dats[i]->getFunctionSpace())
-		{
-			match=false;
-		}
-	}
+        if (dats[i]->m_readytype!='E')
+        {
+                dats[i]->collapse();
+        }
+        if (dats[i]->m_op!=IDENTITY)
+        {
+                work.push_back(dats[i]);
+                if (fs!=dats[i]->getFunctionSpace())
+                {
+                        match=false;
+                }
+        }
   }
   if (work.empty())
   {
-	return;		// no work to do
-  }
-  if (match)	// all functionspaces match.  Yes I realise this is overly strict
-  {		// it is possible that dats[0] is one of the objects which we discarded and
-		// all the other functionspaces match.
-	vector<DataExpanded*> dep;
-	vector<ValueType*> vecs;
-	for (int i=0;i<work.size();++i)
-	{
-		dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
-		vecs.push_back(&(dep[i]->getVectorRW()));
-	}
-	int totalsamples=work[0]->getNumSamples();
-	const ValueType* res=0;	// Storage for answer
-	int sample;
-	#pragma omp parallel private(sample, res)
-	{
-	    size_t roffset=0;
-	    #pragma omp for schedule(static)
-	    for (sample=0;sample<totalsamples;++sample)
-	    {
-		roffset=0;
-		int j;
-		for (j=work.size()-1;j>=0;--j)
-		{
+        return;         // no work to do
+  }
+  if (match)    // all functionspaces match.  Yes I realise this is overly strict
+  {             // it is possible that dats[0] is one of the objects which we discarded and
+                // all the other functionspaces match.
+        vector<DataExpanded*> dep;
+        vector<RealVectorType*> vecs;
+        for (int i=0;i<work.size();++i)
+        {
+                dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
+                vecs.push_back(&(dep[i]->getVectorRW()));
+        }
+        int totalsamples=work[0]->getNumSamples();
+        const RealVectorType* res=0; // Storage for answer
+        int sample;
+        #pragma omp parallel private(sample, res)
+        {
+            size_t roffset=0;
+            #pragma omp for schedule(static)
+            for (sample=0;sample<totalsamples;++sample)
+            {
+                roffset=0;
+                int j;
+                for (j=work.size()-1;j>=0;--j)
+                {
 #ifdef _OPENMP
-    		    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
+                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
 #else
-    		    res=work[j]->resolveNodeSample(0,sample,roffset);
+                    res=work[j]->resolveNodeSample(0,sample,roffset);
 #endif
-    		    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
-    		    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
-		}
-	    }
-	}
-	// Now we need to load the new results as identity ops into the lazy nodes
-	for (int i=work.size()-1;i>=0;--i)
-	{
-	    work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
-	}
-  }
-  else	// functionspaces do not match
+                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
+                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
+                }
+            }
+        }
+        // Now we need to load the new results as identity ops into the lazy nodes
+        for (int i=work.size()-1;i>=0;--i)
+        {
+            work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
+        }
+  }
+  else  // functionspaces do not match
   {
-	for (int i=0;i<work.size();++i)
-	{
-		work[i]->resolveToIdentity();
-	}
+        for (int i=0;i<work.size();++i)
+        {
+                work[i]->resolveToIdentity();
+        }
   }
 }
 
@@ -1787,54 +1675,54 @@ DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
 DataReady_ptr
 DataLazy::resolveNodeWorker()
 {
-  if (m_readytype!='E')		// if the whole sub-expression is Constant or Tagged, then evaluate it normally
+  if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
   {
     collapse();
   }
-  if (m_op==IDENTITY)		// So a lazy expression of Constant or Tagged data will be returned here. 
+  if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here. 
   {
     return m_id;
   }
-  	// from this point on we must have m_op!=IDENTITY and m_readytype=='E'
-  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
-  ValueType& resvec=result->getVectorRW();
+        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
+  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
+  RealVectorType& resvec=result->getVectorRW();
   DataReady_ptr resptr=DataReady_ptr(result);
 
   int sample;
   int totalsamples=getNumSamples();
-  const ValueType* res=0;	// Storage for answer
+  const RealVectorType* res=0;       // Storage for answer
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
   #pragma omp parallel private(sample,res)
   {
-	size_t roffset=0;
+        size_t roffset=0;
 #ifdef LAZY_STACK_PROF
-	stackstart[omp_get_thread_num()]=&roffset;
-	stackend[omp_get_thread_num()]=&roffset;
+        stackstart[omp_get_thread_num()]=&roffset;
+        stackend[omp_get_thread_num()]=&roffset;
 #endif
-	#pragma omp for schedule(static)
-  	for (sample=0;sample<totalsamples;++sample)
-  	{
-		roffset=0;
+        #pragma omp for schedule(static)
+        for (sample=0;sample<totalsamples;++sample)
+        {
+                roffset=0;
 #ifdef _OPENMP
-    		res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
+                res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
 #else
-    		res=resolveNodeSample(0,sample,roffset);
+                res=resolveNodeSample(0,sample,roffset);
 #endif
 LAZYDEBUG(cout << "Sample #" << sample << endl;)
 LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
-    		DataVector::size_type outoffset=result->getPointOffset(sample,0);
-    		memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
-  	}
+                RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
+                memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
+        }
   }
 #ifdef LAZY_STACK_PROF
   for (int i=0;i<getNumberOfThreads();++i)
   {
-	size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
-//	cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
-	if (r>maxstackuse)
-	{
-		maxstackuse=r;
-	}
+        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
+//      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
+        if (r>maxstackuse)
+        {
+                maxstackuse=r;
+        }
   }
   cout << "Max resolve Stack use=" << maxstackuse << endl;
 #endif
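
The LAZY_STACK_PROF blocks above estimate how deep the recursive resolve descended by recording the address of a stack local at the entry point and comparing it against the address of a local in the deepest frame reached. A standalone sketch of the same trick is given below; it assumes, as the escript code does, that the stack grows towards lower addresses, and all names in it are illustrative.

    #include <cstddef>
    #include <iostream>

    static const void* stack_anchor = 0;      // set at the entry point
    static std::size_t max_stack_use = 0;

    void resolve_sketch(int depth)
    {
        int probe;                            // lives in the current frame
        std::size_t used = (std::size_t)stack_anchor - (std::size_t)&probe;
        if (used > max_stack_use)
        {
            max_stack_use = used;
        }
        if (depth > 0)
        {
            resolve_sketch(depth - 1);        // deeper frames, lower addresses
        }
    }

    int main()
    {
        int anchor;
        stack_anchor = &anchor;
        resolve_sketch(50);
        std::cout << "Max resolve stack use " << max_stack_use << std::endl;
        return 0;
    }
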
@@ -1846,17 +1734,17 @@ DataLazy::toString() const
 {
   ostringstream oss;
   oss << "Lazy Data: [depth=" << m_height<< "] ";
-  switch (escriptParams.getLAZY_STR_FMT())
+  switch (escriptParams.getLazyStrFmt())
   {
-  case 1:	// tree format
-	oss << endl;
-	intoTreeString(oss,"");	
-	break;
-  case 2:	// just the depth
-	break;
+  case 1:       // tree format
+        oss << endl;
+        intoTreeString(oss,""); 
+        break;
+  case 2:       // just the depth
+        break;
   default:
-	intoString(oss);
-	break;
+        intoString(oss);
+        break;
   }
   return oss.str();
 }
@@ -1869,64 +1757,64 @@ DataLazy::intoString(ostringstream& oss) const
   switch (getOpgroup(m_op))
   {
   case G_IDENTITY:
-	if (m_id->isExpanded())
-	{
-	   oss << "E";
-	}
-	else if (m_id->isTagged())
-	{
-	  oss << "T";
-	}
-	else if (m_id->isConstant())
-	{
-	  oss << "C";
-	}
-	else
-	{
-	  oss << "?";
-	}
-	oss << '@' << m_id.get();
-	break;
+        if (m_id->isExpanded())
+        {
+           oss << "E";
+        }
+        else if (m_id->isTagged())
+        {
+          oss << "T";
+        }
+        else if (m_id->isConstant())
+        {
+          oss << "C";
+        }
+        else
+        {
+          oss << "?";
+        }
+        oss << '@' << m_id.get();
+        break;
   case G_BINARY:
-	oss << '(';
-	m_left->intoString(oss);
-	oss << ' ' << opToString(m_op) << ' ';
-	m_right->intoString(oss);
-	oss << ')';
-	break;
+        oss << '(';
+        m_left->intoString(oss);
+        oss << ' ' << opToString(m_op) << ' ';
+        m_right->intoString(oss);
+        oss << ')';
+        break;
   case G_UNARY:
   case G_UNARY_P:
   case G_NP1OUT:
   case G_NP1OUT_P:
   case G_REDUCTION:
-	oss << opToString(m_op) << '(';
-	m_left->intoString(oss);
-	oss << ')';
-	break;
+        oss << opToString(m_op) << '(';
+        m_left->intoString(oss);
+        oss << ')';
+        break;
   case G_TENSORPROD:
-	oss << opToString(m_op) << '(';
-	m_left->intoString(oss);
-	oss << ", ";
-	m_right->intoString(oss);
-	oss << ')'; 
-	break;
+        oss << opToString(m_op) << '(';
+        m_left->intoString(oss);
+        oss << ", ";
+        m_right->intoString(oss);
+        oss << ')'; 
+        break;
   case G_NP1OUT_2P:
-	oss << opToString(m_op) << '(';
-	m_left->intoString(oss);
-	oss << ", " << m_axis_offset << ", " << m_transpose;
-	oss << ')';
-	break;
+        oss << opToString(m_op) << '(';
+        m_left->intoString(oss);
+        oss << ", " << m_axis_offset << ", " << m_transpose;
+        oss << ')';
+        break;
   case G_CONDEVAL:
-	oss << opToString(m_op)<< '(' ;
-	m_mask->intoString(oss);
-	oss << " ? ";
-	m_left->intoString(oss);
-	oss << " : ";
-	m_right->intoString(oss); 
-	oss << ')';
-	break;
+        oss << opToString(m_op)<< '(' ;
+        m_mask->intoString(oss);
+        oss << " ? ";
+        m_left->intoString(oss);
+        oss << " : ";
+        m_right->intoString(oss); 
+        oss << ')';
+        break;
   default:
-	oss << "UNKNOWN";
+        oss << "UNKNOWN";
   }
 }
 
@@ -1938,72 +1826,72 @@ DataLazy::intoTreeString(ostringstream& oss, string indent) const
   switch (getOpgroup(m_op))
   {
   case G_IDENTITY:
-	if (m_id->isExpanded())
-	{
-	   oss << "E";
-	}
-	else if (m_id->isTagged())
-	{
-	  oss << "T";
-	}
-	else if (m_id->isConstant())
-	{
-	  oss << "C";
-	}
-	else
-	{
-	  oss << "?";
-	}
-	oss << '@' << m_id.get() << endl;
-	break;
+        if (m_id->isExpanded())
+        {
+           oss << "E";
+        }
+        else if (m_id->isTagged())
+        {
+          oss << "T";
+        }
+        else if (m_id->isConstant())
+        {
+          oss << "C";
+        }
+        else
+        {
+          oss << "?";
+        }
+        oss << '@' << m_id.get() << endl;
+        break;
   case G_BINARY:
-	oss << opToString(m_op) << endl;
-	indent+='.';
-	m_left->intoTreeString(oss, indent);
-	m_right->intoTreeString(oss, indent);
-	break;
+        oss << opToString(m_op) << endl;
+        indent+='.';
+        m_left->intoTreeString(oss, indent);
+        m_right->intoTreeString(oss, indent);
+        break;
   case G_UNARY:
   case G_UNARY_P:
   case G_NP1OUT:
   case G_NP1OUT_P:
   case G_REDUCTION:
-	oss << opToString(m_op) << endl;
-	indent+='.';
-	m_left->intoTreeString(oss, indent);
-	break;
+        oss << opToString(m_op) << endl;
+        indent+='.';
+        m_left->intoTreeString(oss, indent);
+        break;
   case G_TENSORPROD:
-	oss << opToString(m_op) << endl;
-	indent+='.';
-	m_left->intoTreeString(oss, indent);
-	m_right->intoTreeString(oss, indent);
-	break;
+        oss << opToString(m_op) << endl;
+        indent+='.';
+        m_left->intoTreeString(oss, indent);
+        m_right->intoTreeString(oss, indent);
+        break;
   case G_NP1OUT_2P:
-	oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
-	indent+='.';
-	m_left->intoTreeString(oss, indent);
-	break;
+        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
+        indent+='.';
+        m_left->intoTreeString(oss, indent);
+        break;
   default:
-	oss << "UNKNOWN";
+        oss << "UNKNOWN";
   }
 }
 
 
 DataAbstract* 
-DataLazy::deepCopy()
+DataLazy::deepCopy() const
 {
   switch (getOpgroup(m_op))
   {
   case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
-  case G_UNARY:	
+  case G_UNARY: 
   case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
-  case G_UNARY_P:	return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
-  case G_BINARY:	return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
+  case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
+  case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
   case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
   case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
   case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
   case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
   default:
-	throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
+        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
   }
 }
 
@@ -2015,7 +1903,7 @@ DataLazy::deepCopy()
 // or it could be some function of the lengths of the DataReady instances which 
 // form part of the expression.
 // Rather than have people making assumptions, I have disabled the method.
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataLazy::getLength() const
 {
   throw DataException("getLength() does not make sense for lazy data.");
@@ -2030,39 +1918,39 @@ DataLazy::getSlice(const DataTypes::RegionType& region) const
 
 
 // To do this we need to rely on our child nodes
-DataTypes::ValueType::size_type 
+DataTypes::RealVectorType::size_type 
 DataLazy::getPointOffset(int sampleNo,
                  int dataPointNo)
 {
   if (m_op==IDENTITY)
   {
-	return m_id->getPointOffset(sampleNo,dataPointNo);
+        return m_id->getPointOffset(sampleNo,dataPointNo);
   }
   if (m_readytype!='E')
   {
-	collapse();
-	return m_id->getPointOffset(sampleNo,dataPointNo);
+        collapse();
+        return m_id->getPointOffset(sampleNo,dataPointNo);
   }
   // at this point we do not have an identity node and the expression will be Expanded
   // so we only need to know which child to ask
   if (m_left->m_readytype=='E')
   {
-	return m_left->getPointOffset(sampleNo,dataPointNo);
+        return m_left->getPointOffset(sampleNo,dataPointNo);
   }
   else
   {
-	return m_right->getPointOffset(sampleNo,dataPointNo);
+        return m_right->getPointOffset(sampleNo,dataPointNo);
   }
 }
 
 // To do this we need to rely on our child nodes
-DataTypes::ValueType::size_type 
+DataTypes::RealVectorType::size_type 
 DataLazy::getPointOffset(int sampleNo,
                  int dataPointNo) const
 {
   if (m_op==IDENTITY)
   {
-	return m_id->getPointOffset(sampleNo,dataPointNo);
+        return m_id->getPointOffset(sampleNo,dataPointNo);
   }
   if (m_readytype=='E')
   {
@@ -2070,16 +1958,16 @@ DataLazy::getPointOffset(int sampleNo,
     // so we only need to know which child to ask
     if (m_left->m_readytype=='E')
     {
-	return m_left->getPointOffset(sampleNo,dataPointNo);
+        return m_left->getPointOffset(sampleNo,dataPointNo);
     }
     else
     {
-	return m_right->getPointOffset(sampleNo,dataPointNo);
+        return m_right->getPointOffset(sampleNo,dataPointNo);
     }
   }
   if (m_readytype=='C')
   {
-	return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
+        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
   }
   throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
 }
@@ -2089,7 +1977,7 @@ DataLazy::getPointOffset(int sampleNo,
 void
 DataLazy::setToZero()
 {
-//   DataTypes::ValueType v(getNoValues(),0);
+//   DataTypes::RealVectorType v(getNoValues(),0);
 //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
 //   m_op=IDENTITY;
 //   m_right.reset();   
@@ -2104,7 +1992,8 @@ DataLazy::setToZero()
 bool
 DataLazy::actsExpanded() const
 {
-	return (m_readytype=='E');
+        return (m_readytype=='E');
 }
 
-}	// end namespace
+} // end namespace
+
diff --git a/escriptcore/src/DataLazy.h b/escriptcore/src/DataLazy.h
index e9270e6..0a06594 100644
--- a/escriptcore/src/DataLazy.h
+++ b/escriptcore/src/DataLazy.h
@@ -14,76 +14,21 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_DATALAZY_H__
+#define __ESCRIPT_DATALAZY_H__
 
-#if !defined escript_DataLazy_20081008_H
-#define escript_DataLazy_20081008_H
 #include "system_dep.h"
-
 #include "DataAbstract.h"
-
-#include <string>
-#include <functional>
-
-#include "LocalOps.h"		// for tensor_binary_op
+#include "ArrayOps.h"		// for tensor_binary_op
 #include "DataVector.h"		// for ElementType
+#include "ES_optype.h"
 
+#include <string>
 
 //#define LAZY_NODE_STORAGE
 
 namespace escript {
 
-// For the purposes of unit testing and maintaining sanity, it is important that this enum be contiguous
-enum ES_optype
-{
-	UNKNOWNOP=0,
-	IDENTITY=1,
-	ADD=2,
-	SUB=3,
-	MUL=4,
-	DIV=5,
-	POW=6,
-	SIN=POW+1,
-	COS=SIN+1,
-	TAN=SIN+2,
-	ASIN=SIN+3,
-	ACOS=SIN+4,
-	ATAN=SIN+5,
-	SINH=SIN+6,
-	COSH=SIN+7,
-	TANH=SIN+8,
-	ERF=SIN+9,
-	ASINH=SIN+10,
-	ACOSH=SIN+11,
-	ATANH=SIN+12,
-	LOG10=ATANH+1,
-	LOG=LOG10+1,
-	SIGN=LOG10+2,
-	ABS=LOG10+3,
-	NEG=LOG10+4,
-	POS=LOG10+5,
-	EXP=LOG10+6,
-	SQRT=LOG10+7,
-	RECIP=LOG10+8,
-	GZ=RECIP+1,
-	LZ=GZ+1,
-	GEZ=GZ+2,
-	LEZ=GZ+3,
-	NEZ=GZ+4,
-	EZ=GZ+5,
-	SYM=EZ+1,
-	NSYM=SYM+1,
-	PROD=NSYM+1,
-	TRANS=PROD+1,
-	TRACE=TRANS+1,
-	SWAP=TRACE+1,
-	MINVAL=SWAP+1,
-	MAXVAL=MINVAL+1,
-	CONDEVAL=MAXVAL+1
-};
-
-ESCRIPT_DLL_API
-const std::string&
-opToString(ES_optype op);
 
 /**
 \class escript::DataLazy
@@ -103,7 +48,6 @@ class DataLazy : public DataAbstract
 {
 
 typedef DataAbstract parent;
-typedef DataTypes::ValueType ValueType;
 typedef DataTypes::ShapeType ShapeType;
 
 public:
@@ -209,7 +153,7 @@ public:
 
   ESCRIPT_DLL_API
   DataAbstract* 
-  deepCopy();
+  deepCopy() const;
 
 
   /**
@@ -217,7 +161,7 @@ public:
      This method throws an exception. It does not really make sense to ask this question of lazy data.
   */
   ESCRIPT_DLL_API
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
 
@@ -226,11 +170,11 @@ public:
   getSlice(const DataTypes::RegionType& region) const;
 
 
-  DataTypes::ValueType::size_type 
+  DataTypes::RealVectorType::size_type 
   getPointOffset(int sampleNo,
                  int dataPointNo) const;
 
-  DataTypes::ValueType::size_type 
+  DataTypes::RealVectorType::size_type 
   getPointOffset(int sampleNo,
                  int dataPointNo);
 
@@ -250,7 +194,7 @@ public:
   The return value will be an existing vector so do not deallocate it.
   */
   ESCRIPT_DLL_API
-  const ValueType*
+  const DataTypes::RealVectorType*
   resolveSample(int sampleNo, size_t& roffset) const; 
 
   /**
@@ -294,7 +238,7 @@ private:
   size_t m_height;
 
   int* m_sampleids;		// may be NULL
-  mutable DataVector m_samples;  
+  mutable DataTypes::RealVectorType m_samples;  
 
   /**
   Allocates sample storage at each node
@@ -302,32 +246,32 @@ private:
   void LazyNodeSetup();
 
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const;
 
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const;  
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeSample(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const;
 
-  const DataTypes::ValueType*
+  const DataTypes::RealVectorType*
   resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const;
 
   /**
@@ -383,4 +327,6 @@ private:
 };
 
 }
-#endif
+
+#endif // __ESCRIPT_DATALAZY_H__
+
diff --git a/escriptcore/src/DataMaths.cpp b/escriptcore/src/DataMaths.cpp
deleted file mode 100644
index ea084a6..0000000
--- a/escriptcore/src/DataMaths.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "DataTypes.h"
-#include "DataMaths.h"
-#include <sstream>
-
-namespace
-{
-const int SUCCESS=0;
-const int BADRANK=1;
-const int NOTSQUARE=2;
-const int SHAPEMISMATCH=3;
-const int NOINVERSE=4;
-const int NEEDLAPACK=5;
-const int ERRFACTORISE=6;
-const int ERRINVERT=7;
-}
-
-namespace escript
-{
-namespace DataMaths
-{
-
-  void
-  matMult(const DataTypes::ValueType& left, 
-	  const DataTypes::ShapeType& leftShape,
-	  DataTypes::ValueType::size_type leftOffset,
-          const DataTypes::ValueType& right,
-   	  const DataTypes::ShapeType& rightShape,
-	  DataTypes::ValueType::size_type rightOffset,
-          DataTypes::ValueType& result,
-	  const DataTypes::ShapeType& resultShape)
-   {
-      using namespace escript::DataTypes;
-      using namespace std; 
-
-      int leftRank=getRank(leftShape);
-      int rightRank=getRank(rightShape);
-      int resultRank=getRank(resultShape);
-      if (leftRank==0 || rightRank==0) {
-         stringstream temp;
-         temp << "Error - (matMult) Invalid for rank 0 objects.";
-         throw DataException(temp.str());
-      }
-
-      if (leftShape[leftRank-1] != rightShape[0]) {
-         stringstream temp;
-         temp << "Error - (matMult) Dimension: " << leftRank 
-              << ", size: " << leftShape[leftRank-1] 
-              << " of LHS and dimension: 1, size: " << rightShape[0]
-              << " of RHS don't match.";
-         throw DataException(temp.str());
-      }
-
-      int outputRank = leftRank+rightRank-2;
-
-      if (outputRank < 0) {
-         stringstream temp;
-         temp << "Error - (matMult) LHS and RHS cannot be multiplied "
-              << "as they have incompatible rank.";
-         throw DataException(temp.str());
-      }
-
-      if (outputRank != resultRank) {
-         stringstream temp;
-         temp << "Error - (matMult) Rank of result array is: " 
-              << resultRank 
-              << " it must be: " << outputRank;
-         throw DataException(temp.str());
-      }
-
-      for (int i=0; i<(leftRank-1); i++) {
-         if (leftShape[i] != resultShape[i]) {
-            stringstream temp;
-            temp << "Error - (matMult) Dimension: " << i 
-                 << " of LHS and result array don't match.";
-            throw DataException(temp.str());
-         }
-      }
-
-      for (int i=1; i<rightRank; i++) {
-         if (rightShape[i] != resultShape[i+leftRank-2]) {
-            stringstream temp;
-            temp << "Error - (matMult) Dimension: " << i
-                 << ", size: " << rightShape[i]
-                 << " of RHS and dimension: " << i+leftRank-1 
-                 << ", size: " << resultShape[i+leftRank-1]
-                 << " of result array don't match.";
-            throw DataException(temp.str());
-         }
-      }
-
-      switch (leftRank) {
-
-      case 1:
-         switch (rightRank) {
-         case 1:
-            result[0]=0;
-            for (int i=0;i<leftShape[0];i++) {
-               result[0]+=left[i+leftOffset]*right[i+rightOffset];
-            }
-            break;
-         case 2:
-            for (int i=0;i<resultShape[0];i++) {
-               result[i]=0;
-               for (int j=0;j<rightShape[0];j++) {
-                  result[i]+=left[j+leftOffset]*right[getRelIndex(rightShape,j,i)+rightOffset];
-               }
-            }
-            break;
-         default:
-            stringstream temp; temp << "Error - (matMult) Invalid rank. Programming error.";
-            throw DataException(temp.str());
-            break;
-         }
-         break;
-
-      case 2:
-         switch (rightRank) {
-         case 1:
-            result[0]=0;
-            for (int i=0;i<leftShape[0];i++) {
-               result[i]=0;
-               for (int j=0;j<leftShape[1];j++) {
-                  result[i]+=left[leftOffset+getRelIndex(leftShape,i,j)]*right[i+rightOffset];
-               }
-            }
-	    break;
-         case 2:
-            for (int i=0;i<resultShape[0];i++) {
-               for (int j=0;j<resultShape[1];j++) {
-                  result[getRelIndex(resultShape,i,j)]=0;
-                  for (int jR=0;jR<rightShape[0];jR++) {
-                     result[getRelIndex(resultShape,i,j)]+=left[leftOffset+getRelIndex(leftShape,i,jR)]*right[rightOffset+getRelIndex(rightShape,jR,j)];
-                  }
-               }
-            }
-            break;
-         default:
-            stringstream temp; temp << "Error - (matMult) Invalid rank. Programming error.";
-            throw DataException(temp.str());
-            break;
-         }
-         break;
-
-      default:
-         stringstream temp; temp << "Error - (matMult) Not supported for rank: " << leftRank;
-         throw DataException(temp.str());
-         break;
-      }
-
-   }
-
-
-   DataTypes::ShapeType
-   determineResultShape(const DataTypes::ShapeType& left,
-                       const DataTypes::ShapeType& right)
-   {
-      DataTypes::ShapeType result;
-      for (int i=0; i<(DataTypes::getRank(left)-1); i++) {
-         result.push_back(left[i]);
-      }
-      for (int i=1; i<DataTypes::getRank(right); i++) {
-         result.push_back(right[i]);
-      }
-      return result;
-   }
-
-
-
-
-void matrixInverseError(int err)
-{
-    switch (err)
-    {
-    case 0: break;	// not an error
-    case BADRANK: throw DataException("matrix_inverse: input and output must be rank 2.");
-    case NOTSQUARE: throw DataException("matrix_inverse: matrix must be square.");
-    case SHAPEMISMATCH: throw DataException("matrix_inverse: programmer error input and output must be the same shape.");
-    case NOINVERSE: throw DataException("matrix_inverse: argument not invertible.");
-    case NEEDLAPACK:throw DataException("matrix_inverse: matrices larger than 3x3 require lapack support."); 
-    case ERRFACTORISE: throw DataException("matrix_inverse: argument not invertible (factorise stage).");
-    case ERRINVERT: throw DataException("matrix_inverse: argument not invertible (inverse stage).");
-    default:
-	throw DataException("matrix_inverse: unknown error.");
-    }
-}
-
-
-
-// Copied from the python version in util.py
-int
-matrix_inverse(const DataTypes::ValueType& in, 
-	    const DataTypes::ShapeType& inShape,
-            DataTypes::ValueType::size_type inOffset,
-            DataTypes::ValueType& out,
-	    const DataTypes::ShapeType& outShape,
-            DataTypes::ValueType::size_type outOffset,
-	    int count,
-	    LapackInverseHelper& helper)
-{
-    using namespace DataTypes;
-    using namespace std;
-    int inRank=getRank(inShape);
-    int outRank=getRank(outShape);
-    int size=DataTypes::noValues(inShape);
-    if ((inRank!=2) || (outRank!=2))
-    {
-	return BADRANK;		
-    }
-    if (inShape[0]!=inShape[1])
-    {
-	return NOTSQUARE; 		
-    }
-    if (inShape!=outShape)
-    {
-	return SHAPEMISMATCH;	
-    }
-    if (inShape[0]==1)
-    {
-	for (int i=0;i<count;++i)
-	{
-	    if (in[inOffset+i]!=0)
-	    {
-	    	out[outOffset+i]=1/in[inOffset+i];
-	    }
-	    else
-	    {
-		return NOINVERSE;
-	    }
-	}
-    }
-    else if (inShape[0]==2)
-    {
-	int step=0;
-	for (int i=0;i<count;++i)
-	{	
-          double A11=in[inOffset+step+getRelIndex(inShape,0,0)];
-          double A12=in[inOffset+step+getRelIndex(inShape,0,1)];
-          double A21=in[inOffset+step+getRelIndex(inShape,1,0)];
-          double A22=in[inOffset+step+getRelIndex(inShape,1,1)];
-          double D = A11*A22-A12*A21;
-	  if (D!=0)
-	  {
-          	D=1/D;
-		out[outOffset+step+getRelIndex(inShape,0,0)]= A22*D;
-         	out[outOffset+step+getRelIndex(inShape,1,0)]=-A21*D;
-          	out[outOffset+step+getRelIndex(inShape,0,1)]=-A12*D;
-          	out[outOffset+step+getRelIndex(inShape,1,1)]= A11*D;
-	  }
-	  else
-	  {
-		return NOINVERSE;
-	  }
-	  step+=size;
-	}
-    }
-    else if (inShape[0]==3)
-    {
-	int step=0;
-	for (int i=0;i<count;++i)
-	{	
-          double A11=in[inOffset+step+getRelIndex(inShape,0,0)];
-          double A21=in[inOffset+step+getRelIndex(inShape,1,0)];
-          double A31=in[inOffset+step+getRelIndex(inShape,2,0)];
-          double A12=in[inOffset+step+getRelIndex(inShape,0,1)];
-          double A22=in[inOffset+step+getRelIndex(inShape,1,1)];
-          double A32=in[inOffset+step+getRelIndex(inShape,2,1)];
-          double A13=in[inOffset+step+getRelIndex(inShape,0,2)];
-          double A23=in[inOffset+step+getRelIndex(inShape,1,2)];
-          double A33=in[inOffset+step+getRelIndex(inShape,2,2)];
-          double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
-	  if (D!=0)
-	  {
-		D=1/D;
-          	out[outOffset+step+getRelIndex(inShape,0,0)]=(A22*A33-A23*A32)*D;
-          	out[outOffset+step+getRelIndex(inShape,1,0)]=(A31*A23-A21*A33)*D;
-          	out[outOffset+step+getRelIndex(inShape,2,0)]=(A21*A32-A31*A22)*D;
-          	out[outOffset+step+getRelIndex(inShape,0,1)]=(A13*A32-A12*A33)*D;
-          	out[outOffset+step+getRelIndex(inShape,1,1)]=(A11*A33-A31*A13)*D;
-          	out[outOffset+step+getRelIndex(inShape,2,1)]=(A12*A31-A11*A32)*D;
-          	out[outOffset+step+getRelIndex(inShape,0,2)]=(A12*A23-A13*A22)*D;
-          	out[outOffset+step+getRelIndex(inShape,1,2)]=(A13*A21-A11*A23)*D;
-          	out[outOffset+step+getRelIndex(inShape,2,2)]=(A11*A22-A12*A21)*D;
-          }
-	  else
-	  {
-		return NOINVERSE;
-	  }
-	  step+=size;
-	}
-    }
-    else	// inShape[0] >3  (or negative but that can hopefully never happen)
-    {
-#ifndef USE_LAPACK
-	return NEEDLAPACK;
-#else
-	int step=0;
-	
-	
-	for (int i=0;i<count;++i)
-	{
-		// need to make a copy since blas overwrites its input
-		for (int j=0;j<size;++j)
-		{
-		    out[outOffset+step+j]=in[inOffset+step+j];
-		}
-		double* arr=&(out[outOffset+step]);
-		int res=helper.invert(arr);
-		if (res!=0)
-		{
-		    return res;
-		}
-		step+=size;
-	}
-#endif
-    }
-    return SUCCESS;
-}
-
-}    // end namespace
-}    // end namespace
-
diff --git a/escriptcore/src/DataMaths.h b/escriptcore/src/DataMaths.h
deleted file mode 100644
index e356ed8..0000000
--- a/escriptcore/src/DataMaths.h
+++ /dev/null
@@ -1,952 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef escript_DataMaths_20080822_H
-#define escript_DataMaths_20080822_H
-#include "DataAbstract.h"
-#include "DataException.h"
-#include "LocalOps.h"
-#include "LapackInverseHelper.h"
-
-/**
-\file DataMaths.h 
-\brief Describes binary operations performed on DataVector.
-
-
-For operations on DataAbstract see BinaryOp.h.
-For operations on double* see LocalOps.h.
-*/
-
-
-namespace escript
-{
-namespace DataMaths
-{
-
-/**
-\namespace escript::DataMaths
-\brief Contains maths operations performed on data vectors.
-
-In order to properly identify the datapoints, in most cases, the vector, shape and offset of the point must all be supplied.
-Note that vector in this context refers to a data vector storing datapoints not a mathematical vector. (However, datapoints within the data vector could represent scalars, vectors, matricies, ...).
-*/
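The addressing scheme described here carries over unchanged: a data point is located by the triple (vector, shape, offset), and an element within the point is reached through getRelIndex. A minimal sketch of that arithmetic, using std::vector<double> as a stand-in for the data vector (illustrative only):

    #include <cstddef>
    #include <vector>
    #include "DataTypes.h"   // ShapeType, noValues, getRelIndex

    // Read element (i,j) of the k-th rank-2 data point stored back to back in 'vec'.
    double elementAt(const std::vector<double>& vec,
                     const escript::DataTypes::ShapeType& shape,
                     std::size_t k, int i, int j)
    {
        const std::size_t pointOffset = k * escript::DataTypes::noValues(shape);
        return vec[pointOffset + escript::DataTypes::getRelIndex(shape, i, j)];
    }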
-
-
-  /**
-     \brief
-     Perform the unary operation on the data point specified by the given
-     offset. Applies the specified operation to each value in the data
-     point. Operation must be a pointer to a function.
-
-     Called by escript::unaryOp.
-
-     \param data - vector containing the datapoint
-     \param shape - shape of the point
-     \param offset - offset of the point within data
-     \param operation - Input -
-                  Operation to apply. Must be a pointer to a function.
-  */
-  template <class UnaryFunction>
-  void
-  unaryOp(DataTypes::ValueType& data, const DataTypes::ShapeType& shape,
-          DataTypes::ValueType::size_type offset,
-          UnaryFunction operation);
-
-  /**
-     \brief
-     Perform the binary operation on the data points specified by the given
-     offsets in the "left" and "right" vectors. Applies the specified operation
-     to corresponding values in both data points. Operation must be a pointer
-     to a function.
-
-     Called by escript::binaryOp.
-     \param left,right - vectors containing the datapoints
-     \param leftShape,rightShape - shapes of datapoints in the vectors
-     \param leftOffset,rightOffset - beginnings of datapoints in the vectors
-     \param operation - Input -
-                  Operation to apply. Must be a pointer to a function.
-  */
-  template <class BinaryFunction>
-  void
-  binaryOp(DataTypes::ValueType& left, 
-	   const DataTypes::ShapeType& leftShape, 
-           DataTypes::ValueType::size_type leftOffset,
-           const DataTypes::ValueType& right, 
-           const DataTypes::ShapeType& rightShape,
-           DataTypes::ValueType::size_type rightOffset,
-           BinaryFunction operation);
-
-  /**
-     \brief
-     Perform the binary operation on the data point specified by the given
-     offset in the vector using the scalar value "right". Applies the specified
-     operation to values in the data point. Operation must be a pointer
-     to a function.
-
-     Called by escript::binaryOp.
-
-     \param left - vector containing the datapoints
-     \param shape - shape of datapoint in the vector
-     \param offset - beginning of datapoint in the vector
-     \param right - scalar value for the right hand side of the operation
-     \param operation - Input -
-                  Operation to apply. Must be a pointer to a function.
-  */
-  template <class BinaryFunction>
-  void
-  binaryOp(DataTypes::ValueType& left, 
-           const DataTypes::ShapeType& shape,
- 	   DataTypes::ValueType::size_type offset,
-           double right,
-           BinaryFunction operation);
-
-  /**
-     \brief
-     Perform the given data point reduction operation on the data point
-     specified by the given offset into the view. Reduces all elements of
-     the data point using the given operation, returning the result as a 
-     scalar. Operation must be a pointer to a function.
-
-     Called by escript::algorithm.
-
-     \param left - vector containing the datapoint
-     \param shape - shape of datapoints in the vector
-     \param offset - beginning of datapoint in the vector
-     \param operation - Input -
-                  Operation to apply. Must be a pointer to a function.
-     \param initial_value 
-  */
-  template <class BinaryFunction>
-  double
-  reductionOp(const DataTypes::ValueType& left, 
-	      const DataTypes::ShapeType& shape,
- 	      DataTypes::ValueType::size_type offset,
-              BinaryFunction operation,
-              double initial_value);
-
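As a usage sketch against the declaration above (these templates are retired by this commit in favour of the DataVectorOps equivalents, which presumably behave the same way), summing one data point reduces to:

    #include <functional>
    #include "DataMaths.h"

    // Sum every element of the data point beginning at 'offset'.
    double sumPoint(const escript::DataTypes::ValueType& vec,
                    const escript::DataTypes::ShapeType& shape,
                    escript::DataTypes::ValueType::size_type offset)
    {
        return escript::DataMaths::reductionOp(vec, shape, offset,
                                               std::plus<double>(), 0.0);
    }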
- /**
-     \brief
-     Perform a matrix multiply of the given views.
-
-     NB: Only multiplies together the two given datapoints,
-     would need to call this over all data-points to multiply the entire
-     Data objects involved.
-
-     \param left,right - vectors containing the datapoints
-     \param leftShape,rightShape - shapes of datapoints in the vectors
-     \param leftOffset,rightOffset - beginnings of datapoints in the vectors
-     \param result - Vector to store the resulting datapoint in
-     \param resultShape - expected shape of the resulting datapoint
-  */
-  ESCRIPT_DLL_API
-  void
-  matMult(const DataTypes::ValueType& left, 
-	  const DataTypes::ShapeType& leftShape,
-	  DataTypes::ValueType::size_type leftOffset,
-          const DataTypes::ValueType& right,
-	  const DataTypes::ShapeType& rightShape,
-	  DataTypes::ValueType::size_type rightOffset,
-          DataTypes::ValueType& result,
-	  const DataTypes::ShapeType& resultShape);
-// Hmmmm why is there no offset for the result??
-
-
-
-
-  /**
-     \brief
-     Determine the shape of the result array for a matrix multiplication
-     of the given views.
-
-     \param left,right - shapes of the left and right matricies
-     \return the shape of the matrix which would result from multiplying left and right
-  */
-  ESCRIPT_DLL_API
-  DataTypes::ShapeType
-  determineResultShape(const DataTypes::ShapeType& left,
-                       const DataTypes::ShapeType& right);
-
-  /**
-     \brief
-     computes a symmetric matrix from your square matrix A: (A + transpose(A)) / 2
-
-     \param in - vector containing the matrix A
-     \param inShape - shape of the matrix A
-     \param inOffset - the beginning of A within the vector in
-     \param ev - vector to store the output matrix
-     \param evShape - expected shape of the output matrix
-     \param evOffset - starting location for storing ev in vector ev
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  symmetric(const DataTypes::ValueType& in, 
-	    const DataTypes::ShapeType& inShape,
-            DataTypes::ValueType::size_type inOffset,
-            DataTypes::ValueType& ev, 
-	    const DataTypes::ShapeType& evShape,
-            DataTypes::ValueType::size_type evOffset)
-  {
-   if (DataTypes::getRank(inShape) == 2) {
-     int i0, i1;
-     int s0=inShape[0];
-     int s1=inShape[1];
-     for (i0=0; i0<s0; i0++) {
-       for (i1=0; i1<s1; i1++) {
-         ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] + in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)]) / 2.0;
-       }
-     }
-   }
-   else if (DataTypes::getRank(inShape) == 4) {
-     int i0, i1, i2, i3;
-     int s0=inShape[0];
-     int s1=inShape[1];
-     int s2=inShape[2];
-     int s3=inShape[3];
-     for (i0=0; i0<s0; i0++) {
-       for (i1=0; i1<s1; i1++) {
-         for (i2=0; i2<s2; i2++) {
-           for (i3=0; i3<s3; i3++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] + in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)]) / 2.0;
-           }
-         }
-       }
-     }
-   }
-  }
-
-  /**
-     \brief
-     computes a nonsymmetric matrix from your square matrix A: (A - transpose(A)) / 2
-
-     \param in - vector containing the matrix A
-     \param inShape - shape of the matrix A
-     \param inOffset - the beginning of A within the vector in
-     \param ev - vector to store the output matrix
-     \param evShape - expected shape of the output matrix
-     \param evOffset - starting location for storing ev in vector ev
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  nonsymmetric(const DataTypes::ValueType& in, 
-	       const DataTypes::ShapeType& inShape,
-               DataTypes::ValueType::size_type inOffset,
-               DataTypes::ValueType& ev, 
-	       const DataTypes::ShapeType& evShape,
-               DataTypes::ValueType::size_type evOffset)
-  {
-   if (DataTypes::getRank(inShape) == 2) {
-     int i0, i1;
-     int s0=inShape[0];
-     int s1=inShape[1];
-     for (i0=0; i0<s0; i0++) {
-       for (i1=0; i1<s1; i1++) {
-         ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] - in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)]) / 2.0;
-       }
-     }
-   }
-   else if (DataTypes::getRank(inShape) == 4) {
-     int i0, i1, i2, i3;
-     int s0=inShape[0];
-     int s1=inShape[1];
-     int s2=inShape[2];
-     int s3=inShape[3];
-     for (i0=0; i0<s0; i0++) {
-       for (i1=0; i1<s1; i1++) {
-         for (i2=0; i2<s2; i2++) {
-           for (i3=0; i3<s3; i3++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] - in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)]) / 2.0;
-           }
-         }
-       }
-     }
-   }
-  }
-
-  /**
-     \brief
-     computes the trace of a matrix
-
-     \param in - vector containing the input matrix
-     \param inShape - shape of the input matrix
-     \param inOffset - the beginning of the input matrix within the vector "in"
-     \param ev - vector to store the output matrix
-     \param evShape - expected shape of the output matrix
-     \param evOffset - starting location for storing the output matrix in vector ev
-     \param axis_offset
-  */
-  inline
-  void
-  trace(const DataTypes::ValueType& in, 
-	    const DataTypes::ShapeType& inShape,
-            DataTypes::ValueType::size_type inOffset,
-            DataTypes::ValueType& ev,
-	    const DataTypes::ShapeType& evShape,
-            DataTypes::ValueType::size_type evOffset,
-	    int axis_offset)
-  {
-   for (int j=0;j<DataTypes::noValues(evShape);++j)
-   {
-      ev[evOffset+j]=0;
-   }
-   if (DataTypes::getRank(inShape) == 2) {
-     int s0=inShape[0]; // Python wrapper limits to square matrix
-     int i;
-     for (i=0; i<s0; i++) {
-       ev[evOffset/*+DataTypes::getRelIndex(evShape)*/] += in[inOffset+DataTypes::getRelIndex(inShape,i,i)];
-     }
-   }
-   else if (DataTypes::getRank(inShape) == 3) {
-     if (axis_offset==0) {
-       int s0=inShape[0];
-       int s2=inShape[2];
-       int i0, i2;
-       for (i0=0; i0<s0; i0++) {
-         for (i2=0; i2<s2; i2++) {
-           ev[evOffset+DataTypes::getRelIndex(evShape,i2)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i0,i2)];
-         }
-       }
-     }
-     else if (axis_offset==1) {
-       int s0=inShape[0];
-       int s1=inShape[1];
-       int i0, i1;
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           ev[evOffset+DataTypes::getRelIndex(evShape,i0)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i1)];
-         }
-       }
-     }
-   }
-   else if (DataTypes::getRank(inShape) == 4) {
-     if (axis_offset==0) {
-       int s0=inShape[0];
-       int s2=inShape[2];
-       int s3=inShape[3];
-       int i0, i2, i3;
-       for (i0=0; i0<s0; i0++) {
-         for (i2=0; i2<s2; i2++) {
-           for (i3=0; i3<s3; i3++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i2,i3)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i0,i2,i3)];
-           }
-         }
-       }
-     }
-     else if (axis_offset==1) {
-       int s0=inShape[0];
-       int s1=inShape[1];
-       int s3=inShape[3];
-       int i0, i1, i3;
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i3=0; i3<s3; i3++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i3)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i1,i3)];
-           }
-         }
-       }
-     }
-     else if (axis_offset==2) {
-       int s0=inShape[0];
-       int s1=inShape[1];
-       int s2=inShape[2];
-       int i0, i1, i2;
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i2)];
-           }
-         }
-       }
-     }
-   }
-  }
-
-  /**
-     \brief
-     Transpose each data point of this Data object around the given axis.
-
-     \param in - vector containing the input matrix
-     \param inShape - shape of the input matrix
-     \param inOffset - the beginning of the input matrix within the vector "in"
-     \param ev - vector to store the output matrix
-     \param evShape - expected shape of the output matrix
-     \param evOffset - starting location for storing the output matrix in vector ev
-     \param axis_offset
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  transpose(const DataTypes::ValueType& in, 
-	    const DataTypes::ShapeType& inShape,
-            DataTypes::ValueType::size_type inOffset,
-            DataTypes::ValueType& ev,
-            const DataTypes::ShapeType& evShape,
-            DataTypes::ValueType::size_type evOffset,
-	    int axis_offset)
-  {
-   int inRank=DataTypes::getRank(inShape);
-   if ( inRank== 4) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int s2=evShape[2];
-     int s3=evShape[3];
-     int i0, i1, i2, i3;
-     if (axis_offset==1) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             for (i3=0; i3<s3; i3++) {
-               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i3,i0,i1,i2)];
-             }
-           }
-         }
-       }
-     }
-     else if (axis_offset==2) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             for (i3=0; i3<s3; i3++) {
-               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)];
-             }
-           }
-         }
-       }
-     }
-     else if (axis_offset==3) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             for (i3=0; i3<s3; i3++) {
-               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i2,i3,i0)];
-             }
-           }
-         }
-       }
-     }
-     else {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             for (i3=0; i3<s3; i3++) {
-               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)];
-             }
-           }
-         }
-       }
-     }
-   }
-   else if (inRank == 3) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int s2=evShape[2];
-     int i0, i1, i2;
-     if (axis_offset==1) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i0,i1)];
-           }
-         }
-       }
-     }
-     else if (axis_offset==2) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i2,i0)];
-           }
-         }
-       }
-     }
-     else {
-       // Copy the matrix unchanged
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           for (i2=0; i2<s2; i2++) {
-             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2)];
-           }
-         }
-       }
-     }
-   }
-   else if (inRank == 2) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int i0, i1;
-     if (axis_offset==1) {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)];
-         }
-       }
-     }
-     else {
-       for (i0=0; i0<s0; i0++) {
-         for (i1=0; i1<s1; i1++) {
-           ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)];
-         }
-       }
-     }
-   }
-   else if (inRank == 1) {
-     int s0=evShape[0];
-     int i0;
-     for (i0=0; i0<s0; i0++) {
-       ev[evOffset+DataTypes::getRelIndex(evShape,i0)] = in[inOffset+DataTypes::getRelIndex(inShape,i0)];
-     }
-   }
-   else if (inRank == 0) {
-     ev[evOffset/*+DataTypes::getRelIndex(evShape,)*/] = in[inOffset/*+DataTypes::getRelIndex(inShape,)*/];
-   }
-   else {
-      throw DataException("Error - DataArrayView::transpose can only be calculated for rank 0, 1, 2, 3 or 4 objects.");
-   }
-  }
-
-  /**
-     \brief
-     swaps the components axis0 and axis1.
-
-     \param in - vector containing the input matrix
-     \param inShape - shape of the input matrix
-     \param inOffset - the beginning of the input matrix within the vector "in"
-     \param ev - vector to store the output matrix
-     \param evShape - expected shape of the output matrix
-     \param evOffset - starting location for storing the output matrix in vector ev
-     \param axis0 - axis index
-     \param axis1 - axis index
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  swapaxes(const DataTypes::ValueType& in, 
-	   const DataTypes::ShapeType& inShape,
-           DataTypes::ValueType::size_type inOffset,
-           DataTypes::ValueType& ev,
-	   const DataTypes::ShapeType& evShape,
-           DataTypes::ValueType::size_type evOffset,
-           int axis0, 
-	   int axis1)
-  {
-     int inRank=DataTypes::getRank(inShape);
-     if (inRank == 4) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int s2=evShape[2];
-     int s3=evShape[3];
-     int i0, i1, i2, i3;
-     if (axis0==0) {
-        if (axis1==1) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0,i2,i3)];
-                  }
-                }
-              }
-            }
-        } else if (axis1==2) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i1,i0,i3)];
-                  }
-                }
-              }
-            }
-
-        } else if (axis1==3) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i3,i1,i2,i0)];
-                  }
-                }
-              }
-            }
-        }
-     } else if (axis0==1) {
-        if (axis1==2) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i2,i1,i3)];
-                  }
-                }
-              }
-            }
-        } else if (axis1==3) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i3,i2,i1)];
-                  }
-                }
-              }
-            }
-        }
-     } else if (axis0==2) {
-        if (axis1==3) {
-            for (i0=0; i0<s0; i0++) {
-              for (i1=0; i1<s1; i1++) {
-                for (i2=0; i2<s2; i2++) {
-                  for (i3=0; i3<s3; i3++) {
-                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i3,i2)];
-                  }
-                }
-              }
-            }
-        }
-     }
-
-   } else if ( inRank == 3) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int s2=evShape[2];
-     int i0, i1, i2;
-     if (axis0==0) {
-        if (axis1==1) {
-           for (i0=0; i0<s0; i0++) {
-             for (i1=0; i1<s1; i1++) {
-               for (i2=0; i2<s2; i2++) {
-                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0,i2)];
-               }
-             }
-           }
-        } else if (axis1==2) {
-           for (i0=0; i0<s0; i0++) {
-             for (i1=0; i1<s1; i1++) {
-               for (i2=0; i2<s2; i2++) {
-                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i1,i0)];
-               }
-             }
-           }
-       }
-     } else if (axis0==1) {
-        if (axis1==2) {
-           for (i0=0; i0<s0; i0++) {
-             for (i1=0; i1<s1; i1++) {
-               for (i2=0; i2<s2; i2++) {
-                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i2,i1)];
-               }
-             }
-           }
-        }
-     }
-   } else if ( inRank == 2) {
-     int s0=evShape[0];
-     int s1=evShape[1];
-     int i0, i1;
-     if (axis0==0) {
-        if (axis1==1) {
-           for (i0=0; i0<s0; i0++) {
-             for (i1=0; i1<s1; i1++) {
-                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)];
-             }
-           }
-        }
-    }
-  } else {
-      throw DataException("Error - DataArrayView::swapaxes can only be calculated for rank 2, 3 or 4 objects.");
-  }
- }
-
-  /**
-     \brief
-     solves a local eigenvalue problem 
-
-     \param in - vector containing the input matrix
-     \param inShape - shape of the input matrix
-     \param inOffset - the beginning of the input matrix within the vector "in"
-     \param ev - vector to store the eigenvalues
-     \param evShape - expected shape of the eigenvalues
-     \param evOffset - starting location for storing the eigenvalues in vector ev
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  eigenvalues(const DataTypes::ValueType& in, 
-	      const DataTypes::ShapeType& inShape,
-              DataTypes::ValueType::size_type inOffset,
-              DataTypes::ValueType& ev,
-	      const DataTypes::ShapeType& evShape,
-              DataTypes::ValueType::size_type evOffset)
-  {
-   double in00,in10,in20,in01,in11,in21,in02,in12,in22;
-   double ev0,ev1,ev2;
-   int s=inShape[0];
-   if (s==1) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      eigenvalues1(in00,&ev0);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-
-   } else  if (s==2) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
-      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
-      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
-      eigenvalues2(in00,(in01+in10)/2.,in11,&ev0,&ev1);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
-
-   } else  if (s==3) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
-      in20=in[inOffset+DataTypes::getRelIndex(inShape,2,0)];
-      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
-      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
-      in21=in[inOffset+DataTypes::getRelIndex(inShape,2,1)];
-      in02=in[inOffset+DataTypes::getRelIndex(inShape,0,2)];
-      in12=in[inOffset+DataTypes::getRelIndex(inShape,1,2)];
-      in22=in[inOffset+DataTypes::getRelIndex(inShape,2,2)];
-      eigenvalues3(in00,(in01+in10)/2.,(in02+in20)/2.,in11,(in21+in12)/2.,in22,
-                 &ev0,&ev1,&ev2);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
-      ev[evOffset+DataTypes::getRelIndex(evShape,2)]=ev2;
-
-   }
-  }
-
-  /**
-     \brief
-     solves a local eigenvalue problem 
-
-     \param in - vector containing the input matrix
-     \param inShape - shape of the input matrix
-     \param inOffset - the beginning of the input matrix within the vector "in"
-     \param ev - vector to store the eigenvalues
-     \param evShape - expected shape of the eigenvalues
-     \param evOffset - starting location for storing the eigenvalues in ev
-     \param V - vector to store the eigenvectors
-     \param VShape - expected shape of the eigenvectors
-     \param VOffset - starting location for storing the eigenvectors in V
-     \param tol - Input - eigenvalues with relative difference tol are treated as equal
-  */
-  ESCRIPT_DLL_API
-  inline
-  void
-  eigenvalues_and_eigenvectors(const DataTypes::ValueType& in, const DataTypes::ShapeType& inShape,
-                               DataTypes::ValueType::size_type inOffset,
-                               DataTypes::ValueType& ev, const DataTypes::ShapeType& evShape, 
-                               DataTypes::ValueType::size_type evOffset,
-                               DataTypes::ValueType& V, const DataTypes::ShapeType& VShape,
-                               DataTypes::ValueType::size_type VOffset,
-                               const double tol=1.e-13)
-  {
-   double in00,in10,in20,in01,in11,in21,in02,in12,in22;
-   double V00,V10,V20,V01,V11,V21,V02,V12,V22;
-   double ev0,ev1,ev2;
-   int s=inShape[0];
-   if (s==1) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      eigenvalues_and_eigenvectors1(in00,&ev0,&V00,tol);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
-   } else  if (s==2) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
-      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
-      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
-      eigenvalues_and_eigenvectors2(in00,(in01+in10)/2.,in11,
-                   &ev0,&ev1,&V00,&V10,&V01,&V11,tol);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
-      V[inOffset+DataTypes::getRelIndex(VShape,1,0)]=V10;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,1)]=V01;
-      V[inOffset+DataTypes::getRelIndex(VShape,1,1)]=V11;
-   } else  if (s==3) {
-      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
-      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
-      in20=in[inOffset+DataTypes::getRelIndex(inShape,2,0)];
-      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
-      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
-      in21=in[inOffset+DataTypes::getRelIndex(inShape,2,1)];
-      in02=in[inOffset+DataTypes::getRelIndex(inShape,0,2)];
-      in12=in[inOffset+DataTypes::getRelIndex(inShape,1,2)];
-      in22=in[inOffset+DataTypes::getRelIndex(inShape,2,2)];
-      eigenvalues_and_eigenvectors3(in00,(in01+in10)/2.,(in02+in20)/2.,in11,(in21+in12)/2.,in22,
-                 &ev0,&ev1,&ev2,
-                 &V00,&V10,&V20,&V01,&V11,&V21,&V02,&V12,&V22,tol);
-      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
-      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
-      ev[evOffset+DataTypes::getRelIndex(evShape,2)]=ev2;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
-      V[inOffset+DataTypes::getRelIndex(VShape,1,0)]=V10;
-      V[inOffset+DataTypes::getRelIndex(VShape,2,0)]=V20;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,1)]=V01;
-      V[inOffset+DataTypes::getRelIndex(VShape,1,1)]=V11;
-      V[inOffset+DataTypes::getRelIndex(VShape,2,1)]=V21;
-      V[inOffset+DataTypes::getRelIndex(VShape,0,2)]=V02;
-      V[inOffset+DataTypes::getRelIndex(VShape,1,2)]=V12;
-      V[inOffset+DataTypes::getRelIndex(VShape,2,2)]=V22;
-
-   }
- }
-
-
-/**
-   Inline function definitions.
-*/
-
-inline
-bool
-checkOffset(const DataTypes::ValueType& data,
-	    const DataTypes::ShapeType& shape,
-	    DataTypes::ValueType::size_type offset)
-{
-	return (data.size() >= (offset+DataTypes::noValues(shape))); 
-}
-
-template <class UnaryFunction>
-inline
-void
-unaryOp(DataTypes::ValueType& data, const DataTypes::ShapeType& shape,
-          DataTypes::ValueType::size_type offset,
-          UnaryFunction operation)
-{
-  EsysAssert((data.size()>0)&&checkOffset(data,shape,offset),
-               "Error - Couldn't perform unaryOp due to insufficient storage.");
-  DataTypes::ValueType::size_type nVals=DataTypes::noValues(shape);
-  for (DataTypes::ValueType::size_type i=0;i<nVals;i++) {
-    data[offset+i]=operation(data[offset+i]);
-  }
-}
-
-
-template <class BinaryFunction>
-inline
-void
-binaryOp(DataTypes::ValueType& left, 
-			const DataTypes::ShapeType& leftShape,
-			DataTypes::ValueType::size_type leftOffset,
-                        const DataTypes::ValueType& right,
-			const DataTypes::ShapeType& rightShape,
-                        DataTypes::ValueType::size_type rightOffset,
-                        BinaryFunction operation)
-{
-  EsysAssert(leftShape==rightShape,
-	     "Error - Couldn't perform binaryOp due to shape mismatch,");
-  EsysAssert(((left.size()>0)&&checkOffset(left,leftShape, leftOffset)),
-             "Error - Couldn't perform binaryOp due to insufficient storage in left object.");
-  EsysAssert(((right.size()>0)&&checkOffset(right,rightShape,rightOffset)),
-             "Error - Couldn't perform binaryOp due to insufficient storage in right object.");
-  for (DataTypes::ValueType::size_type i=0;i<DataTypes::noValues(leftShape);i++) {
-    left[leftOffset+i]=operation(left[leftOffset+i],right[rightOffset+i]);
-  }
-}
-
-template <class BinaryFunction>
-inline
-void
-binaryOp(DataTypes::ValueType& left, 
-			const DataTypes::ShapeType& leftShape,
-			DataTypes::ValueType::size_type offset,
-                        double right,
-                        BinaryFunction operation)
-{
-  EsysAssert(((left.size()>0)&&checkOffset(left,leftShape,offset)),
-             "Error - Couldn't perform binaryOp due to insufficient storage in left object.");
-  for (DataTypes::ValueType::size_type i=0;i<DataTypes::noValues(leftShape);i++) {
-    left[offset+i]=operation(left[offset+i],right);
-  }
-}
-
-template <class BinaryFunction>
-inline
-double
-reductionOp(const DataTypes::ValueType& left, 
-			   const DataTypes::ShapeType& leftShape,
-			   DataTypes::ValueType::size_type offset,
-                           BinaryFunction operation,
-                           double initial_value)
-{
-  EsysAssert(((left.size()>0)&&checkOffset(left,leftShape,offset)),
-               "Error - Couldn't perform reductionOp due to insufficient storage.");
-  double current_value=initial_value;
-  for (DataTypes::ValueType::size_type i=0;i<DataTypes::noValues(leftShape);i++) {
-    current_value=operation(current_value,left[offset+i]);
-  }
-  return current_value;
-}
-
-/**
-     \brief
-     computes the inverses of square (up to 3x3) matricies 
-
-     \param in - vector containing the input matricies
-     \param inShape - shape of the input matricies
-     \param inOffset - the beginning of the input matricies within the vector "in"
-     \param out - vector to store the inverses
-     \param outShape - expected shape of the inverses
-     \param outOffset - starting location for storing the inverses in out
-     \param count - number of matricies to invert
-     \param helper - associated working storage
-
-     \exception DataException if input and output are not the correct shape or if any of the matricies are not invertible.
-     \return 0 on success, on failure the return value should be passed to matrixInverseError(int err).
-*/
-int
-matrix_inverse(const DataTypes::ValueType& in, 
-	    const DataTypes::ShapeType& inShape,
-            DataTypes::ValueType::size_type inOffset,
-            DataTypes::ValueType& out,
-	    const DataTypes::ShapeType& outShape,
-            DataTypes::ValueType::size_type outOffset,
-	    int count,
-	    LapackInverseHelper& helper);
-
-/**
-   \brief
-   throws an appropriate exception based on failure of matrix_inverse.
-
-   \param err - error code returned from matrix_inverse
-   \warning do not call in a parallel region since it throws.
-*/
-void 
-matrixInverseError(int err);
-
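Per the contract documented above, the intended pattern is to collect the integer return code and convert it into an exception afterwards, since matrixInverseError throws and must stay outside parallel regions. A sketch against the removed declarations (the DataVectorOps replacement presumably keeps the same contract):

    #include "DataMaths.h"

    namespace escript {

    // Invert 'count' square matrices stored back to back in 'in', writing the
    // results to 'out'; converts any failure code into a DataException.
    void invertBatch(const DataTypes::ValueType& in,
                     const DataTypes::ShapeType& shape,
                     DataTypes::ValueType& out,
                     int count,
                     LapackInverseHelper& helper)
    {
        int err = DataMaths::matrix_inverse(in, shape, 0, out, shape, 0, count, helper);
        if (err != 0)
            DataMaths::matrixInverseError(err);   // throws; keep outside parallel regions
    }

    } // namespace escript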
-/**
-   \brief returns true if the vector contains NaN
-
-*/
-inline 
-bool
-vectorHasNaN(const DataTypes::ValueType& in, DataTypes::ValueType::size_type inOffset, size_t count)
-{
-	for (size_t z=inOffset;z<inOffset+count;++z)
-	{
-	    if (nancheck(in[z]))
-	    {
-		return true;
-	    }
-	}
-	return false;
-}
-
-}  // end namespace DataMath
-}  // end namespace escript
-#endif
-
diff --git a/escriptcore/src/DataReady.cpp b/escriptcore/src/DataReady.cpp
index efbb1d8..764fb44 100644
--- a/escriptcore/src/DataReady.cpp
+++ b/escriptcore/src/DataReady.cpp
@@ -14,25 +14,21 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataReady.h"
 
 namespace escript
 {
 
-DataReady::DataReady(const FunctionSpace& what, const ShapeType& shape, bool isDataEmpty)
-	:parent(what,shape,isDataEmpty)
+DataReady::DataReady(const FunctionSpace& what, const ShapeType& shape,
+                     bool isDataEmpty)
+    : parent(what, shape, isDataEmpty)
 {
 }
 
 
-DataReady_ptr 
-DataReady::resolve()
+DataReady_ptr DataReady::resolve()
 {
-	return boost::dynamic_pointer_cast<DataReady>(this->getPtr());
+    return REFCOUNTNS::dynamic_pointer_cast<DataReady>(this->getPtr());
 }
 
 
diff --git a/escriptcore/src/DataReady.h b/escriptcore/src/DataReady.h
index ca4fd3f..cb10b66 100644
--- a/escriptcore/src/DataReady.h
+++ b/escriptcore/src/DataReady.h
@@ -46,26 +46,67 @@ public:
      Return the sample data for the given sample number.
   */
   ESCRIPT_DLL_API
-  double*
-  getSampleDataRW(ValueType::size_type sampleNo);
+  DataTypes::real_t*
+  getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy=0);
+  
+  ESCRIPT_DLL_API
+  DataTypes::cplx_t*
+  getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy);  
 
   ESCRIPT_DLL_API
-  const double*
-  getSampleDataRO(ValueType::size_type sampleNo) const;
+  const DataTypes::real_t*
+  getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy=0) const;
+  
+  ESCRIPT_DLL_API
+  const DataTypes::cplx_t*
+  getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy) const;
+  
 
   /**
 	\brief Provide access to underlying storage. Internal use only!
   */
 
   ESCRIPT_DLL_API
-  virtual DataTypes::ValueType&
+  virtual DataTypes::RealVectorType&
   getVectorRW()=0;
 
 
   ESCRIPT_DLL_API
-  virtual const DataTypes::ValueType&
+  virtual const DataTypes::RealVectorType&
   getVectorRO() const=0;
 
+  ESCRIPT_DLL_API
+  virtual DataTypes::CplxVectorType&
+  getVectorRWC()=0;
+
+
+  ESCRIPT_DLL_API
+  virtual const DataTypes::CplxVectorType&
+  getVectorROC() const=0;
+  
+  /**
+     \brief These versions use the type system rather than method name to determine return type
+  */
+  ESCRIPT_DLL_API
+  virtual DataTypes::RealVectorType&
+  getTypedVectorRW(DataTypes::real_t dummy)=0;  
+  
+  ESCRIPT_DLL_API
+  virtual const DataTypes::RealVectorType&
+  getTypedVectorRO(DataTypes::real_t dummy) const=0;
+
+  ESCRIPT_DLL_API
+  virtual DataTypes::CplxVectorType&
+  getTypedVectorRW(DataTypes::cplx_t dummy)=0;
+  
+  ESCRIPT_DLL_API
+  virtual const DataTypes::CplxVectorType&
+  getTypedVectorRO(DataTypes::cplx_t dummy) const=0;  
+  
+
+  
+  
+  
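The otherwise-unused 'dummy' argument is what selects the overload, so templated callers can pick real or complex storage without spelling out different method names. A hedged sketch of how such a caller might look (illustrative only, not code from this patch):

    #include "DataReady.h"

    // S is DataTypes::real_t or DataTypes::cplx_t; the S(0) dummy picks the overload.
    template <typename S>
    S firstValueOfSample(const escript::DataReady& d, int sampleNo)
    {
        const S* sample = d.getSampleDataRO(sampleNo, S(0));
        return sample[0];
    }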
   /**
   \brief return true if data contains NaN.
   \warning This is dependent on the ability to reliably detect NaNs on your compiler.
@@ -80,7 +121,14 @@ public:
   */
   ESCRIPT_DLL_API
   virtual void
-  replaceNaN(double value) = 0;
+  replaceNaN(DataTypes::real_t value) = 0;
+  
+  /**
+  \brief replaces all NaN values with value 
+  */
+  ESCRIPT_DLL_API
+  virtual void
+  replaceNaN(DataTypes::cplx_t value) = 0;  
   
   /**
      \brief
@@ -100,15 +148,28 @@ public:
      \brief get a reference to the beginning of a data point
  */
   ESCRIPT_DLL_API
-  DataTypes::ValueType::const_reference
-  getDataAtOffsetRO(DataTypes::ValueType::size_type i) const;
+  DataTypes::RealVectorType::const_reference
+  getDataAtOffsetRO(DataTypes::RealVectorType::size_type i) const;
 
 
   ESCRIPT_DLL_API
-  DataTypes::ValueType::reference
-  getDataAtOffsetRW(DataTypes::ValueType::size_type i);
+  DataTypes::RealVectorType::reference
+  getDataAtOffsetRW(DataTypes::RealVectorType::size_type i);
+  
+  ESCRIPT_DLL_API
+  DataTypes::CplxVectorType::const_reference
+  getDataAtOffsetROC(DataTypes::CplxVectorType::size_type i) const;
+
 
   ESCRIPT_DLL_API
+  DataTypes::CplxVectorType::reference
+  getDataAtOffsetRWC(DataTypes::CplxVectorType::size_type i);  
+  
+  
+  
+  
+  
+  ESCRIPT_DLL_API
   DataReady_ptr 
   resolve();
 
@@ -116,34 +177,64 @@ public:
 
 
 inline
-DataAbstract::ValueType::value_type*
-DataReady::getSampleDataRW(ValueType::size_type sampleNo)
+DataTypes::real_t*
+DataReady::getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy)
 {
   return &(getVectorRW()[getPointOffset(sampleNo,0)]);		// exclusive write checks will be done in getVectorRW()
 }
 
-inline const double*
-DataReady::getSampleDataRO(ValueType::size_type sampleNo) const
+inline
+DataTypes::cplx_t*
+DataReady::getSampleDataRW(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy)
+{
+  return &(getVectorRWC()[getPointOffset(sampleNo,0)]);		// exclusive write checks will be done in getVectorRWC()
+}
+
+
+inline const DataTypes::real_t*
+DataReady::getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::real_t dummy) const
 {
   return &(getVectorRO()[getPointOffset(sampleNo,0)]);		
 }
 
+inline const DataTypes::cplx_t*
+DataReady::getSampleDataRO(DataTypes::RealVectorType::size_type sampleNo, DataTypes::cplx_t dummy) const
+{
+  return &(getVectorROC()[getPointOffset(sampleNo,0)]);		
+}
+
+
 
 inline
-DataTypes::ValueType::const_reference
-DataReady::getDataAtOffsetRO(DataTypes::ValueType::size_type i) const
+DataTypes::RealVectorType::const_reference
+DataReady::getDataAtOffsetRO(DataTypes::RealVectorType::size_type i) const
 {
    return getVectorRO()[i];
 }
 
 inline
-DataTypes::ValueType::reference
-DataReady::getDataAtOffsetRW(DataTypes::ValueType::size_type i)	// exclusive write checks will be done in getVectorRW()
+DataTypes::RealVectorType::reference
+DataReady::getDataAtOffsetRW(DataTypes::RealVectorType::size_type i)	// exclusive write checks will be done in getVectorRW()
 {
    return getVectorRW()[i];
 }
 
 
+inline
+DataTypes::CplxVectorType::const_reference
+DataReady::getDataAtOffsetROC(DataTypes::CplxVectorType::size_type i) const
+{
+   return getVectorROC()[i];
+}
+
+inline
+DataTypes::CplxVectorType::reference
+DataReady::getDataAtOffsetRWC(DataTypes::CplxVectorType::size_type i)	// exclusive write checks will be done in getVectorRWC()
+{
+   return getVectorRWC()[i];
+}
+
+
 
 }
 
diff --git a/escriptcore/src/DataTagged.cpp b/escriptcore/src/DataTagged.cpp
index 622510c..656a4d6 100644
--- a/escriptcore/src/DataTagged.cpp
+++ b/escriptcore/src/DataTagged.cpp
@@ -14,56 +14,76 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Data.h"
-#include "DataTagged.h"
 #include "DataConstant.h"
 #include "DataException.h"
-#include "esysUtils/Esys_MPI.h"
+#include "DataVectorOps.h"
+#include "DataTagged.h"
+
+#include <complex>
 
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#include "DataMaths.h"
-
-
-#define CHECK_FOR_EX_WRITE if (!checkNoSharing()) {throw DataException("Attempt to modify shared object");}
-
-// #define CHECK_FOR_EX_WRITE if (!checkNoSharing()) {std::ostringstream ss; ss << " Attempt to modify shared object. line " << __LINE__ << " of " << __FILE__; throw DataException(ss.str());}
+#ifdef SLOWSHARECHECK
+  #define CHECK_FOR_EX_WRITE if (isShared()) {throw DataException("Attempt to modify shared object");}
+#else
+  #define CHECK_FOR_EX_WRITE
+#endif
 
 using namespace std;
 
 namespace escript {
 
-DataTagged::DataTagged()
-  : parent(FunctionSpace(),DataTypes::scalarShape)
+DataTagged::DataTagged(const FunctionSpace& what,
+                       const DataTypes::ShapeType &shape,
+                       const int tags[],
+                       const DataTypes::RealVectorType& data)
+  : parent(what,shape)
 {
-  // default constructor
+  // alternative constructor
+  // not unit tested yet
+  // It is not explicitly unit tested yet, but it is called from DataFactory
+
+  if (!what.canTag())
+  {
+    throw DataException("Programming error - DataTag created with a non-taggable FunctionSpace.");
+  }
+  // copy the data
+  m_data_r=data;
 
-  // create a scalar default value
-  m_data.resize(1,0.,1);
+  // we can't rely on the tag array to give us the number of tags so 
+  // use the data we have been passed
+  int valsize=DataTypes::noValues(shape);
+  int ntags=data.size()/valsize;
+
+  // create the tag lookup map
+  // we assume the first value block and first tag entry hold the default, so we skip them
+  for (int i=1;i<ntags;++i)
+  {
+    m_offsetLookup.insert(DataMapType::value_type(tags[i],i*valsize));
+  }
 }
 
+
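As a worked example of the layout this constructor assumes (illustrative numbers, not taken from the patch): for shape (2,2), valsize = noValues(shape) = 4, so a data vector of 12 values yields ntags = 3 blocks; block 0 (offsets 0-3) holds the default value, and the loop maps tags[1] to offset 4 and tags[2] to offset 8.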
 DataTagged::DataTagged(const FunctionSpace& what,
                        const DataTypes::ShapeType &shape,
                        const int tags[],
-                       const ValueType& data)
+                       const DataTypes::CplxVectorType& data)
   : parent(what,shape)
 {
   // alternative constructor
   // not unit tested yet
   // It is not explicitly unit tested yet, but it is called from DataFactory
 
+  m_iscompl=true;
   if (!what.canTag())
   {
     throw DataException("Programming error - DataTag created with a non-taggable FunctionSpace.");
   }
   // copy the data
-  m_data=data;
+  m_data_c=data;
 
   // we can't rely on the tag array to give us the number of tags so 
   // use the data we have been passed
@@ -78,10 +98,12 @@ DataTagged::DataTagged(const FunctionSpace& what,
   }
 }
 
+
+
 DataTagged::DataTagged(const FunctionSpace& what,
                        const DataTypes::ShapeType &shape,
                        const TagListType& tags,
-                       const ValueType& data)
+                       const DataTypes::RealVectorType& data)
   : parent(what,shape)
 {
   // alternative constructor
@@ -92,17 +114,42 @@ DataTagged::DataTagged(const FunctionSpace& what,
   }
 
   // copy the data
-  m_data=data;
+  m_data_r=data;
 
-  // create the view of the data
-//   DataArrayView tempView(m_data,shape);
-//   setPointDataView(tempView);
+  // Derive the number of value blocks from the data vector and check it against the supplied tag list.
+
+  int valsize=DataTypes::noValues(shape);
+  int npoints=(data.size()/valsize)-1;
+  int ntags=tags.size();
+  if (ntags>npoints)
+  {     // This throw is not unit tested yet
+        throw DataException("Programming error - Too many tags for the supplied values.");
+  }
 
   // create the tag lookup map
+  // we assume that the first value is the default value so we skip it (hence the i+1 below)
+  for (int i=0;i<ntags;++i)
+  {
+    m_offsetLookup.insert(DataMapType::value_type(tags[i],(i+1)*valsize));
+  }
+}
+
+
+DataTagged::DataTagged(const FunctionSpace& what,
+                       const DataTypes::ShapeType &shape,
+                       const TagListType& tags,
+                       const DataTypes::CplxVectorType& data)
+  : parent(what,shape)
+{
+  // alternative constructor
+  m_iscompl=true;
+  if (!what.canTag())
+  {
+    throw DataException("Programming error - DataTag created with a non-taggable FunctionSpace.");
+  }
 
-//   for (int sampleNo=0; sampleNo<getNumSamples(); sampleNo++) {
-//     m_offsetLookup.insert(DataMapType::value_type(sampleNo,tags[sampleNo]));
-//   }
+  // copy the data
+  m_data_c=data;
 
   // The above code looks like it will create a map the wrong way around
 
@@ -110,8 +157,8 @@ DataTagged::DataTagged(const FunctionSpace& what,
   int npoints=(data.size()/valsize)-1;
   int ntags=tags.size();
   if (ntags>npoints)
-  {		// This throw is not unit tested yet
-	throw DataException("Programming error - Too many tags for the supplied values.");
+  {             // This throw is not unit tested yet
+        throw DataException("Programming error - Too many tags for the supplied values.");
   }
 
   // create the tag lookup map
@@ -123,23 +170,21 @@ DataTagged::DataTagged(const FunctionSpace& what,
 }
 
 
+
 DataTagged::DataTagged(const DataTagged& other)
   : parent(other.getFunctionSpace(),other.getShape()),
   m_offsetLookup(other.m_offsetLookup),
-  m_data(other.m_data)
+  m_data_r(other.m_data_r), m_data_c(other.m_data_c)
 {
   // copy constructor
-
-  // create the data view
-//   DataArrayView temp(m_data,other.getPointDataView().getShape());
-//   setPointDataView(temp);
+    m_iscompl=other.m_iscompl;
 }
 
 DataTagged::DataTagged(const DataConstant& other)
   : parent(other.getFunctionSpace(),other.getShape())
 {
   // copy constructor
-
+  m_iscompl=other.isComplex();
   if (!other.getFunctionSpace().canTag())
   {
     throw DataException("Programming error - DataTag created with a non-taggable FunctionSpace.");
@@ -147,9 +192,21 @@ DataTagged::DataTagged(const DataConstant& other)
 
   // fill the default value with the constant value item from "other"
   int len = other.getNoValues();
-  m_data.resize(len,0.,len);
-  for (int i=0; i<len; i++) {
-    m_data[i]=other.getVectorRO()[i];
+  if (m_iscompl)
+  {
+      DataTypes::cplx_t dummy=0;
+      m_data_c.resize(len,0.,len);
+      for (int i=0; i<len; i++) {
+        m_data_c[i]=other.getTypedVectorRO(dummy)[i];
+      }
+  }
+  else
+  {
+      DataTypes::real_t dummy=0;
+      m_data_r.resize(len,0.,len);
+      for (int i=0; i<len; i++) {
+        m_data_r[i]=other.getTypedVectorRO(dummy)[i];
+      }
   }
 }
 
@@ -157,7 +214,7 @@ DataTagged::DataTagged(const DataConstant& other)
 // Create a new object by copying tags
 DataTagged::DataTagged(const FunctionSpace& what,
              const DataTypes::ShapeType& shape,
-	     const DataTypes::ValueType& defaultvalue,
+             const DataTypes::RealVectorType& defaultvalue,
              const DataTagged* tagsource)
  : parent(what,shape)
 {
@@ -175,26 +232,66 @@ DataTagged::DataTagged(const FunctionSpace& what,
 
   if (tagsource!=0)
   {
-       m_data.resize(defaultvalue.size(),0.);	// since this is tagged data, we should have blocksize=1
+       m_data_r.resize(defaultvalue.size(),0.); // since this is tagged data, we should have blocksize=1
 
        DataTagged::DataMapType::const_iterator i;
        for (i=tagsource->getTagLookup().begin();i!=tagsource->getTagLookup().end();i++) {
-	  addTag(i->first);
+          addTag(i->first);
        }
   }
   else
   {
-	m_data.resize(defaultvalue.size());
+        m_data_r.resize(defaultvalue.size());
   }
 
   // need to set the default value ....
   for (int i=0; i<defaultvalue.size(); i++) {
-     m_data[i]=defaultvalue[i];
+     m_data_r[i]=defaultvalue[i];
   }
 }
 
+// Create a new object by copying tags
+DataTagged::DataTagged(const FunctionSpace& what,
+             const DataTypes::ShapeType& shape,
+             const DataTypes::CplxVectorType& defaultvalue,
+             const DataTagged* tagsource)
+ : parent(what,shape)
+{
+// This constructor has not been unit tested yet
+  m_iscompl=true;
+  
+  if (defaultvalue.size()!=DataTypes::noValues(shape)) {
+    throw DataException("Programming error - defaultvalue does not match supplied shape.");
+  }
+
+  if (!what.canTag())
+  {
+    throw DataException("Programming error - DataTag created with a non-taggable FunctionSpace.");
+  }
+
+  if (tagsource!=0)
+  {
+       m_data_c.resize(defaultvalue.size(),0.); // since this is tagged data, we should have blocksize=1
+
+       DataTagged::DataMapType::const_iterator i;
+       for (i=tagsource->getTagLookup().begin();i!=tagsource->getTagLookup().end();i++) {
+          addTag(i->first);
+       }
+  }
+  else
+  {
+        m_data_c.resize(defaultvalue.size());
+  }
+
+  // need to set the default value ....
+  for (int i=0; i<defaultvalue.size(); i++) {
+     m_data_c[i]=defaultvalue[i];
+  }
+}
+
+
 DataAbstract*
-DataTagged::deepCopy()
+DataTagged::deepCopy() const
 {
   return new DataTagged(*this);
 }
@@ -206,11 +303,13 @@ DataTagged::getSlice(const DataTypes::RegionType& region) const
 }
 
 DataTagged::DataTagged(const DataTagged& other, 
-		       const DataTypes::RegionType& region)
+                       const DataTypes::RegionType& region)
   : parent(other.getFunctionSpace(),DataTypes::getResultSliceShape(region))
 {
   // slice constructor
-
+  m_iscompl=other.isComplex();
+  
+  
   // get the shape of the slice to copy from other
   DataTypes::ShapeType regionShape(DataTypes::getResultSliceShape(region));
   DataTypes::RegionLoopRangeType regionLoopRange=DataTypes::getSliceRegionLoopRange(region);
@@ -218,20 +317,41 @@ DataTagged::DataTagged(const DataTagged& other,
   // allocate enough space in this for all values
   // (need to add one to allow for the default value)
   int len = DataTypes::noValues(regionShape)*(other.m_offsetLookup.size()+1);
-  m_data.resize(len,0.0,len);
-
-  // copy the default value from other to this
-  const DataTypes::ShapeType& otherShape=other.getShape();
-  const DataTypes::ValueType& otherData=other.getVectorRO();
-  DataTypes::copySlice(getVectorRW(),getShape(),getDefaultOffset(),otherData,otherShape,other.getDefaultOffset(), regionLoopRange);
-
-  // loop through the tag values copying these
-  DataMapType::const_iterator pos;
-  DataTypes::ValueType::size_type tagOffset=getNoValues();
-  for (pos=other.m_offsetLookup.begin();pos!=other.m_offsetLookup.end();pos++){
-    DataTypes::copySlice(m_data,getShape(),tagOffset,otherData, otherShape, pos->second, regionLoopRange);
-    m_offsetLookup.insert(DataMapType::value_type(pos->first,tagOffset));
-    tagOffset+=getNoValues();
+  if (m_iscompl)
+  {
+      m_data_c.resize(len,0.0,len);
+      // copy the default value from other to this
+      const DataTypes::ShapeType& otherShape=other.getShape();
+      const DataTypes::CplxVectorType& otherData=other.getTypedVectorRO((DataTypes::cplx_t)0);
+      DataTypes::copySlice(getTypedVectorRW((DataTypes::cplx_t)0),getShape(),getDefaultOffset(),otherData,otherShape,other.getDefaultOffset(), regionLoopRange);
+
+      // loop through the tag values copying these
+      DataMapType::const_iterator pos;
+      DataTypes::CplxVectorType::size_type tagOffset=getNoValues();
+      for (pos=other.m_offsetLookup.begin();pos!=other.m_offsetLookup.end();pos++){
+        DataTypes::copySlice(m_data_c,getShape(),tagOffset,otherData, otherShape, pos->second, regionLoopRange);
+        m_offsetLookup.insert(DataMapType::value_type(pos->first,tagOffset));
+        tagOffset+=getNoValues();
+      }      
+      
+      
+  }
+  else
+  {
+      m_data_r.resize(len,0.0,len);    
+      // copy the default value from other to this
+      const DataTypes::ShapeType& otherShape=other.getShape();
+      const DataTypes::RealVectorType& otherData=other.getTypedVectorRO((DataTypes::real_t)0);
+      DataTypes::copySlice(getTypedVectorRW((DataTypes::real_t)0),getShape(),getDefaultOffset(),otherData,otherShape,other.getDefaultOffset(), regionLoopRange);
+
+      // loop through the tag values copying these
+      DataMapType::const_iterator pos;
+      DataTypes::RealVectorType::size_type tagOffset=getNoValues();
+      for (pos=other.m_offsetLookup.begin();pos!=other.m_offsetLookup.end();pos++){
+        DataTypes::copySlice(m_data_r,getShape(),tagOffset,otherData, otherShape, pos->second, regionLoopRange);
+        m_offsetLookup.insert(DataMapType::value_type(pos->first,tagOffset));
+        tagOffset+=getNoValues();
+      }      
   }
 }
 
@@ -246,8 +366,13 @@ DataTagged::setSlice(const DataAbstract* other,
   if (otherTemp==0) {
     throw DataException("Programming error - casting to DataTagged.");
   }
-
+  if (isComplex()!=other->isComplex())
+  {
+    throw DataException("Error - cannot copy between slices of different complexity.");
+  }
   CHECK_FOR_EX_WRITE
+  
+  
 
   // determine shape of the specified region
   DataTypes::ShapeType regionShape(DataTypes::getResultSliceShape(region));
@@ -264,10 +389,20 @@ DataTagged::setSlice(const DataAbstract* other,
                          "Error - Couldn't copy slice due to shape mismatch.",regionShape,other->getShape()));
   }
 
-  const DataTypes::ValueType& otherData=otherTemp->getVectorRO();
+
   const DataTypes::ShapeType& otherShape=otherTemp->getShape();
-  // copy slice from other default value to this default value
-  DataTypes::copySliceFrom(m_data,getShape(),getDefaultOffset(),otherData,otherShape,otherTemp->getDefaultOffset(),regionLoopRange);
+  if (isComplex())      // from check earlier, other will have the same complexity
+  {
+      // copy slice from other default value to this default value
+      DataTypes::copySliceFrom(m_data_c,getShape(),getDefaultOffset(),otherTemp->getTypedVectorRO((DataTypes::cplx_t)0),
+                               otherShape,otherTemp->getDefaultOffset(),regionLoopRange);
+  } 
+  else
+  {
+      // copy slice from other default value to this default value
+      DataTypes::copySliceFrom(m_data_r,getShape(),getDefaultOffset(),otherTemp->getTypedVectorRO((DataTypes::real_t)0),
+                               otherShape,otherTemp->getDefaultOffset(),regionLoopRange);
+  }
 
   // loop through tag values in other, adding any which aren't in this, using default value
   DataMapType::const_iterator pos;
@@ -276,13 +411,22 @@ DataTagged::setSlice(const DataAbstract* other,
       addTag(pos->first);
     }
   }
-
-  // loop through the tag values copying slices from other to this
-  for (pos=m_offsetLookup.begin();pos!=m_offsetLookup.end();pos++) {
-    DataTypes::copySliceFrom(m_data,getShape(),getOffsetForTag(pos->first),otherData, otherShape, otherTemp->getOffsetForTag(pos->first), regionLoopRange);
-
+  if (isComplex())
+  {
+    // loop through the tag values copying slices from other to this
+    for (pos=m_offsetLookup.begin();pos!=m_offsetLookup.end();pos++) {
+      DataTypes::copySliceFrom(m_data_c,getShape(),getOffsetForTag(pos->first),otherTemp->getTypedVectorRO((DataTypes::cplx_t)0),
+                               otherShape, otherTemp->getOffsetForTag(pos->first), regionLoopRange);
+    }
+  }
+  else
+  {
+    // loop through the tag values copying slices from other to this
+    for (pos=m_offsetLookup.begin();pos!=m_offsetLookup.end();pos++) {
+      DataTypes::copySliceFrom(m_data_r,getShape(),getOffsetForTag(pos->first),otherTemp->getTypedVectorRO((DataTypes::real_t)0),
+                               otherShape, otherTemp->getOffsetForTag(pos->first), regionLoopRange);
+    }
   }
-
 }
 
 int
@@ -317,14 +461,46 @@ DataTagged::getTagNumber(int dpno)
 
 void
 DataTagged::setTaggedValue(int tagKey,
-			   const DataTypes::ShapeType& pointshape,
-                           const ValueType& value,
-			   int dataOffset)
+                           const DataTypes::ShapeType& pointshape,
+                           const DataTypes::RealVectorType& value,
+                           int dataOffset)
+{
+  if (!DataTypes::checkShape(getShape(), pointshape)) {
+      throw DataException(DataTypes::createShapeErrorMessage(
+                          "Error - Cannot setTaggedValue due to shape mismatch.", pointshape,getShape()));
+  }
+  if (isComplex())
+  {
+      throw DataException("Programming error - attempt to set a real value on complex data.");
+  }
+  CHECK_FOR_EX_WRITE
+  DataMapType::iterator pos(m_offsetLookup.find(tagKey));
+  if (pos==m_offsetLookup.end()) {
+    // tag couldn't be found so use addTaggedValue
+    addTaggedValue(tagKey,pointshape, value, dataOffset);
+  } else {
+    // copy the values into the data array at the offset determined by m_offsetLookup
+    int offset=pos->second;
+    for (unsigned int i=0; i<getNoValues(); i++) {
+      m_data_r[offset+i]=value[i+dataOffset];
+    }
+  }
+}
+
+void
+DataTagged::setTaggedValue(int tagKey,
+                           const DataTypes::ShapeType& pointshape,
+                           const DataTypes::CplxVectorType& value,
+                           int dataOffset)
 {
   if (!DataTypes::checkShape(getShape(), pointshape)) {
       throw DataException(DataTypes::createShapeErrorMessage(
                           "Error - Cannot setTaggedValue due to shape mismatch.", pointshape,getShape()));
   }
+  if (!isComplex())
+  {
+      throw DataException("Programming error - attempt to set a complex value on real data.");
+  }
   CHECK_FOR_EX_WRITE
   DataMapType::iterator pos(m_offsetLookup.find(tagKey));
   if (pos==m_offsetLookup.end()) {
@@ -334,7 +510,7 @@ DataTagged::setTaggedValue(int tagKey,
     // copy the values into the data array at the offset determined by m_offsetLookup
     int offset=pos->second;
     for (unsigned int i=0; i<getNoValues(); i++) {
-      m_data[offset+i]=value[i+dataOffset];
+      m_data_c[offset+i]=value[i+dataOffset];
     }
   }
 }
@@ -342,13 +518,13 @@ DataTagged::setTaggedValue(int tagKey,
 
 void
 DataTagged::addTaggedValues(const TagListType& tagKeys,
-                            const ValueBatchType& values,
+                            const FloatBatchType& values,
                             const ShapeType& vShape)
 {
-  DataTypes::ValueType t(values.size(),0);
+  DataTypes::RealVectorType t(values.size(),0);
   for (size_t i=0;i<values.size();++i)
   {
-	t[i]=values[i];
+        t[i]=values[i];
   }
   addTaggedValues(tagKeys,t,vShape);
 }
@@ -357,7 +533,7 @@ DataTagged::addTaggedValues(const TagListType& tagKeys,
 // Note: The check to see if vShape==our shape is done in the addTaggedValue method
 void
 DataTagged::addTaggedValues(const TagListType& tagKeys,
-                            const ValueType& values,
+                            const DataTypes::RealVectorType& values,
                             const ShapeType& vShape)
 {
   unsigned int n=getNoValues();
@@ -379,7 +555,7 @@ DataTagged::addTaggedValues(const TagListType& tagKeys,
     if (tagKeys.size()!=numVals) {
       stringstream temp;
       temp << "Error - (addTaggedValue) Number of tags: " << tagKeys.size()
-	   << " doesn't match number of values: " << values.size();
+           << " doesn't match number of values: " << values.size();
       throw DataException(temp.str());
     } else {
       unsigned int i;
@@ -396,14 +572,18 @@ DataTagged::addTaggedValues(const TagListType& tagKeys,
 
 void
 DataTagged::addTaggedValue(int tagKey,
-			   const DataTypes::ShapeType& pointshape,
-                           const ValueType& value,
-			   int dataOffset)
+                           const DataTypes::ShapeType& pointshape,
+                           const DataTypes::RealVectorType& value,
+                           int dataOffset)
 {
   if (!DataTypes::checkShape(getShape(), pointshape)) {
     throw DataException(DataTypes::createShapeErrorMessage(
                         "Error - Cannot addTaggedValue due to shape mismatch.", pointshape,getShape()));
   }
+  if (isComplex())
+  {
+      throw DataException("Programming error - attempt to set a real value on complex data.");
+  }
   CHECK_FOR_EX_WRITE
   DataMapType::iterator pos(m_offsetLookup.find(tagKey));
   if (pos!=m_offsetLookup.end()) {
@@ -411,62 +591,133 @@ DataTagged::addTaggedValue(int tagKey,
     setTaggedValue(tagKey,pointshape, value, dataOffset);
   } else {
     // save the key and the location of its data in the lookup tab
-    m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data.size()));
-    // add the data given in "value" at the end of m_data
-    // need to make a temp copy of m_data, resize m_data, then copy
-    // all the old values plus the value to be added back into m_data
-    ValueType m_data_temp(m_data);
-    int oldSize=m_data.size();
-    int newSize=m_data.size()+getNoValues();
-    m_data.resize(newSize,0.,newSize);
+    m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data_r.size()));
+    // add the data given in "value" at the end of m_data_r
+    // need to make a temp copy of m_data_r, resize m_data_r, then copy
+    // all the old values plus the value to be added back into m_data_r
+    DataTypes::RealVectorType m_data_r_temp(m_data_r);
+    int oldSize=m_data_r.size();
+    int newSize=m_data_r.size()+getNoValues();
+    m_data_r.resize(newSize,0.,newSize);
     for (int i=0;i<oldSize;i++) {
-      m_data[i]=m_data_temp[i];
+      m_data_r[i]=m_data_r_temp[i];
     }
     for (unsigned int i=0;i<getNoValues();i++) {
-      m_data[oldSize+i]=value[i+dataOffset];
+      m_data_r[oldSize+i]=value[i+dataOffset];
     }
   }
 }
 
+
 void
-DataTagged::addTag(int tagKey)
+DataTagged::addTaggedValue(int tagKey,
+                           const DataTypes::ShapeType& pointshape,
+                           const DataTypes::CplxVectorType& value,
+                           int dataOffset)
 {
+  if (!DataTypes::checkShape(getShape(), pointshape)) {
+    throw DataException(DataTypes::createShapeErrorMessage(
+                        "Error - Cannot addTaggedValue due to shape mismatch.", pointshape,getShape()));
+  }
+  if (!isComplex())
+  {
+      throw DataException("Programming error - attempt to set a complex value on real data.");
+  }
   CHECK_FOR_EX_WRITE
   DataMapType::iterator pos(m_offsetLookup.find(tagKey));
   if (pos!=m_offsetLookup.end()) {
     // tag already exists so use setTaggedValue
-//    setTaggedValue(tagKey,value);
+    setTaggedValue(tagKey,pointshape, value, dataOffset);
   } else {
     // save the key and the location of its data in the lookup tab
-    m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data.size()));
-    // add the data given in "value" at the end of m_data
-    // need to make a temp copy of m_data, resize m_data, then copy
-    // all the old values plus the value to be added back into m_data
-    ValueType m_data_temp(m_data);
-    int oldSize=m_data.size();
-    int newSize=m_data.size()+getNoValues();
-    m_data.resize(newSize,0.,newSize);
+    m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data_c.size()));
+    // add the data given in "value" at the end of m_data_c
+    // need to make a temp copy of m_data_c, resize m_data_c, then copy
+    // all the old values plus the value to be added back into m_data_c
+    DataTypes::CplxVectorType m_data_c_temp(m_data_c);
+    int oldSize=m_data_c.size();
+    int newSize=m_data_c.size()+getNoValues();
+    m_data_c.resize(newSize,0.,newSize);
     for (int i=0;i<oldSize;i++) {
-      m_data[i]=m_data_temp[i];
+      m_data_c[i]=m_data_c_temp[i];
     }
     for (unsigned int i=0;i<getNoValues();i++) {
-      m_data[oldSize+i]=m_data[m_defaultValueOffset+i];
+      m_data_c[oldSize+i]=value[i+dataOffset];
     }
   }
 }
 
+void
+DataTagged::addTag(int tagKey)
+{
+  CHECK_FOR_EX_WRITE
+  DataMapType::iterator pos(m_offsetLookup.find(tagKey));
+  if (pos==m_offsetLookup.end()) {
+    if (isComplex())
+    {
+	// save the key and the location of its data in the lookup tab
+	m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data_c.size()));
+	// add the data given in "value" at the end of m_data_c
+	// need to make a temp copy of m_data_c, resize m_data_c, then copy
+	// all the old values plus the value to be added back into m_data_c
+	DataTypes::CplxVectorType m_data_c_temp(m_data_c);
+	int oldSize=m_data_c.size();
+	int newSize=m_data_c.size()+getNoValues();
+	m_data_c.resize(newSize,0.,newSize);
+	for (int i=0;i<oldSize;i++) {
+	  m_data_c[i]=m_data_c_temp[i];
+	}
+	for (unsigned int i=0;i<getNoValues();i++) {
+	  m_data_c[oldSize+i]=m_data_c[m_defaultValueOffset+i];
+	}
+    }
+    else
+    {
+	// save the key and the location of its data in the lookup tab
+	m_offsetLookup.insert(DataMapType::value_type(tagKey,m_data_r.size()));
+	// add the data given in "value" at the end of m_data_r
+	// need to make a temp copy of m_data_r, resize m_data_r, then copy
+	// all the old values plus the value to be added back into m_data_r
+	DataTypes::RealVectorType m_data_r_temp(m_data_r);
+	int oldSize=m_data_r.size();
+	int newSize=m_data_r.size()+getNoValues();
+	m_data_r.resize(newSize,0.,newSize);
+	for (int i=0;i<oldSize;i++) {
+	  m_data_r[i]=m_data_r_temp[i];
+	}
+	for (unsigned int i=0;i<getNoValues();i++) {
+	  m_data_r[oldSize+i]=m_data_r[m_defaultValueOffset+i];
+	}
+    }
+  }
+}
+
+
+DataTypes::real_t*
+DataTagged::getSampleDataByTag(int tag, DataTypes::real_t dummy)
+{
+  CHECK_FOR_EX_WRITE
+  DataMapType::iterator pos(m_offsetLookup.find(tag));
+  if (pos==m_offsetLookup.end()) {
+    // tag couldn't be found so return the default value
+    return &(m_data_r[0]);
+  } else {
+    // return the data-point corresponding to the given tag
+    return &(m_data_r[pos->second]);
+  }
+}
 
-double*
-DataTagged::getSampleDataByTag(int tag)
+DataTypes::cplx_t*
+DataTagged::getSampleDataByTag(int tag, DataTypes::cplx_t dummy)
 {
   CHECK_FOR_EX_WRITE
   DataMapType::iterator pos(m_offsetLookup.find(tag));
   if (pos==m_offsetLookup.end()) {
     // tag couldn't be found so return the default value
-    return &(m_data[0]);
+    return &(m_data_c[0]);
   } else {
     // return the data-point corresponding to the given tag
-    return &(m_data[pos->second]);
+    return &(m_data_c[pos->second]);
   }
 }
 
@@ -475,29 +726,82 @@ bool
 DataTagged::hasNaN() const
 {
   bool haveNaN=false;
-  #pragma omp parallel for
-	for (ValueType::size_type i=0;i<m_data.size();++i)
-	{
-		if (nancheck(m_data[i]))	// can't assume we have new standard NaN checking
-		{
-        #pragma omp critical 
+  if (isComplex())
+  {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+          if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))
+          {
+              #pragma omp critical 
+              {
+                  haveNaN=true;
+              }
+          }
+      }
+  }
+  else
+  {
+      #pragma omp parallel for
+      for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
+      {
+          if (std::isnan(m_data_r[i]))
+          {
+              #pragma omp critical 
+              {
+                  haveNaN=true;
+              }
+          }
+      }
+  }
+  return haveNaN;
+}
+
+void
+DataTagged::replaceNaN(double value) {
+  CHECK_FOR_EX_WRITE  
+  if (isComplex())
+  {
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+        if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag()))  
         {
-            haveNaN=true;
+          m_data_c[i] = value;
         }
-		}
-	}
-	return haveNaN;
+      }
+  }
+  else
+  {
+      #pragma omp parallel for
+      for (DataTypes::RealVectorType::size_type i=0;i<m_data_r.size();++i)
+      {
+        if (std::isnan(m_data_r[i]))  
+        {
+          m_data_r[i] = value;
+        }
+      }    
+  }
 }
 
 void
-DataTagged::replaceNaN(double value) {
-  #pragma omp parallel for
-  for (ValueType::size_type i=0;i<m_data.size();++i)
+DataTagged::replaceNaN(DataTypes::cplx_t value) {
+  CHECK_FOR_EX_WRITE  
+  if (isComplex())
   {
-    if (nancheck(m_data[i]))  
-    {
-      m_data[i] = value;
-    }
+      #pragma omp parallel for
+      for (DataTypes::CplxVectorType::size_type i=0;i<m_data_c.size();++i)
+      {
+        if (std::isnan(m_data_c[i].real()) || std::isnan(m_data_c[i].imag())) 
+        {
+          m_data_c[i] = value;
+        }
+      }
+  }
+  else
+  {
+      complicate();
+      replaceNaN(value);
   }
 }
 
@@ -510,76 +814,96 @@ DataTagged::toString() const
   stringstream temp;
   DataMapType::const_iterator i;
   temp << "Tag(Default)" << endl;
-  temp << pointToString(m_data,getShape(),getDefaultOffset(),empty) << endl;
-  // create a temporary view as the offset will be changed
-//   DataArrayView tempView(getPointDataView().getData(), getPointDataView().getShape());
-  for (i=m_offsetLookup.begin();i!=m_offsetLookup.end();++i) {
-    temp << "Tag(" << i->first << ")" << endl;
-    temp << pointToString(m_data,getShape(),i->second,empty) << endl;
-//     tempView.setOffset(i->second);
-//     temp << tempView.toString() << endl;
+  
+  if (isComplex())
+  {
+  
+      temp << pointToString(m_data_c,getShape(),getDefaultOffset(),empty) << endl;
+      for (i=m_offsetLookup.begin();i!=m_offsetLookup.end();++i) {
+        temp << "Tag(" << i->first << ")" << endl;
+        temp << pointToString(m_data_c,getShape(),i->second,empty) << endl;
+      }
+  }
+  else
+  {
+  
+      temp << pointToString(m_data_r,getShape(),getDefaultOffset(),empty) << endl;
+      for (i=m_offsetLookup.begin();i!=m_offsetLookup.end();++i) {
+        temp << "Tag(" << i->first << ")" << endl;
+        temp << pointToString(m_data_r,getShape(),i->second,empty) << endl;
+      }    
   }
   return temp.str();
 }
 
-DataTypes::ValueType::size_type 
+DataTypes::RealVectorType::size_type 
 DataTagged::getPointOffset(int sampleNo,
                            int dataPointNo) const
 {
   int tagKey=getFunctionSpace().getTagFromSampleNo(sampleNo);
   DataMapType::const_iterator pos(m_offsetLookup.find(tagKey));
-  DataTypes::ValueType::size_type offset=m_defaultValueOffset;
+  DataTypes::RealVectorType::size_type offset=m_defaultValueOffset;
   if (pos!=m_offsetLookup.end()) {
     offset=pos->second;
   }
   return offset;
 }
 
-DataTypes::ValueType::size_type 
-DataTagged::getPointOffset(int sampleNo,
-                           int dataPointNo)
+DataTypes::RealVectorType::size_type
+DataTagged::getOffsetForTag(int tag) const
 {
-  int tagKey=getFunctionSpace().getTagFromSampleNo(sampleNo);
-  DataMapType::const_iterator pos(m_offsetLookup.find(tagKey));
-  DataTypes::ValueType::size_type offset=m_defaultValueOffset;
+  DataMapType::const_iterator pos(m_offsetLookup.find(tag));
+  DataTypes::RealVectorType::size_type offset=m_defaultValueOffset;
   if (pos!=m_offsetLookup.end()) {
     offset=pos->second;
   }
   return offset;
 }
 
-DataTypes::ValueType::size_type
-DataTagged::getOffsetForTag(int tag) const
+DataTypes::RealVectorType::const_reference
+DataTagged::getDataByTagRO(int tag, DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy) const
 {
   DataMapType::const_iterator pos(m_offsetLookup.find(tag));
-  DataTypes::ValueType::size_type offset=m_defaultValueOffset;
+  DataTypes::RealVectorType::size_type offset=m_defaultValueOffset;
   if (pos!=m_offsetLookup.end()) {
     offset=pos->second;
   }
-  return offset;
+  return m_data_r[offset+i];
 }
 
-DataTypes::ValueType::const_reference
-DataTagged::getDataByTagRO(int tag, DataTypes::ValueType::size_type i) const
+DataTypes::RealVectorType::reference
+DataTagged::getDataByTagRW(int tag, DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy)
 {
+  CHECK_FOR_EX_WRITE
   DataMapType::const_iterator pos(m_offsetLookup.find(tag));
-  DataTypes::ValueType::size_type offset=m_defaultValueOffset;
+  DataTypes::RealVectorType::size_type offset=m_defaultValueOffset;
   if (pos!=m_offsetLookup.end()) {
     offset=pos->second;
   }
-  return m_data[offset+i];
+  return m_data_r[offset+i];
 }
 
-DataTypes::ValueType::reference
-DataTagged::getDataByTagRW(int tag, DataTypes::ValueType::size_type i)
+DataTypes::CplxVectorType::const_reference
+DataTagged::getDataByTagRO(int tag, DataTypes::RealVectorType::size_type i, DataTypes::cplx_t dummy) const
+{
+  DataMapType::const_iterator pos(m_offsetLookup.find(tag));
+  DataTypes::CplxVectorType::size_type offset=m_defaultValueOffset;
+  if (pos!=m_offsetLookup.end()) {
+    offset=pos->second;
+  }
+  return m_data_c[offset+i];
+}
+
+DataTypes::CplxVectorType::reference
+DataTagged::getDataByTagRW(int tag, DataTypes::RealVectorType::size_type i, DataTypes::cplx_t dummy)
 {
   CHECK_FOR_EX_WRITE
   DataMapType::const_iterator pos(m_offsetLookup.find(tag));
-  DataTypes::ValueType::size_type offset=m_defaultValueOffset;
+  DataTypes::CplxVectorType::size_type offset=m_defaultValueOffset;
   if (pos!=m_offsetLookup.end()) {
     offset=pos->second;
   }
-  return m_data[offset+i];
+  return m_data_c[offset+i];
 }
 
 void
@@ -592,37 +916,119 @@ DataTagged::symmetric(DataAbstract* ev)
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
+
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::symmetric(m_data_c,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::symmetric(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getTypedVectorRW(0.0);
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::symmetric(m_data_r,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::symmetric(m_data_r,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
+  }
+}
+
+
+void
+DataTagged::antisymmetric(DataAbstract* ev)
+{
+  DataTagged* temp_ev=dynamic_cast<DataTagged*>(ev);
+  if (temp_ev==0) {
+    throw DataException("Error - DataTagged::antisymmetric casting to DataTagged failed (probably a programming error).");
+  }
+  const DataTagged::DataMapType& thisLookup=getTagLookup();
+  DataTagged::DataMapType::const_iterator i;
+  DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
+  const ShapeType& evShape=temp_ev->getShape();
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::antisymmetric(m_data_c,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::antisymmetric(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getTypedVectorRW(0.0);
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::antisymmetric(m_data_r,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::antisymmetric(m_data_r,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
+  }  
+}
+
+void
+DataTagged::hermitian(DataAbstract* ev)
+{
+  DataTagged* temp_ev=dynamic_cast<DataTagged*>(ev);
+  if (temp_ev==0) {
+    throw DataException("Error - DataTagged::hermitian casting to DataTagged failed (probably a programming error).");
+  }
+  if (!isComplex() || !temp_ev->isComplex())
+  {
+      throw DataException("DataTagged::hermitian: do not call this method with real data");
+  }  
+  
+  const DataTagged::DataMapType& thisLookup=getTagLookup();
+  DataTagged::DataMapType::const_iterator i;
+  DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
+  const ShapeType& evShape=temp_ev->getShape();
+
+  DataTypes::CplxVectorType& evVec=temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
   for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
       temp_ev->addTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::symmetric(m_data,getShape(),offset,evVec, evShape, evoffset);
+      DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+      DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+      escript::hermitian(m_data_c,getShape(),offset,evVec, evShape, evoffset);
   }
-  DataMaths::symmetric(m_data,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());
+  escript::hermitian(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
 }
 
 
 void
-DataTagged::nonsymmetric(DataAbstract* ev)
+DataTagged::antihermitian(DataAbstract* ev)
 {
   DataTagged* temp_ev=dynamic_cast<DataTagged*>(ev);
   if (temp_ev==0) {
-    throw DataException("Error - DataTagged::nonsymmetric casting to DataTagged failed (probably a programming error).");
+    throw DataException("Error - DataTagged::antihermitian casting to DataTagged failed (probably a programming error).");
   }
+  if (!isComplex() || !temp_ev->isComplex())
+  {
+      throw DataException("DataTagged::antihermitian: do not call this method with real data");
+  }  
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
+  DataTypes::CplxVectorType& evVec=temp_ev->getTypedVectorRW((DataTypes::cplx_t)0);
   for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
       temp_ev->addTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::nonsymmetric(m_data,getShape(),offset,evVec, evShape, evoffset);
+      DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+      DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+      escript::antihermitian(m_data_c,getShape(),offset,evVec, evShape, evoffset);
   }
-  DataMaths::nonsymmetric(m_data,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());
+  escript::antihermitian(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset());      
 }
 
 
@@ -636,15 +1042,29 @@ DataTagged::trace(DataAbstract* ev, int axis_offset)
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
-      temp_ev->addTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::trace(m_data,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::trace(m_data_c,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+      }
+      escript::trace(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::trace(m_data_r,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+      }
+      escript::trace(m_data_r,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
   }
-  DataMaths::trace(m_data,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
 }
 
 void
@@ -657,15 +1077,29 @@ DataTagged::transpose(DataAbstract* ev, int axis_offset)
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
-      temp_ev->addTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::transpose(m_data,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::transpose(m_data_c,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+      }
+      escript::transpose(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::transpose(m_data_r,getShape(),offset,evVec, evShape, evoffset, axis_offset);
+      }
+      escript::transpose(m_data_r,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
   }
-  DataMaths::transpose(m_data,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis_offset);
 }
 
 void
@@ -678,15 +1112,29 @@ DataTagged::swapaxes(DataAbstract* ev, int axis0, int axis1)
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
-      temp_ev->addTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::swapaxes(m_data,getShape(),offset,evVec, evShape, evoffset,axis0,axis1);
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::swapaxes(m_data_c,getShape(),offset,evVec, evShape, evoffset,axis0,axis1);
+      }
+      escript::swapaxes(m_data_c,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis0,axis1);    
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();  
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::swapaxes(m_data_r,getShape(),offset,evVec, evShape, evoffset,axis0,axis1);
+      }
+      escript::swapaxes(m_data_r,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis0,axis1);
   }
-  DataMaths::swapaxes(m_data,getShape(),getDefaultOffset(),evVec,evShape,temp_ev->getDefaultOffset(),axis0,axis1);
 }
 
 void
@@ -699,17 +1147,31 @@ DataTagged::eigenvalues(DataAbstract* ev)
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
-      temp_ev->addTag(i->first);
-//       DataArrayView thisView=getDataPointByTag(i->first);
-//       DataArrayView evView=temp_ev->getDataPointByTag(i->first);
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataMaths::eigenvalues(m_data,getShape(),offset,evVec, evShape, evoffset);
+  if (isComplex())
+  {
+      DataTypes::CplxVectorType& evVec=temp_ev->getVectorRWC();
+
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::CplxVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::CplxVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::eigenvalues(m_data_c,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::eigenvalues(m_data_c,getShape(),getDefaultOffset(),evVec, evShape, temp_ev->getDefaultOffset());
+  }
+  else
+  {
+      DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();
+
+      for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
+	  temp_ev->addTag(i->first);
+	  DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+	  DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+	  escript::eigenvalues(m_data_r,getShape(),offset,evVec, evShape, evoffset);
+      }
+      escript::eigenvalues(m_data_r,getShape(),getDefaultOffset(),evVec, evShape, temp_ev->getDefaultOffset());
   }
-  DataMaths::eigenvalues(m_data,getShape(),getDefaultOffset(),evVec, evShape, temp_ev->getDefaultOffset());
 }
 void
 DataTagged::eigenvalues_and_eigenvectors(DataAbstract* ev,DataAbstract* V,const double tol)
@@ -725,9 +1187,9 @@ DataTagged::eigenvalues_and_eigenvectors(DataAbstract* ev,DataAbstract* V,const
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& evVec=temp_ev->getVectorRW();
+  DataTypes::RealVectorType& evVec=temp_ev->getVectorRW();
   const ShapeType& evShape=temp_ev->getShape();
-  ValueType& VVec=temp_V->getVectorRW();
+  DataTypes::RealVectorType& VVec=temp_V->getVectorRW();
   const ShapeType& VShape=temp_V->getShape();
   for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
       temp_ev->addTag(i->first);
@@ -735,16 +1197,16 @@ DataTagged::eigenvalues_and_eigenvectors(DataAbstract* ev,DataAbstract* V,const
 /*      DataArrayView thisView=getDataPointByTag(i->first);
       DataArrayView evView=temp_ev->getDataPointByTag(i->first);
       DataArrayView VView=temp_V->getDataPointByTag(i->first);*/
-      DataTypes::ValueType::size_type offset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type Voffset=temp_V->getOffsetForTag(i->first);
+      DataTypes::RealVectorType::size_type offset=getOffsetForTag(i->first);
+      DataTypes::RealVectorType::size_type evoffset=temp_ev->getOffsetForTag(i->first);
+      DataTypes::RealVectorType::size_type Voffset=temp_V->getOffsetForTag(i->first);
 /*      DataArrayView::eigenvalues_and_eigenvectors(thisView,0,evView,0,VView,0,tol);*/
-      DataMaths::eigenvalues_and_eigenvectors(m_data,getShape(),offset,evVec, evShape, evoffset,VVec,VShape,Voffset,tol);
+      escript::eigenvalues_and_eigenvectors(m_data_r,getShape(),offset,evVec, evShape, evoffset,VVec,VShape,Voffset,tol);
 
   }
-  DataMaths::eigenvalues_and_eigenvectors(m_data,getShape(),getDefaultOffset(),evVec, evShape,
-					  temp_ev->getDefaultOffset(),VVec,VShape,
-					  temp_V->getDefaultOffset(), tol);
+  escript::eigenvalues_and_eigenvectors(m_data_r,getShape(),getDefaultOffset(),evVec, evShape,
+                                          temp_ev->getDefaultOffset(),VVec,VShape,
+                                          temp_V->getDefaultOffset(), tol);
 
 
 }
@@ -755,30 +1217,30 @@ DataTagged::matrixInverse(DataAbstract* out) const
   DataTagged* temp=dynamic_cast<DataTagged*>(out);
   if (temp==0)
   {
-	throw DataException("Error - DataTagged::matrixInverse: casting to DataTagged failed (probably a programming error).");
+        throw DataException("Error - DataTagged::matrixInverse: casting to DataTagged failed (probably a programming error).");
   }
   if (getRank()!=2)
   {
-	throw DataException("Error - DataExpanded::matrixInverse: input must be rank 2.");
+        throw DataException("Error - DataTagged::matrixInverse: input must be rank 2.");
   }
   const DataTagged::DataMapType& thisLookup=getTagLookup();
   DataTagged::DataMapType::const_iterator i;
   DataTagged::DataMapType::const_iterator thisLookupEnd=thisLookup.end();
-  ValueType& outVec=temp->getVectorRW();
+  DataTypes::RealVectorType& outVec=temp->getVectorRW();
   const ShapeType& outShape=temp->getShape();
   LapackInverseHelper h(getShape()[0]);
   int err=0;
   for (i=thisLookup.begin();i!=thisLookupEnd;i++) {
       temp->addTag(i->first);
-      DataTypes::ValueType::size_type inoffset=getOffsetForTag(i->first);
-      DataTypes::ValueType::size_type outoffset=temp->getOffsetForTag(i->first);
+      DataTypes::RealVectorType::size_type inoffset=getOffsetForTag(i->first);
+      DataTypes::RealVectorType::size_type outoffset=temp->getOffsetForTag(i->first);
 
-      err=DataMaths::matrix_inverse(m_data, getShape(), inoffset, outVec, outShape, outoffset, 1, h);
+      err=escript::matrix_inverse(m_data_r, getShape(), inoffset, outVec, outShape, outoffset, 1, h);
       if (!err) break;
   }
   if (!err)
   {
-      DataMaths::matrix_inverse(m_data, getShape(), getDefaultOffset(), outVec, outShape, temp->getDefaultOffset(), 1, h);
+      escript::matrix_inverse(m_data_r, getShape(), getDefaultOffset(), outVec, outShape, temp->getDefaultOffset(), 1, h);
   }
   return err;
 }
@@ -786,14 +1248,14 @@ DataTagged::matrixInverse(DataAbstract* out) const
 void
 DataTagged::setToZero(){
     CHECK_FOR_EX_WRITE
-    DataTypes::ValueType::size_type n=m_data.size();
-    for (int i=0; i<n ;++i) m_data[i]=0.;
+    DataTypes::RealVectorType::size_type n=m_data_r.size();
+    for (DataTypes::RealVectorType::size_type i=0; i<n; ++i) m_data_r[i]=0.;
 }
 
 void
 DataTagged::dump(const std::string fileName) const
 {
-   #ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
    const int ldims=DataTypes::maxRank+1;
    const NcDim* ncdims[ldims];
    NcVar *var, *tags_var;
@@ -801,24 +1263,23 @@ DataTagged::dump(const std::string fileName) const
    int type=  getFunctionSpace().getTypeCode();
    int ndims =0;
    long dims[ldims];
-   const double* d_ptr=&(m_data[0]);
+   const double* d_ptr=&(m_data_r[0]);
    DataTypes::ShapeType shape = getShape();
-   int mpi_iam=getFunctionSpace().getDomain()->getMPIRank();
-   int mpi_num=getFunctionSpace().getDomain()->getMPISize();
+   JMPI mpiInfo(getFunctionSpace().getDomain()->getMPI());
 #ifdef ESYS_MPI
+   const int mpi_iam = mpiInfo->rank;
+   const int mpi_num = mpiInfo->size;
    MPI_Status status;
-#endif
 
-#ifdef ESYS_MPI
    /* Serialize NetCDF I/O */
-   if (mpi_iam>0) MPI_Recv(&ndims, 0, MPI_INT, mpi_iam-1, 81803, MPI_COMM_WORLD, &status);
+   if (mpi_iam > 0)
+       MPI_Recv(&ndims, 0, MPI_INT, mpi_iam-1, 81803, mpiInfo->comm, &status);
 #endif
 
    // netCDF error handler
    NcError err(NcError::verbose_nonfatal);
    // Create the file.
-   const std::string newFileName(esysUtils::appendRankToFileName(fileName,
-                                                            mpi_num, mpi_iam));
+   const std::string newFileName(mpiInfo->appendRankToFileName(fileName));
    NcFile dataFile(newFileName.c_str(), NcFile::Replace);
    // check if writing was successful
    if (!dataFile.is_valid())
@@ -886,17 +1347,56 @@ DataTagged::dump(const std::string fileName) const
    #endif
 }
 
-DataTypes::ValueType&
+DataTypes::RealVectorType&
 DataTagged::getVectorRW()
 {
     CHECK_FOR_EX_WRITE
-    return m_data;
+    return m_data_r;
 }
 
-const DataTypes::ValueType&
+const DataTypes::RealVectorType&
 DataTagged::getVectorRO() const
 {
-	return m_data;
+        return m_data_r;
+}
+
+DataTypes::CplxVectorType&
+DataTagged::getVectorRWC()
+{
+    CHECK_FOR_EX_WRITE
+    return m_data_c;
+}
+
+const DataTypes::CplxVectorType&
+DataTagged::getVectorROC() const
+{
+        return m_data_c;
+}
+
+DataTypes::RealVectorType&
+DataTagged::getTypedVectorRW(DataTypes::real_t dummy)
+{
+  CHECK_FOR_EX_WRITE
+  return m_data_r;
+}
+
+const DataTypes::RealVectorType&
+DataTagged::getTypedVectorRO(DataTypes::real_t dummy) const
+{
+  return m_data_r;
+}
+
+DataTypes::CplxVectorType&
+DataTagged::getTypedVectorRW(DataTypes::cplx_t dummy)
+{
+  CHECK_FOR_EX_WRITE
+  return m_data_c;
+}
+
+const DataTypes::CplxVectorType&
+DataTagged::getTypedVectorRO(DataTypes::cplx_t dummy) const
+{
+  return m_data_c;
 }
 
 size_t
@@ -905,4 +1405,16 @@ DataTagged::getTagCount() const
     return m_offsetLookup.size();
 }
 
+
+void DataTagged::complicate()
+{
+    if (!isComplex())
+    {
+        fillComplexFromReal(m_data_r, m_data_c);
+        this->m_iscompl=true;
+        m_data_r.resize(0,0,1);
+    }
+}
+
 }  // end of namespace
+
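For context on the storage layout that the DataTagged constructors, addTag and getSampleDataByTag overloads above assume: the flat data vector holds the default value block first, each added tag appends one block of getNoValues() entries, and a tag-to-offset map resolves lookups, falling back to the default block when a tag is unknown. The following standalone sketch illustrates that scheme with made-up names; it is not escript code.

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <vector>

    // Toy tagged store with a fixed block size per data point.
    struct TaggedStore {
        std::size_t blockSize;
        std::vector<double> data;          // default block first, then one block per tag
        std::map<int, std::size_t> offset; // tag -> offset into data

        TaggedStore(std::size_t n, double defaultValue)
            : blockSize(n), data(n, defaultValue) {}

        void addTag(int tag, double fill) {
            offset[tag] = data.size();                // new block starts at the current end
            data.insert(data.end(), blockSize, fill);
        }

        // Unknown tags resolve to the default block at offset 0.
        const double* byTag(int tag) const {
            std::map<int, std::size_t>::const_iterator pos = offset.find(tag);
            return &data[pos == offset.end() ? 0 : pos->second];
        }
    };

    int main() {
        TaggedStore s(1, 0.0);   // scalar data points, default value 0.0
        s.addTag(7, 3.5);
        std::cout << *s.byTag(7) << " " << *s.byTag(42) << "\n";   // prints: 3.5 0
        return 0;
    }
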
diff --git a/escriptcore/src/DataTagged.h b/escriptcore/src/DataTagged.h
index a0bcd72..91bc8ad 100644
--- a/escriptcore/src/DataTagged.h
+++ b/escriptcore/src/DataTagged.h
@@ -49,8 +49,8 @@ class ESCRIPT_DLL_API DataTagged : public DataReady
   //
   // Types for the lists of tags and values.
   typedef std::vector<int> TagListType;
-  typedef DataTypes::ValueType ValueType;
-  typedef std::vector<ValueType::ElementType> ValueBatchType;
+  typedef std::vector<DataTypes::RealVectorType::ElementType> FloatBatchType;
+  typedef std::vector<DataTypes::CplxVectorType::ElementType> CplxBatchType;
 
   //
   // Map from a tag to an offset into the data array. 
@@ -58,18 +58,6 @@ class ESCRIPT_DLL_API DataTagged : public DataReady
 
   /**
      \brief
-     Default constructor for DataTagged.
-
-     Description:
-     Default constructor for DataTagged. Creates a DataTagged object for which
-     the default data-point is a scalar data-point with value 0.0, and no other
-     tag values are stored.
-    T
-  */
-  DataTagged();
-
-  /**
-     \brief
      Alternative Constructor for DataTagged.
 
      Description:
@@ -80,10 +68,17 @@ class ESCRIPT_DLL_API DataTagged : public DataReady
      \param data - The data values for each tag.
     NB: no unit testing yet
   */
-  DataTagged(const FunctionSpace& what,
+  explicit DataTagged(const FunctionSpace& what,
+             const DataTypes::ShapeType &shape,
+             const int tags[],
+             const DataTypes::RealVectorType& data);
+  
+  
+  explicit DataTagged(const FunctionSpace& what,
              const DataTypes::ShapeType &shape,
              const int tags[],
-             const ValueType& data);
+             const DataTypes::CplxVectorType& data);  
+  
 
  /**
      \brief
@@ -97,10 +92,16 @@ class ESCRIPT_DLL_API DataTagged : public DataReady
      \param data - The data values for each tag.
 TODO Make sure to document the relationship between tags and data, ie: data also contains the default value
  */
-  DataTagged(const FunctionSpace& what,
+  explicit DataTagged(const FunctionSpace& what,
+             const DataTypes::ShapeType &shape,
+             const TagListType& tags,
+             const DataTypes::RealVectorType& data);
+  
+  explicit DataTagged(const FunctionSpace& what,
              const DataTypes::ShapeType &shape,
              const TagListType& tags,
-             const ValueType& data);
+             const DataTypes::CplxVectorType& data);  
+  
 
   /**
      \brief
@@ -117,7 +118,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
      The default value will be the value of the DataConstant object.
     T
   */
-  DataTagged(const DataConstant& other);
+  explicit DataTagged(const DataConstant& other);
 
   /**
      \brief
@@ -129,11 +130,17 @@ TODO Make sure to document the relationship between tags and data, ie: data also
      \param defaultvalue - Input - Default value for new DataTagged
      \param tagsource - Input - A DataTagged object which supplies the tags. 
   */
-  DataTagged(const FunctionSpace& what,
+  explicit DataTagged(const FunctionSpace& what,
              const DataTypes::ShapeType& shape,
-             const DataTypes::ValueType& defaultvalue,
+             const DataTypes::RealVectorType& defaultvalue,
              const DataTagged* tagsource=0);
 
+  explicit DataTagged(const FunctionSpace& what,
+             const DataTypes::ShapeType& shape,
+             const DataTypes::CplxVectorType& defaultvalue,
+             const DataTagged* tagsource=0);  
+  
+  
   /**
      \brief
      Destructor
@@ -157,14 +164,22 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   \brief replaces all NaN values with value 
   */
   void
-  replaceNaN(double value);
+  replaceNaN(DataTypes::real_t value);
+  
+  /**
+  \brief replaces all NaN values with value; if the data is real it is converted to complex first
+  */
+  void
+  replaceNaN(DataTypes::cplx_t value);
+  
+  
   
   /**
      \brief Return a deep copy of the current object.
   */
   virtual
   DataAbstract*
-  deepCopy();
+  deepCopy() const;
 
 
   /**
@@ -180,9 +195,14 @@ TODO Make sure to document the relationship between tags and data, ie: data also
     T
   */
   virtual
-  double*
-  getSampleDataByTag(int tag);
+  DataTypes::real_t*
+  getSampleDataByTag(int tag, DataTypes::real_t dummy=0);  
 
+  virtual
+  DataTypes::cplx_t*
+  getSampleDataByTag(int tag, DataTypes::cplx_t dummy);  
+  
+  
   /**
      \brief
      Write the data as a string.
@@ -240,15 +260,10 @@ TODO Make sure to document the relationship between tags and data, ie: data also
     T
   */
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getPointOffset(int sampleNo,
                  int dataPointNo) const;
 
-  virtual
-  ValueType::size_type
-  getPointOffset(int sampleNo,
-                 int dataPointNo);
-
  /**
      \brief
      addTaggedValues
@@ -268,7 +283,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
  */
   void
   addTaggedValues(const TagListType& tagKeys,
-                            const ValueBatchType& values,
+                            const FloatBatchType& values,
                             const ShapeType& vShape);
 
 
@@ -290,7 +305,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   */
   void
   addTaggedValues(const TagListType& tagKeys,
-                            const ValueType& values,
+                            const DataTypes::RealVectorType& values,
                             const ShapeType& vShape);
 
 
@@ -311,8 +326,15 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   void
   addTaggedValue(int tagKey,
                  const DataTypes::ShapeType& pointshape,
-                 const ValueType& value,
+                 const DataTypes::RealVectorType& value,
                  int dataOffset=0);
+  
+  void
+  addTaggedValue(int tagKey,
+                 const DataTypes::ShapeType& pointshape,
+                 const DataTypes::CplxVectorType& value,
+                 int dataOffset=0);  
+  
 
   /**
      \brief
@@ -342,8 +364,15 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   void
   setTaggedValue(int tagKey,
                  const DataTypes::ShapeType& pointshape,
-                 const ValueType& value,
+                 const DataTypes::RealVectorType& value,
                  int dataOffset=0);
+  
+  void
+  setTaggedValue(int tagKey,
+                 const DataTypes::ShapeType& pointshape,
+                 const DataTypes::CplxVectorType& value,
+                 int dataOffset=0);  
+  
 
   /**
      \brief
@@ -355,13 +384,18 @@ TODO Make sure to document the relationship between tags and data, ie: data also
      \param i - position in the underlying datastructure
   */
 
-  DataTypes::ValueType::reference
-  getDataByTagRW(int tag, DataTypes::ValueType::size_type i);
+  DataTypes::RealVectorType::reference
+  getDataByTagRW(int tag, DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0);
+
+  DataTypes::RealVectorType::const_reference
+  getDataByTagRO(int tag, DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0) const;
 
-  DataTypes::ValueType::const_reference
-  getDataByTagRO(int tag, DataTypes::ValueType::size_type i) const;
 
+  DataTypes::CplxVectorType::reference
+  getDataByTagRW(int tag, DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy);
 
+  DataTypes::CplxVectorType::const_reference
+  getDataByTagRO(int tag, DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy) const;
 
   /**
       \brief 
@@ -372,7 +406,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
 
       Note: If the tag is not valid, the offset of the default value is returned instead.
   */
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getOffsetForTag(int tag) const;
 
 
@@ -381,13 +415,35 @@ TODO Make sure to document the relationship between tags and data, ie: data also
      Return a reference to the underlying DataVector.
   */
 
-  DataTypes::ValueType&
+  DataTypes::RealVectorType&
   getVectorRW();
 
-  const DataTypes::ValueType&
+  const DataTypes::RealVectorType&
   getVectorRO() const;
 
 
+  DataTypes::CplxVectorType&
+  getVectorRWC();
+
+  const DataTypes::CplxVectorType&
+  getVectorROC() const;
+  
+
+  virtual DataTypes::RealVectorType&
+  getTypedVectorRW(DataTypes::real_t dummy);  
+  
+  virtual const DataTypes::RealVectorType&
+  getTypedVectorRO(DataTypes::real_t dummy) const;
+
+  virtual DataTypes::CplxVectorType&
+  getTypedVectorRW(DataTypes::cplx_t dummy);
+  
+  virtual const DataTypes::CplxVectorType&
+  getTypedVectorRO(DataTypes::cplx_t dummy) const;  
+
+  
+  
+
 
   /**
      \brief 
@@ -423,12 +479,17 @@ TODO Make sure to document the relationship between tags and data, ie: data also
      is not explicitly recorded in this DataTagged object's tag map.
      \param i - position in the underlying datastructure
   */
-  DataTypes::ValueType::reference
-  getDefaultValueRW(DataTypes::ValueType::size_type i);
+  DataTypes::RealVectorType::reference
+  getDefaultValueRW(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0);
+
+  DataTypes::RealVectorType::const_reference
+  getDefaultValueRO(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy=0) const;
 
-  DataTypes::ValueType::const_reference
-  getDefaultValueRO(DataTypes::ValueType::size_type i) const;
+  DataTypes::CplxVectorType::reference
+  getDefaultValueRW(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy);
 
+  DataTypes::CplxVectorType::const_reference
+  getDefaultValueRO(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy) const;
 
 
 
@@ -442,7 +503,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
     T
   */
   virtual
-  ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getLength() const;
 
   /**
@@ -501,13 +562,33 @@ TODO Make sure to document the relationship between tags and data, ie: data also
 
   /**
      \brief
-     Computes a nonsymmetric matrix (A - AT) / 2
+     Computes an antisymmetric matrix (A - AT) / 2
+
+     \param ev - Output - antisymmetric matrix
+
+  */
+  virtual void
+  antisymmetric(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes a hermitian matrix (A + A*) / 2
+
+     \param ev - Output - hermitian matrix
+
+  */
+  virtual void
+  hermitian(DataAbstract* ev);
+
+  /**
+     \brief
+     Computes an antihermitian matrix (A - A*) / 2
 
-     \param ev - Output - nonsymmetric matrix
+     \param ev - Output - antihermitian matrix
 
   */
   virtual void
-  nonsymmetric(DataAbstract* ev);
+  antihermitian(DataAbstract* ev);
 
   /**
      \brief
@@ -568,7 +649,7 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   /**
      \brief  Returns the offset in the structure which stores the default value
   */
-  DataTypes::ValueType::size_type
+  DataTypes::RealVectorType::size_type
   getDefaultOffset() const;
   
   /**
@@ -577,6 +658,9 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   size_t
   getTagCount() const;
   
+  void
+  complicate();
+  
  protected:
 
  private:
@@ -588,10 +672,11 @@ TODO Make sure to document the relationship between tags and data, ie: data also
   //
   // the offset to the default value
   static const int m_defaultValueOffset = 0;
-
-  //
-  // The actual data
-  ValueType m_data;
+  
+  // the actual data
+  DataTypes::RealVectorType m_data_r;
+  DataTypes::CplxVectorType m_data_c;  
+  
 
 };
 
@@ -604,27 +689,44 @@ DataTagged::isCurrentTag(int tag) const
 }
 
 inline 
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataTagged::getDefaultOffset() const
 {
   return m_defaultValueOffset;  
 }
 
 inline
-DataTypes::ValueType::reference
-DataTagged::getDefaultValueRW(DataTypes::ValueType::size_type i)
+DataTypes::RealVectorType::reference
+DataTagged::getDefaultValueRW(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy)
 {       
         return getVectorRW()[i];                // getVectorRW has exclusive write checks
 }
 
 inline
-DataTypes::ValueType::const_reference
-DataTagged::getDefaultValueRO(DataTypes::ValueType::size_type i) const
+DataTypes::RealVectorType::const_reference
+DataTagged::getDefaultValueRO(DataTypes::RealVectorType::size_type i, DataTypes::real_t dummy) const
 {
         return getVectorRO()[i];
 }
 
 inline
+DataTypes::CplxVectorType::reference
+DataTagged::getDefaultValueRW(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy)
+{       
+        return getVectorRWC()[i];                // getVectorRWC has exclusive write checks
+}
+
+inline
+DataTypes::CplxVectorType::const_reference
+DataTagged::getDefaultValueRO(DataTypes::CplxVectorType::size_type i, DataTypes::cplx_t dummy) const
+{
+        return getVectorROC()[i];
+}
+
+
+
+
+inline
 const DataTagged::DataMapType&
 DataTagged::getTagLookup() const
 {
@@ -632,10 +734,10 @@ DataTagged::getTagLookup() const
 }
 
 inline
-DataTypes::ValueType::size_type
+DataTypes::RealVectorType::size_type
 DataTagged::getLength() const
 {
-  return m_data.size();
+  return std::max(m_data_c.size(), m_data_r.size());
 }
 
 } // end of namespace
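(For orientation: the DataTagged hunks above split the storage into m_data_r/m_data_c and select the real or complex code path through a trailing dummy argument of type real_t or cplx_t, as in getTypedVectorRW and getSampleDataByTag. The following is only a rough, self-contained sketch of that overload-selection idiom; the class and member names are invented for illustration and are not escript API.)

    #include <complex>
    #include <iostream>
    #include <vector>

    typedef double real_t;
    typedef std::complex<double> cplx_t;

    class TaggedStore {
     public:
        // The trailing dummy argument selects the overload, and hence the
        // storage, mirroring getTypedVectorRW(real_t)/getTypedVectorRW(cplx_t).
        std::vector<real_t>& typedVector(real_t) { return m_real; }
        std::vector<cplx_t>& typedVector(cplx_t) { return m_cplx; }
     private:
        std::vector<real_t> m_real;
        std::vector<cplx_t> m_cplx;
    };

    int main()
    {
        TaggedStore s;
        s.typedVector(real_t(0)).push_back(3.0);              // real storage
        s.typedVector(cplx_t(0)).push_back(cplx_t(2.0, 5.0)); // complex storage
        std::cout << s.typedVector(real_t(0)).size() << " real value(s), "
                  << s.typedVector(cplx_t(0)).size() << " complex value(s)" << std::endl;
        return 0;
    }

Calling typedVector(real_t(0)) or typedVector(cplx_t(0)) resolves at compile time to the matching container, which is how the escript methods avoid inventing a separate name for each element type.
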
diff --git a/escriptcore/src/DataTypes.cpp b/escriptcore/src/DataTypes.cpp
index bf525f9..4c55621 100644
--- a/escriptcore/src/DataTypes.cpp
+++ b/escriptcore/src/DataTypes.cpp
@@ -14,18 +14,17 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataTypes.h"
+#include "DataException.h"
+
 #include <sstream>
 #include <boost/python/extract.hpp>
 #include <boost/python/tuple.hpp>
-#include "DataException.h"
+
+namespace bp = boost::python;
 
 namespace {
-using namespace boost::python;
+
 using namespace escript;
 using namespace escript::DataTypes;
 
@@ -41,12 +40,11 @@ using namespace escript::DataTypes;
   /param key - Input - key object specifying slice range.
 */
    std::pair<int,int>
-   getSliceRange(const boost::python::object& key,
-              const int shape)
+   getSliceRange(const bp::object& key, int shape)
    {
       /* default slice range is range of entire shape dimension */
       int s0=0, s1=shape;;
-      extract<int> slice_int(key);
+      bp::extract<int> slice_int(key);
       if (slice_int.check()) {
          /* if the key is a single int set start=key and end=key */
          /* in this case, we want to return a rank-1 dimension object from
@@ -56,15 +54,15 @@ using namespace escript::DataTypes;
          s1=s0;
       } else {
          /* if key is a pair extract begin and end values */
-         extract<int> step(key.attr("step"));
+         bp::extract<int> step(key.attr("step"));
          if (step.check() && step()!=1) {
             throw DataException("Error - Data does not support increments in slicing ");
          } else {
-            extract<int> start(key.attr("start"));
+            bp::extract<int> start(key.attr("start"));
             if (start.check()) {
                s0=start();
             }
-            extract<int> stop(key.attr("stop"));
+            bp::extract<int> stop(key.attr("stop"));
             if (stop.check()) {
                s1=stop();
             }
@@ -80,11 +78,9 @@ using namespace escript::DataTypes;
          throw DataException("Error - lower index must less or equal upper index.");
       return std::pair<int,int>(s0,s1);
    }
-}
+} // anonymous namespace
 
 
-using namespace boost::python;
-
 namespace escript
 {
 namespace DataTypes
@@ -137,7 +133,7 @@ namespace DataTypes
 
 
    DataTypes::RegionType
-   getSliceRegion(const DataTypes::ShapeType& shape, const boost::python::object& key)
+   getSliceRegion(const DataTypes::ShapeType& shape, const bp::object& key)
    {
       int slice_rank, i;
       int this_rank=shape.size();
@@ -146,9 +142,9 @@ namespace DataTypes
       want to generate a rank-1 dimension object, as opposed to eg: [1,2]
       which implies we want to take a rank dimensional object with one
       dimension of size 1 */
-      extract<tuple> key_tuple(key);
+      bp::extract<bp::tuple> key_tuple(key);
       if (key_tuple.check()) {
-         slice_rank=extract<int> (key.attr("__len__")());
+         slice_rank=bp::extract<int> (key.attr("__len__")());
          /* ensure slice is correctly dimensioned */
          if (slice_rank>this_rank) {
             throw DataException("Error - rank of slices does not match rank of slicee");
@@ -217,436 +213,6 @@ namespace DataTypes
       return temp.str();
    }
 
-
-// Additional slice operations
-
-   inline
-   bool
-   checkOffset(ValueType::size_type offset, int size, int noval)
-   {
-      return (size >= (offset+noval));
-   }
-
-
-   void
-   copySlice(ValueType& left,
-			    const ShapeType& leftShape,
-			    ValueType::size_type thisOffset,
-                            const ValueType& other,
-			    const ShapeType& otherShape,
-                            ValueType::size_type otherOffset,
-                            const RegionLoopRangeType& region)
-   {
-      //
-      // Make sure views are not empty
-
-      EsysAssert(!left.size()==0,
-                 "Error - left data is empty.");
-      EsysAssert(!other.size()==0,
-                 "Error - other data is empty.");
-
-      //
-      // Check the view to be sliced from is compatible with the region to be sliced,
-      // and that the region to be sliced is compatible with this view:
-      EsysAssert(checkOffset(thisOffset,left.size(),noValues(leftShape)),
-                 "Error - offset incompatible with this view.");
-      EsysAssert(otherOffset+noValues(leftShape)<=other.size(),
-                 "Error - offset incompatible with other view.");
-
-      EsysAssert(getRank(otherShape)==region.size(),
-                 "Error - slice not same rank as view to be sliced from.");
-
-      EsysAssert(noValues(leftShape)==noValues(getResultSliceShape(region)),
-                 "Error - slice shape not compatible shape for this view.");
-
-      //
-      // copy the values in the specified region of the other view into this view
-
-      // the following loops cannot be parallelised due to the numCopy counter
-      int numCopy=0;
-
-      switch (region.size()) {
-      case 0:
-         /* this case should never be encountered, 
-         as python will never pass us an empty region.
-         here for completeness only, allows slicing of a scalar */
-//          (*m_data)[thisOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex()];
-
-         left[thisOffset+numCopy]=other[otherOffset];
-         numCopy++;
-         break;
-      case 1:
-         for (int i=region[0].first;i<region[0].second;i++) {
-            left[thisOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i)];
-            numCopy++;
-         }
-         break;
-      case 2:
-         for (int j=region[1].first;j<region[1].second;j++) {
-            for (int i=region[0].first;i<region[0].second;i++) {
-/*               (*m_data)[thisOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j)];*/
-               left[thisOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j)];
-               numCopy++;
-            }
-         }
-         break;
-      case 3:
-         for (int k=region[2].first;k<region[2].second;k++) {
-            for (int j=region[1].first;j<region[1].second;j++) {
-               for (int i=region[0].first;i<region[0].second;i++) {
-//                  (*m_data)[thisOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j,k)];
-                  left[thisOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j,k)];
-                  numCopy++;
-               }
-            }
-         }
-         break;
-      case 4:
-         for (int l=region[3].first;l<region[3].second;l++) {
-            for (int k=region[2].first;k<region[2].second;k++) {
-               for (int j=region[1].first;j<region[1].second;j++) {
-                  for (int i=region[0].first;i<region[0].second;i++) {
-/*                     (*m_data)[thisOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j,k,l)];*/
-                     left[thisOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j,k,l)];
-                     numCopy++;
-                  }
-               }
-            }
-         }
-         break;
-      default:
-         std::stringstream mess;
-         mess << "Error - (copySlice) Invalid slice region rank: " << region.size();
-         throw DataException(mess.str());
-      }
-   }
-
-
-   void
-   copySliceFrom(ValueType& left,
-				const ShapeType& leftShape,
-				ValueType::size_type thisOffset,
-                                const ValueType& other,
-				const ShapeType& otherShape,
-                                ValueType::size_type otherOffset,
-                                const RegionLoopRangeType& region)
-   {
-      //
-      // Make sure views are not empty
-
-      EsysAssert(left.size()!=0,
-                 "Error - this view is empty.");
-      EsysAssert(other.size()!=0,
-                 "Error - other view is empty.");
-
-      //
-      // Check this view is compatible with the region to be sliced,
-      // and that the region to be sliced is compatible with the other view:
-
-      EsysAssert(checkOffset(otherOffset,other.size(),noValues(otherShape)),
-                 "Error - offset incompatible with other view.");
-      EsysAssert(thisOffset+noValues(otherShape)<=left.size(),
-                 "Error - offset incompatible with this view.");
-
-      EsysAssert(getRank(leftShape)==region.size(),
-                 "Error - slice not same rank as this view.");
-
-      EsysAssert(getRank(otherShape)==0 || noValues(otherShape)==noValues(getResultSliceShape(region)),
-                 "Error - slice shape not compatible shape for other view.");
-
-      //
-      // copy the values in the other view into the specified region of this view
-
-      // allow for case where other view is a scalar
-      if (getRank(otherShape)==0) {
-
-         // the following loops cannot be parallelised due to the numCopy counter
-         int numCopy=0;
-
-         switch (region.size()) {
-         case 0:
-            /* this case should never be encountered, 
-            as python will never pass us an empty region.
-            here for completeness only, allows slicing of a scalar */
-            //(*m_data)[thisOffset+relIndex()]=(*other.m_data)[otherOffset];
-	    left[thisOffset]=other[otherOffset];
-            numCopy++;
-            break;
-         case 1:
-            for (int i=region[0].first;i<region[0].second;i++) {
-               left[thisOffset+getRelIndex(leftShape,i)]=other[otherOffset];
-               numCopy++;
-            }
-            break;
-         case 2:
-            for (int j=region[1].first;j<region[1].second;j++) {
-               for (int i=region[0].first;i<region[0].second;i++) {
-                  left[thisOffset+getRelIndex(leftShape,i,j)]=other[otherOffset];
-                  numCopy++;
-               }
-            }
-            break;
-         case 3:
-            for (int k=region[2].first;k<region[2].second;k++) {
-               for (int j=region[1].first;j<region[1].second;j++) {
-                  for (int i=region[0].first;i<region[0].second;i++) {
-                     left[thisOffset+getRelIndex(leftShape,i,j,k)]=other[otherOffset];
-                     numCopy++;
-                  }
-               }
-            }
-            break;
-         case 4:
-            for (int l=region[3].first;l<region[3].second;l++) {
-               for (int k=region[2].first;k<region[2].second;k++) {
-                  for (int j=region[1].first;j<region[1].second;j++) {
-                     for (int i=region[0].first;i<region[0].second;i++) {
-                        left[thisOffset+getRelIndex(leftShape,i,j,k,l)]=other[otherOffset];
-                        numCopy++;
-                     }
-                  }
-               }
-            }
-            break;
-         default:
-            std::stringstream mess;
-            mess << "Error - (copySliceFrom) Invalid slice region rank: " << region.size();
-            throw DataException(mess.str());
-         }
-
-      } else {
-
-         // the following loops cannot be parallelised due to the numCopy counter
-         int numCopy=0;
-
-         switch (region.size()) {
-         case 0:
-            /* this case should never be encountered, 
-            as python will never pass us an empty region.
-            here for completeness only, allows slicing of a scalar */
-            //(*m_data)[thisOffset+relIndex()]=(*other.m_data)[otherOffset+numCopy];
-	    left[thisOffset]=other[otherOffset+numCopy];
-            numCopy++;
-            break;
-         case 1:
-            for (int i=region[0].first;i<region[0].second;i++) {
-               left[thisOffset+getRelIndex(leftShape,i)]=other[otherOffset+numCopy];
-               numCopy++;
-            }
-            break;
-         case 2:
-            for (int j=region[1].first;j<region[1].second;j++) {
-               for (int i=region[0].first;i<region[0].second;i++) {
-                  left[thisOffset+getRelIndex(leftShape,i,j)]=other[otherOffset+numCopy];
-                  numCopy++;
-               }
-            }
-            break;
-         case 3:
-            for (int k=region[2].first;k<region[2].second;k++) {
-               for (int j=region[1].first;j<region[1].second;j++) {
-                  for (int i=region[0].first;i<region[0].second;i++) {
-                     left[thisOffset+getRelIndex(leftShape,i,j,k)]=other[otherOffset+numCopy];
-                     numCopy++;
-                  }
-               }
-            }
-            break;
-         case 4:
-            for (int l=region[3].first;l<region[3].second;l++) {
-               for (int k=region[2].first;k<region[2].second;k++) {
-                  for (int j=region[1].first;j<region[1].second;j++) {
-                     for (int i=region[0].first;i<region[0].second;i++) {
-                        left[thisOffset+getRelIndex(leftShape,i,j,k,l)]=other[otherOffset+numCopy];
-                        numCopy++;
-                     }
-                  }
-               }
-            }
-            break;
-         default:
-            std::stringstream mess;
-            mess << "Error - (copySliceFrom) Invalid slice region rank: " << region.size();
-            throw DataException(mess.str());
-         }
-
-      }
-
-   }
-
-
-   void
-   pointToStream(std::ostream& os, const ValueType::ElementType* data,const ShapeType& shape, int offset, bool needsep, const std::string& sep)
-   {
-      using namespace std;
-      EsysAssert(data!=0, "Error - data is null");
-//      EsysAssert(data.size()>0,"Error - Data object is empty.");
-      switch (getRank(shape)) {
-      case 0:
-	 if (needsep)
-	 {
-		os << sep;
-	 }
-	 else
-	 {
-		needsep=true;
-	 }
-         os << data[offset];
-         break;
-      case 1:
-         for (int i=0;i<shape[0];i++) {
-	    if (needsep)
-	    {
-		os << sep;
-	    }
-	    else
-	    {
-		needsep=true;
-	    }
-	    os << data[i+offset];
-         }
-         break;
-      case 2:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-		if (needsep)
-		{
-			os << sep;
-		}
-		else
-		{
-			needsep=true;
-		}
-                os << data[offset+getRelIndex(shape,i,j)];
-            }
-         }
-         break;
-      case 3:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-               for (int k=0;k<shape[2];k++) {
-		   if (needsep)
-		   {
-			os << sep;
-		   }
-		   else
-		   {
-			needsep=true;
-		   }
-                   os << data[offset+getRelIndex(shape,i,j,k)];
-               }
-            }
-         }
-         break;
-      case 4:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-               for (int k=0;k<shape[2];k++) {
-                  for (int l=0;l<shape[3];l++) {
-			if (needsep)
-			{
-				os << sep;
-			}
-			else
-			{
-				needsep=true;
-			}
-			os << data[offset+getRelIndex(shape,i,j,k,l)];
-                  }
-               }
-            }
-         }
-         break;
-      default:
-         stringstream mess;
-         mess << "Error - (pointToStream) Invalid rank: " << getRank(shape);
-         throw DataException(mess.str());
-      }
-   }
-
-
-   std::string
-   pointToString(const ValueType& data,const ShapeType& shape, int offset, const std::string& prefix)
-   {
-      using namespace std;
-      EsysAssert(data.size()>0,"Error - Data object is empty.");
-      stringstream temp;
-      string finalPrefix=prefix;
-      if (prefix.length() > 0) {
-         finalPrefix+=" ";
-      }
-      switch (getRank(shape)) {
-      case 0:
-         temp << finalPrefix << data[offset];
-         break;
-      case 1:
-         for (int i=0;i<shape[0];i++) {
-            temp << finalPrefix << "(" << i <<  ") " << data[i+offset];
-            if (i!=(shape[0]-1)) {
-               temp << endl;
-            }
-         }
-         break;
-      case 2:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-               temp << finalPrefix << "(" << i << "," << j << ") " << data[offset+getRelIndex(shape,i,j)];
-               if (!(i==(shape[0]-1) && j==(shape[1]-1))) {
-                  temp << endl;
-               }
-            }
-         }
-         break;
-      case 3:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-               for (int k=0;k<shape[2];k++) {
-                  temp << finalPrefix << "(" << i << "," << j << "," << k << ") " << data[offset+getRelIndex(shape,i,j,k)];
-                  if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1))) {
-                     temp << endl;
-                  }
-               }
-            }
-         }
-         break;
-      case 4:
-         for (int i=0;i<shape[0];i++) {
-            for (int j=0;j<shape[1];j++) {
-               for (int k=0;k<shape[2];k++) {
-                  for (int l=0;l<shape[3];l++) {
-                     temp << finalPrefix << "(" << i << "," << j << "," << k << "," << l << ") " << data[offset+getRelIndex(shape,i,j,k,l)];
-                     if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1) && l==(shape[3]-1))) {
-                        temp << endl;
-                     }
-                  }
-               }
-            }
-         }
-         break;
-      default:
-         stringstream mess;
-         mess << "Error - (toString) Invalid rank: " << getRank(shape);
-         throw DataException(mess.str());
-      }
-      return temp.str();
-   }
-
-
-   void copyPoint(ValueType& dest, ValueType::size_type doffset, ValueType::size_type nvals, const ValueType& src, ValueType::size_type soffset)
-   {
-      EsysAssert((dest.size()>0&&src.size()>0&&checkOffset(doffset,dest.size(),nvals)),
-                 "Error - Couldn't copy due to insufficient storage.");
-//       EsysAssert((checkShape(other.getShape())),
-//                  createShapeErrorMessage("Error - Couldn't copy due to shape mismatch.",other.getShape(),m_shape));
-      if (checkOffset(doffset,dest.size(),nvals) && checkOffset(soffset,src.size(),nvals)) {
-         memcpy(&dest[doffset],&src[soffset],sizeof(double)*nvals);
-      } else {
-         throw DataException("Error - invalid offset specified.");
-      }
-
-
-
-   } 
-
 }	// end namespace DataTypes
 }	// end namespace escript
+
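(For orientation: getSliceRange() above maps a Python key to a range: an integer key collapses to a single index, a slice key must have step 1 and defaults to the whole dimension, and the lower bound may not exceed the upper. The sketch below mirrors that rule without boost::python; the flags and parameter names are stand-ins for the Python key object and are not escript code.)

    #include <iostream>
    #include <stdexcept>
    #include <utility>

    std::pair<int,int> sliceRange(bool isInt, int key,
                                  bool hasStart, int start,
                                  bool hasStop, int stop,
                                  int step, int shape)
    {
        int s0 = 0, s1 = shape;               // default: the whole dimension
        if (isInt) {
            s0 = key;
            s1 = key;                         // single index -> rank-1 reduction
        } else {
            if (step != 1)
                throw std::runtime_error("increments in slicing are not supported");
            if (hasStart) s0 = start;
            if (hasStop)  s1 = stop;
        }
        if (s0 > s1)
            throw std::runtime_error("lower index must be less or equal to upper index");
        return std::make_pair(s0, s1);
    }

    int main()
    {
        std::pair<int,int> a = sliceRange(true, 2, false, 0, false, 0, 1, 5);  // key 2 -> (2,2)
        std::pair<int,int> b = sliceRange(false, 0, true, 1, true, 4, 1, 5);   // [1:4] -> (1,4)
        std::cout << a.first << "," << a.second << "  "
                  << b.first << "," << b.second << std::endl;
        return 0;
    }
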
diff --git a/escriptcore/src/DataTypes.h b/escriptcore/src/DataTypes.h
index 8f537cd..68799f1 100644
--- a/escriptcore/src/DataTypes.h
+++ b/escriptcore/src/DataTypes.h
@@ -14,46 +14,102 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_DATATYPES_H__
+#define __ESCRIPT_DATATYPES_H__
 
-#if !defined escript_DataTypes_20080811_H
-#define escript_DataTypes_20080811_H
-#include "system_dep.h"
-#include "DataVector.h"
-#include <vector>
+#include <boost/python/object_fwd.hpp>
+
+#include "Assert.h"
+
+#include <complex>
+#include <limits>
 #include <string>
-#include <boost/python/object.hpp>
-#include <boost/python/extract.hpp>
+#include <vector>
 
 namespace escript {
 
 namespace DataTypes {
 
 /**
-\namespace escript::DataTypes 
-\brief Contains the types to represent Shapes, Regions, RegionLoop ranges and vectors of data as well as the functions to manipulate them.
+\namespace escript::DataTypes
+\brief Contains the types to represent Shapes, Regions, RegionLoop ranges and
+       vectors of data as well as the functions to manipulate them.
+\note The contents of the namespace are spread between DataTypes.h and DataVector.h
 */
+
   //
   // Some basic types which define the data values and view shapes.
-  typedef escript::DataVector               ValueType;//!< Vector to store underlying data.
   typedef std::vector<int>                  ShapeType;//!< The shape of a single datapoint.
   typedef std::vector<std::pair<int, int> > RegionType;
   typedef std::vector<std::pair<int, int> > RegionLoopRangeType;
   static const int maxRank=4;//!< The maximum number of dimensions a datapoint can have.
   static const ShapeType scalarShape;//!< Use this instead of creating empty shape objects for scalars.
+  typedef long vec_size_type;
+
+  /// type of all real-valued scalars in escript
+  typedef double real_t;
+
+  /// complex data type
+  typedef std::complex<real_t> cplx_t;
+
+  /// type for array/matrix indices used both globally and on each rank
+#ifdef ESYS_INDEXTYPE_LONG
+  typedef long index_t;
+#else
+  typedef int index_t;
+#endif
+
+  typedef std::vector<index_t> IndexVector;
+
+  typedef index_t dim_t;
+
+  /**
+     \brief
+     Returns the minimum finite value for the index_t type.
+  */
+  inline index_t index_t_min()
+  {
+      return std::numeric_limits<index_t>::min();
+  }
+
+  /**
+     \brief
+     Returns the maximum finite value for the index_t type.
+  */
+  inline index_t index_t_max()
+  {
+      return std::numeric_limits<index_t>::max();
+  }
+
+  /**
+     \brief
+     Returns the maximum finite value for the real_t type.
+  */
+  inline real_t real_t_max()
+  {
+      return std::numeric_limits<real_t>::max();
+  }
+
+  /**
+     \brief
+     Returns the machine epsilon for the real_t type.
+  */
+  inline real_t real_t_eps()
+  {
+      return std::numeric_limits<real_t>::epsilon();
+  }
 
   /**
      \brief
      Calculate the number of values in a datapoint with the given shape.
   */
-  ESCRIPT_DLL_API
   int
   noValues(const DataTypes::ShapeType& shape);
 
-  /** 
+  /**
      \brief
      Calculate the number of values for the given region.
   */
-  ESCRIPT_DLL_API
   int
   noValues(const DataTypes::RegionLoopRangeType& region);
 
@@ -63,7 +119,6 @@ namespace DataTypes {
 
      \param shape - Input.
   */
-  ESCRIPT_DLL_API
   std::string
   shapeToString(const DataTypes::ShapeType& shape);
 
@@ -73,7 +128,6 @@ namespace DataTypes {
 
      \param region - Input - Slice region
   */
-  ESCRIPT_DLL_API
   DataTypes::ShapeType
   getResultSliceShape(const DataTypes::RegionType& region);
 
@@ -135,7 +189,6 @@ namespace DataTypes {
 
      Note: Not unit tested in c++.
   */
-   ESCRIPT_DLL_API
    DataTypes::RegionType
    getSliceRegion(const DataTypes::ShapeType& shape, const boost::python::object& key);
 
@@ -151,7 +204,6 @@ namespace DataTypes {
    the slice region is of size 1. So in the above example, we modify the above
    region like so: <<1,2><0,3><0,3>> and take this slice.
   */
-  ESCRIPT_DLL_API
   DataTypes::RegionLoopRangeType
   getSliceRegionLoopRange(const DataTypes::RegionType& region);
 
@@ -161,7 +213,6 @@ namespace DataTypes {
    \param shape
    \return the rank.
   */
-  ESCRIPT_DLL_API
   inline
   int
   getRank(const DataTypes::ShapeType& shape)
@@ -177,13 +228,12 @@ namespace DataTypes {
   \param i - Input - subscript to locate.
   \return offset relative to the beginning of the datapoint.
   */
-  ESCRIPT_DLL_API
   inline
-  DataTypes::ValueType::size_type
-  getRelIndex(const DataTypes::ShapeType& shape, DataTypes::ValueType::size_type i)
+  vec_size_type
+  getRelIndex(const DataTypes::ShapeType& shape, vec_size_type i)
   {
-  	EsysAssert((getRank(shape)==1),"Incorrect number of indices for the rank of this object.");
-	EsysAssert((i < DataTypes::noValues(shape)), "Error - Invalid index.");
+  	ESYS_ASSERT(getRank(shape)==1, "Incorrect number of indices for the rank of this object.");
+	ESYS_ASSERT(i < DataTypes::noValues(shape), "Invalid index.");
 	return i;
   }
 
@@ -195,16 +245,15 @@ namespace DataTypes {
   \param j - Input - column
   \return offset relative to the beginning of the datapoint.
   */
-  ESCRIPT_DLL_API
   inline
-  DataTypes::ValueType::size_type
-  getRelIndex(const DataTypes::ShapeType& shape, DataTypes::ValueType::size_type i,
-	   DataTypes::ValueType::size_type j)
+  vec_size_type
+  getRelIndex(const DataTypes::ShapeType& shape, vec_size_type i,
+	   vec_size_type j)
   {
 	// Warning: This is not C ordering. Do not try to figure out the params by looking at the code
-  	EsysAssert((getRank(shape)==2),"Incorrect number of indices for the rank of this object.");
-  	DataTypes::ValueType::size_type temp=i+j*shape[0];
-  	EsysAssert((temp < DataTypes::noValues(shape)), "Error - Invalid index.");
+  	ESYS_ASSERT(getRank(shape)==2, "Incorrect number of indices for the rank of this object.");
+  	vec_size_type temp=i+j*shape[0];
+  	ESYS_ASSERT(temp < DataTypes::noValues(shape), "Invalid index.");
 	return temp;
   }
 
@@ -215,16 +264,15 @@ namespace DataTypes {
   \param i,j,k - Input - subscripts to locate.
   \return offset relative to the beginning of the datapoint.
   */
-  ESCRIPT_DLL_API
   inline
-  DataTypes::ValueType::size_type
-  getRelIndex(const DataTypes::ShapeType& shape, DataTypes::ValueType::size_type i,
-	   DataTypes::ValueType::size_type j, DataTypes::ValueType::size_type k)
+  vec_size_type
+  getRelIndex(const DataTypes::ShapeType& shape, vec_size_type i,
+	   vec_size_type j, vec_size_type k)
   {
 	// Warning: This is not C ordering. Do not try to figure out the params by looking at the code
-  	EsysAssert((getRank(shape)==3),"Incorrect number of indices for the rank of this object.");
-  	DataTypes::ValueType::size_type temp=i+j*shape[0]+k*shape[1]*shape[0];
-  	EsysAssert((temp < DataTypes::noValues(shape)), "Error - Invalid index.");
+  	ESYS_ASSERT(getRank(shape)==3, "Incorrect number of indices for the rank of this object.");
+  	vec_size_type temp=i+j*shape[0]+k*shape[1]*shape[0];
+  	ESYS_ASSERT(temp < DataTypes::noValues(shape), "Invalid index.");
   	return temp;
   }
 
@@ -235,24 +283,22 @@ namespace DataTypes {
   \param i,j,k,m - Input - subscripts to locate.
   \return offset relative to the beginning of the datapoint.
   */
-  ESCRIPT_DLL_API
   inline
-  DataTypes::ValueType::size_type
-  getRelIndex(const DataTypes::ShapeType& shape, DataTypes::ValueType::size_type i,
-	   DataTypes::ValueType::size_type j, DataTypes::ValueType::size_type k,
-	   DataTypes::ValueType::size_type m)
+  vec_size_type
+  getRelIndex(const DataTypes::ShapeType& shape, vec_size_type i,
+	   vec_size_type j, vec_size_type k,
+	   vec_size_type m)
   {
 	// Warning: This is not C ordering. Do not try to figure out the params by looking at the code
-	EsysAssert((getRank(shape)==4),"Incorrect number of indices for the rank of this object.");
-	DataTypes::ValueType::size_type temp=i+j*shape[0]+k*shape[1]*shape[0]+m*shape[2]*shape[1]*shape[0];
-	EsysAssert((temp < DataTypes::noValues(shape)), "Error - Invalid index.");
+	ESYS_ASSERT(getRank(shape)==4, "Incorrect number of indices for the rank of this object.");
+	vec_size_type temp=i+j*shape[0]+k*shape[1]*shape[0]+m*shape[2]*shape[1]*shape[0];
+	ESYS_ASSERT(temp < DataTypes::noValues(shape), "Invalid index.");
 	return temp;
   }
 
   /**
      \brief Test if two shapes are equal.
   */
-  ESCRIPT_DLL_API
   inline
   bool
   checkShape(const ShapeType& s1, const ShapeType& s2)
@@ -267,107 +313,21 @@ namespace DataTypes {
    \param other - displayed in the message as "Other shape"
    \param thisShape - displayed in the message as "This shape"
   */
-   ESCRIPT_DLL_API
-   std::string 
+   std::string
    createShapeErrorMessage(const std::string& messagePrefix,
                                           const DataTypes::ShapeType& other,
 					  const DataTypes::ShapeType& thisShape);
 
-
-  /**
-     \brief
-     Copy a data slice specified by the given region and offset from the
-     "other" view into the "left" view at the given offset.
-     
-     \param left - vector to copy into
-     \param leftShape - shape of datapoints for the left vector
-     \param leftOffset - location within left to start copying to
-     \param other - vector to copy from
-     \param otherShape - shape of datapoints for the other vector
-     \param otherOffset - location within other vector to start copying from
-     \param region - Input -
-                      Region in other view to copy data from.
-  */
-   ESCRIPT_DLL_API
-   void
-   copySlice(ValueType& left,
-			    const ShapeType& leftShape,
-			    ValueType::size_type leftOffset,
-                            const ValueType& other,
-			    const ShapeType& otherShape,
-                            ValueType::size_type otherOffset,
-                            const RegionLoopRangeType& region);
-
-  /**
-     \brief
-     Copy data into a slice specified by the given region and offset in
-     the left vector from the other vector at the given offset.
-
-     \param left - vector to copy into
-     \param leftShape - shape of datapoints for the left vector
-     \param leftOffset - location within left to start copying to
-     \param other - vector to copy from
-     \param otherShape - shape of datapoints for the other vector
-     \param otherOffset - location within other vector to start copying from
-     \param region - Input -
-                      Region in the left vector to copy data to.
-  */
-   ESCRIPT_DLL_API
-   void
-   copySliceFrom(ValueType& left,
-				const ShapeType& leftShape,
-				ValueType::size_type leftOffset,
-                                const ValueType& other,
-				const ShapeType& otherShape,
-                                ValueType::size_type otherOffset,
-                                const RegionLoopRangeType& region);
-
-
-   /**
-      \brief Display a single value (with the specified shape) from the data.
-
-     Despite its similar name this function behaves differently to pointToString.
-     There are no prefixes or (i,j,k) identifiers on each field. each datapoint is printed without
-     new lines.
-     It also works with double* rather than vectors so be careful what you pass it.
-
-     \param os - stream to write to
-     \param data - vector containing the datapoint
-     \param shape - shape of the datapoint
-     \param offset - start of the datapoint within data
-     \param needsep - Does this output need to start with a separator
-     \param sep - separator string to print between components
-   */
-   void
-   pointToStream(std::ostream& os, const ValueType::ElementType* data,const ShapeType& shape, int offset, bool needsep=true, const std::string& sep=",");
-
-   /**
-      \brief Display a single value (with the specified shape) from the data.
-
-     \param data - vector containing the datapoint
-     \param shape - shape of the datapoint
-     \param offset - start of the datapoint within data
-     \param prefix - string to prepend to the output
-   */
-   std::string
-   pointToString(const ValueType& data,const ShapeType& shape, int offset, const std::string& prefix);
-
-
-   /**
-      \brief  Copy a point from one vector to another. Note: This version does not check to see if shapes are the same.
-
-   \param dest - vector to copy to
-   \param doffset - beginning of the target datapoint in dest
-   \param nvals - the number of values comprising the datapoint
-   \param src - vector to copy from
-   \param soffset - beginning of the datapoint in src
-   */
-   void copyPoint(ValueType& dest, ValueType::size_type doffset, ValueType::size_type nvals, const ValueType& src, ValueType::size_type soffset);
+   inline
+   bool
+   checkOffset(vec_size_type offset, int size, int noval)
+   {
+      return (size >= (offset+noval));
+   }
 
  }   // End of namespace DataTypes
 
-
 } // End of namespace escript
 
-#endif
+#endif // __ESCRIPT_DATATYPES_H__
 
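(For orientation: the getRelIndex() overloads above keep the ordering flagged in their comments: the leftmost subscript varies fastest, so a rank-2 offset is i + j*shape[0]. Below is a minimal local sketch of that formula, reimplemented here rather than calling the escript header.)

    #include <iostream>
    #include <vector>

    typedef long vec_size_type;

    vec_size_type relIndex2(const std::vector<int>& shape, vec_size_type i, vec_size_type j)
    {
        return i + j * shape[0];   // same formula as DataTypes::getRelIndex(shape,i,j)
    }

    int main()
    {
        std::vector<int> shape;    // shape {2,3}: 6 values per datapoint
        shape.push_back(2);
        shape.push_back(3);
        for (int j = 0; j < shape[1]; ++j)
            for (int i = 0; i < shape[0]; ++i)
                std::cout << "(" << i << "," << j << ") -> "
                          << relIndex2(shape, i, j) << std::endl;
        // prints offsets 0..5 in order as j (the slower index) advances
        return 0;
    }
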
diff --git a/escriptcore/src/DataVector.cpp b/escriptcore/src/DataVector.cpp
index b9bbbb1..952889d 100644
--- a/escriptcore/src/DataVector.cpp
+++ b/escriptcore/src/DataVector.cpp
@@ -14,283 +14,396 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataVector.h"
 
-#include "Taipan.h"
 #include "DataException.h"
-#include <boost/python/extract.hpp>
 #include "DataTypes.h"
+#include "Taipan.h"
 #include "WrappedArray.h"
 
-#include <cassert>
+#include <boost/python/extract.hpp>
 
 using namespace std;
 using namespace escript;
 using namespace boost::python;
+using namespace DataTypes;
 
-namespace escript {
-
-Taipan arrayManager;
-
-void releaseUnusedMemory()
-{
-   arrayManager.release_unused_arrays();
-}
-
-
-DataVector::DataVector() :
-  m_size(0),
-  m_dim(0),
-  m_N(0),
-  m_array_data(0)
-{
-}
-
-DataVector::DataVector(const DataVector& other) :
-  m_size(other.m_size),
-  m_dim(other.m_dim),
-  m_N(other.m_N),
-  m_array_data(0)
-{
-  m_array_data = arrayManager.new_array(m_dim,m_N);
-  int i;
-  #pragma omp parallel for private(i) schedule(static)
-  for (i=0; i<m_size; i++) {
-    m_array_data[i] = other.m_array_data[i];
-  }
-}
-
-DataVector::DataVector(const DataVector::size_type size,
-                       const DataVector::value_type val,
-                       const DataVector::size_type blockSize) :
-  m_size(size),
-  m_dim(blockSize),
-  m_array_data(0)
-{
-  resize(size, val, blockSize);
-}
-
-DataVector::~DataVector()
-{
-  // dispose of data array
-  if (m_array_data!=0) {
-    arrayManager.delete_array(m_array_data);
-  }
-
-  // clear data members
-  m_size = -1;
-  m_dim = -1;
-  m_N = -1;
-  m_array_data = 0;
-}
+namespace {
 
-void
-DataVector::resize(const DataVector::size_type newSize,
-                   const DataVector::value_type newValue,
-                   const DataVector::size_type newBlockSize)
+inline
+void cplxout(std::ostream& os, const DataTypes::cplx_t& c)
 {
-  assert(m_size >= 0);
-
-			// The < 1 is to catch both ==0 and negatives
-  if ( newBlockSize < 1) {
-    ostringstream oss;
-    oss << "DataVector: invalid blockSize specified (" << newBlockSize << ')';    
-    throw DataException(oss.str());
-  }
-
-  if ( newSize < 0 ) {
-    ostringstream oss;
-    oss << "DataVector: invalid new size specified (" << newSize << ')';
-    throw DataException(oss.str());
-  }
-  if ( (newSize % newBlockSize) != 0) {
-    ostringstream oss;
-    oss << "DataVector: newSize is not a multiple of blockSize: (" << newSize << ", " << newBlockSize<< ')';
-    throw DataException(oss.str());
-  }
-
-  if (m_array_data!=0) {
-    arrayManager.delete_array(m_array_data);
-  }
-
-  m_size = newSize;
-  m_dim = newBlockSize;
-  m_N = newSize / newBlockSize;
-  m_array_data = arrayManager.new_array(m_dim,m_N);
-
-  int i;
-  #pragma omp parallel for private(i) schedule(static)
-  for (i=0; i<m_size; i++) {
-    m_array_data[i] = newValue;
-  }
+    os << c.real();
+    if (c.imag()>=0)
+    {
+        os << '+';
+    }
+    os << c.imag() << 'j';
 }
 
-DataVector&
-DataVector::operator=(const DataVector& other)
-{
-  assert(m_size >= 0);
-
-  if (m_array_data!=0) {
-    arrayManager.delete_array(m_array_data);
-  }
-
-  m_size = other.m_size;
-  m_dim = other.m_dim;
-  m_N = other.m_N;
-
-  m_array_data = arrayManager.new_array(m_dim,m_N);
-  int i;
-  #pragma omp parallel for private(i) schedule(static)
-  for (i=0; i<m_size; i++) {
-    m_array_data[i] = other.m_array_data[i];
-  }
-
-  return *this;
 }
 
-bool
-DataVector::operator==(const DataVector& other) const
-{
-  assert(m_size >= 0);
-
-  if (m_size!=other.m_size) {
-    return false;
-  }
-  if (m_dim!=other.m_dim) {
-    return false;
-  }
-  if (m_N!=other.m_N) {
-    return false;
-  }
-  for (int i=0; i<m_size; i++) {
-    if (m_array_data[i] != other.m_array_data[i]) {
-      return false;
-    }
-  }
-  return true;
-}
 
-bool
-DataVector::operator!=(const DataVector& other) const
-{
-  return !(*this==other);
-}
+namespace escript {
 
-void 
-DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies)
-{
-  using DataTypes::ValueType;
-  const DataTypes::ShapeType& tempShape=value.getShape();
-  size_type len=DataTypes::noValues(tempShape);
-  if (offset+len*copies>size())
-  {
-     ostringstream ss;
-     ss << "Error - not enough room for that DataPoint at that offset. (";
-     ss << "offset=" << offset << " + " << " len=" << len << " >= " << size();
-     throw DataException(ss.str());
-  }
-  size_t si=0,sj=0,sk=0,sl=0;
-  switch (value.getRank())
-  {
-  case 0:	
-	for (size_type z=0;z<copies;++z)
-	{
-	   m_array_data[offset+z]=value.getElt();
-	}
-	break;
-  case 1:
-	for (size_type z=0;z<copies;++z)
-	{
-	   for (size_t i=0;i<tempShape[0];++i)
-	   {
-	      m_array_data[offset+i]=value.getElt(i);
-	   }
-	   offset+=len;
-	}
-	break;
-  case 2:
-	si=tempShape[0];
-	sj=tempShape[1];
-	for (size_type z=0;z<copies;++z)
-	{
-           for (ValueType::size_type i=0;i<si;i++)
-	   {
-              for (ValueType::size_type j=0;j<sj;j++)
-	      {
-                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j)]=value.getElt(i,j);
-              }
-           }
-	   offset+=len;
-	}
-	break;
-  case 3:
-	si=tempShape[0];
-	sj=tempShape[1];
-	sk=tempShape[2];
-	for (size_type z=0;z<copies;++z) 
-	{
-          for (ValueType::size_type i=0;i<si;i++)
-	  {
-            for (ValueType::size_type j=0;j<sj;j++)
+   void
+   DataTypes::pointToStream(std::ostream& os, const CplxVectorType::ElementType* data,const ShapeType& shape, int offset, bool needsep, const std::string& sep)
+   {
+      using namespace std;
+      ESYS_ASSERT(data!=0, "Error - data is null");
+//      ESYS_ASSERT(data.size()>0,"Error - Data object is empty.");
+      switch (getRank(shape)) {
+      case 0:
+	 if (needsep)
+	 {
+		os << sep;
+	 }
+	 else
+	 {
+		needsep=true;
+	 }
+         cplxout(os,data[offset]);
+         break;
+      case 1:
+         for (int i=0;i<shape[0];i++) {
+	    if (needsep)
 	    {
-              for (ValueType::size_type k=0;k<sk;k++)
-	      {
-                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k)]=value.getElt(i,j,k);
-              }
+		os << sep;
+	    }
+	    else
+	    {
+		needsep=true;
+	    }
+	    cplxout(os,data[i+offset]);
+         }
+         break;
+      case 2:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+		if (needsep)
+		{
+			os << sep;
+		}
+		else
+		{
+			needsep=true;
+		}
+                cplxout(os,data[offset+getRelIndex(shape,i,j)]);
+            }
+         }
+         break;
+      case 3:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+		   if (needsep)
+		   {
+			os << sep;
+		   }
+		   else
+		   {
+			needsep=true;
+		   }
+                   cplxout(os,data[offset+getRelIndex(shape,i,j,k)]);
+               }
             }
-          }
-	  offset+=len;
-	}
-	break;
-  case 4:
-	si=tempShape[0];
-	sj=tempShape[1];
-	sk=tempShape[2];
-	sl=tempShape[3];
-	for (size_type z=0;z<copies;++z)
-	{
-          for (ValueType::size_type i=0;i<si;i++)
-	  {
-            for (ValueType::size_type j=0;j<sj;j++)
+         }
+         break;
+      case 4:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  for (int l=0;l<shape[3];l++) {
+			if (needsep)
+			{
+				os << sep;
+			}
+			else
+			{
+				needsep=true;
+			}
+			cplxout(os,data[offset+getRelIndex(shape,i,j,k,l)]);
+                  }
+               }
+            }
+         }
+         break;
+      default:
+         stringstream mess;
+         mess << "Error - (pointToStream) Invalid rank: " << getRank(shape);
+         throw DataException(mess.str());
+      }
+   }
+
+
+   void
+   DataTypes::pointToStream(std::ostream& os, const RealVectorType::ElementType* data,const ShapeType& shape, int offset, bool needsep, const std::string& sep)
+   {
+      using namespace std;
+      ESYS_ASSERT(data!=0, "Error - data is null");
+//      ESYS_ASSERT(data.size()>0,"Error - Data object is empty.");
+      switch (getRank(shape)) {
+      case 0:
+	 if (needsep)
+	 {
+		os << sep;
+	 }
+	 else
+	 {
+		needsep=true;
+	 }
+         os << data[offset];
+         break;
+      case 1:
+         for (int i=0;i<shape[0];i++) {
+	    if (needsep)
+	    {
+		os << sep;
+	    }
+	    else
 	    {
-              for (ValueType::size_type k=0;k<sk;k++)
-	      {
-                 for (ValueType::size_type l=0;l<sl;l++)
-		 {
-                    m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k,l)]=value.getElt(i,j,k,l);
-                 }
-              }
+		needsep=true;
+	    }
+	    os << data[i+offset];
+         }
+         break;
+      case 2:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+		if (needsep)
+		{
+			os << sep;
+		}
+		else
+		{
+			needsep=true;
+		}
+                os << data[offset+getRelIndex(shape,i,j)];
             }
-          }
-	  offset+=len;
-	}
-	break;
-  default:
-	ostringstream oss;
-	oss << "Error - unknown rank. Rank=" << value.getRank();
-	throw DataException(oss.str());
-  }
-}
-
-
-void
-DataVector::copyFromArray(const WrappedArray& value, size_type copies)
-{
-  using DataTypes::ValueType;
-  if (m_array_data!=0) {
-    arrayManager.delete_array(m_array_data);
-  }
-  DataTypes::ShapeType tempShape=value.getShape();
-  DataVector::size_type nelements=DataTypes::noValues(tempShape)*copies;
-  m_array_data = arrayManager.new_array(1,nelements);
-  m_size=nelements;	// total amount of elements
-  m_dim=m_size;		// elements per sample
-  m_N=1;			// number of samples
-  copyFromArrayToOffset(value,0,copies);
-}
+         }
+         break;
+      case 3:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+		   if (needsep)
+		   {
+			os << sep;
+		   }
+		   else
+		   {
+			needsep=true;
+		   }
+                   os << data[offset+getRelIndex(shape,i,j,k)];
+               }
+            }
+         }
+         break;
+      case 4:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  for (int l=0;l<shape[3];l++) {
+			if (needsep)
+			{
+				os << sep;
+			}
+			else
+			{
+				needsep=true;
+			}
+			os << data[offset+getRelIndex(shape,i,j,k,l)];
+                  }
+               }
+            }
+         }
+         break;
+      default:
+         stringstream mess;
+         mess << "Error - (pointToStream) Invalid rank: " << getRank(shape);
+         throw DataException(mess.str());
+      }
+   }
+
+
+   std::string
+   DataTypes::pointToString(const CplxVectorType& data,const ShapeType& shape, int offset, const std::string& prefix)
+   {
+      using namespace std;
+      ESYS_ASSERT(data.size()>0,"Error - Data object is empty.");
+      stringstream temp;
+      string finalPrefix=prefix;
+      if (prefix.length() > 0) {
+         finalPrefix+=" ";
+      }
+      switch (getRank(shape)) {
+      case 0:
+         temp << finalPrefix;
+	 cplxout(temp,data[offset]);
+         break;
+      case 1:
+         for (int i=0;i<shape[0];i++) {
+            temp << finalPrefix << "(" << i <<  ") ";
+	    cplxout(temp,data[i+offset]);
+            if (i!=(shape[0]-1)) {
+               temp << endl;
+            }
+         }
+         break;
+      case 2:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               temp << finalPrefix << "(" << i << "," << j << ") ";
+	       	 cplxout(temp,data[offset+getRelIndex(shape,i,j)]);
+               if (!(i==(shape[0]-1) && j==(shape[1]-1))) {
+                  temp << endl;
+               }
+            }
+         }
+         break;
+      case 3:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  temp << finalPrefix << "(" << i << "," << j << "," << k << ") ";
+		  cplxout(temp,data[offset+getRelIndex(shape,i,j,k)]);
+                  if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1))) {
+                     temp << endl;
+                  }
+               }
+            }
+         }
+         break;
+      case 4:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  for (int l=0;l<shape[3];l++) {
+                     temp << finalPrefix << "(" << i << "," << j << "," << k << "," << l << ") ";
+		     cplxout(temp,data[offset+getRelIndex(shape,i,j,k,l)]);
+                     if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1) && l==(shape[3]-1))) {
+                        temp << endl;
+                     }
+                  }
+               }
+            }
+         }
+         break;
+      default:
+         stringstream mess;
+         mess << "Error - (toString) Invalid rank: " << getRank(shape);
+         throw DataException(mess.str());
+      }
+      return temp.str();
+   }
+
+   std::string
+   DataTypes::pointToString(const RealVectorType& data,const ShapeType& shape, int offset, const std::string& prefix)
+   {
+      using namespace std;
+      ESYS_ASSERT(data.size()>0,"Error - Data object is empty.");
+      stringstream temp;
+      string finalPrefix=prefix;
+      if (prefix.length() > 0) {
+         finalPrefix+=" ";
+      }
+      switch (getRank(shape)) {
+      case 0:
+         temp << finalPrefix << data[offset];
+         break;
+      case 1:
+         for (int i=0;i<shape[0];i++) {
+            temp << finalPrefix << "(" << i <<  ") " << data[i+offset];
+            if (i!=(shape[0]-1)) {
+               temp << endl;
+            }
+         }
+         break;
+      case 2:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               temp << finalPrefix << "(" << i << "," << j << ") " << data[offset+getRelIndex(shape,i,j)];
+               if (!(i==(shape[0]-1) && j==(shape[1]-1))) {
+                  temp << endl;
+               }
+            }
+         }
+         break;
+      case 3:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  temp << finalPrefix << "(" << i << "," << j << "," << k << ") " << data[offset+getRelIndex(shape,i,j,k)];
+                  if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1))) {
+                     temp << endl;
+                  }
+               }
+            }
+         }
+         break;
+      case 4:
+         for (int i=0;i<shape[0];i++) {
+            for (int j=0;j<shape[1];j++) {
+               for (int k=0;k<shape[2];k++) {
+                  for (int l=0;l<shape[3];l++) {
+                     temp << finalPrefix << "(" << i << "," << j << "," << k << "," << l << ") " << data[offset+getRelIndex(shape,i,j,k,l)];
+                     if (!(i==(shape[0]-1) && j==(shape[1]-1) && k==(shape[2]-1) && l==(shape[3]-1))) {
+                        temp << endl;
+                     }
+                  }
+               }
+            }
+         }
+         break;
+      default:
+         stringstream mess;
+         mess << "Error - (toString) Invalid rank: " << getRank(shape);
+         throw DataException(mess.str());
+      }
+      return temp.str();
+   }
+
+
+   void DataTypes::copyPoint(RealVectorType& dest, RealVectorType::size_type doffset, RealVectorType::size_type nvals, const RealVectorType& src, RealVectorType::size_type soffset)
+   {
+      ESYS_ASSERT((dest.size()>0&&src.size()>0&&checkOffset(doffset,dest.size(),nvals)),
+                 "Error - Couldn't copy due to insufficient storage.");
+      if (checkOffset(doffset,dest.size(),nvals) && checkOffset(soffset,src.size(),nvals)) {
+         memcpy(&dest[doffset],&src[soffset],sizeof(real_t)*nvals);
+      } else {
+         throw DataException("Error - invalid offset specified.");
+      }
+   }
+
+   void DataTypes::copyPoint(CplxVectorType& dest, CplxVectorType::size_type doffset, CplxVectorType::size_type nvals, const CplxVectorType& src, CplxVectorType::size_type soffset)
+   {
+      ESYS_ASSERT((dest.size()>0&&src.size()>0&&checkOffset(doffset,dest.size(),nvals)),
+                 "Error - Couldn't copy due to insufficient storage.");
+      if (checkOffset(doffset,dest.size(),nvals) && checkOffset(soffset,src.size(),nvals)) {
+         memcpy(&dest[doffset],&src[soffset],sizeof(cplx_t)*nvals);
+      } else {
+         throw DataException("Error - invalid offset specified.");
+      }
+   }
+
+   /**
+    * \brief Copy data from a real vector to a complex vector.
+    * The complex vector will be resized as needed and any previous
+    * values will be replaced.
+   */
+   void DataTypes::fillComplexFromReal(const RealVectorType& r, CplxVectorType& c)
+   {
+       if (c.size()!=r.size())
+       {
+	   c.resize(r.size(), 0, 1);
+       }
+       size_t limit=r.size();
+       #pragma omp parallel for schedule(static)
+       for (size_t i=0;i<limit;++i)
+       {
+	   c[i]=r[i];
+       }
+   }
 
 } // end of namespace
+
diff --git a/escriptcore/src/DataVector.h b/escriptcore/src/DataVector.h
index 9488b16..0943016 100644
--- a/escriptcore/src/DataVector.h
+++ b/escriptcore/src/DataVector.h
@@ -14,225 +14,385 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_DATAVECTOR_H__
+#define __ESCRIPT_DATAVECTOR_H__
 
-#if !defined escript_DataVector_20050324_H
-#define escript_DataVector_20050324_H
 #include "system_dep.h"
+#include "DataTypes.h"
+#include "Assert.h"
+#include "DataVectorAlt.h"
+#include "DataVectorTaipan.h"
 
-#include "esysUtils/EsysAssert.h"
+// prevent other translation units from implicitly instantiating the complex version;
+// the explicit instantiation is provided in DataVectorAlt.cpp
+extern template class escript::DataTypes::DataVectorAlt<escript::DataTypes::cplx_t>;
 
-#include <vector>
-#include <iostream>
-#include <fstream>
 
 namespace escript {
 
-class WrappedArray;
-
-/**
-   \brief
-   DataVector implements an arbitrarily long vector of data values.
-   DataVector is the underlying data container for Data objects.
-
-   Description:
-   DataVector provides an implementation of a vector of data values for use
-   by DataBlocks2D and DataArrayView. Hiding the vector in this container
-   allows different implementations to be swapped in without disrupting the
-   client classes.
-*/
-
-class ESCRIPT_DLL_API DataVector {
-
- public:
-
-  //
-  // The type of the elements stored in the vector.
-  typedef double ElementType;
-
-  //
-  // The underlying type used to implement the vector.
-  typedef ElementType *  ValueType;
-  typedef const ElementType * ConstValueType;
-
-  //
-  // Various types exported to clients of this class.
-  typedef ElementType          value_type;
-  typedef long                 size_type;
-  typedef ElementType &        reference;
-  typedef const ElementType &  const_reference;
-
-  /**
-     \brief
-     Default constructor for DataVector.
-
-     Description:
-     Constructs an empty DataVector object.
-  */
-  DataVector();
-
-  /**
-     \brief
-     Copy constructor for DataVector.
-
-     Description:
-     Constructs a DataVector object which is a copy of the
-     given DataVector object.
-  */
-  DataVector(const DataVector& other);
-
-  /**
-     \brief
-     Constructor for DataVector.
-
-     Description:
-     Constructs a DataVector object of length "size" with all elements
-     initilised to "val".
-
-     \param size - Input - Number of elements in the vector.
-     \param val - Input - Initial value for all elements in the vector. Default is 0.0.
-     \param blockSize - Input - size of blocks within the vector, overall vector
-                size must be a precise multiple of the block size. Default is 1.
-
-     In escript::Data, blocksize corresponds to the number of elements required to hold all
-     the data-points for a sample, ie: the product of the dimensions of a data-point and the
-     number of data-points per sample. Size is the total number of elements required to hold
-     all elements for all data-points in the given object, ie: number of samples * blocksize.
-  */
-  DataVector(const size_type size,
-             const value_type val=0.0,
-             const size_type blockSize=1);
-
-  /**
-     \brief
-     Default destructor for DataVector.
-
-     Description:
-     Destroys the current DataVector object.
-  */
-  ~DataVector();
-
-  /**
-     \brief
-     Resize the DataVector to the given length "newSize".
-     All current data is lost. All elements in the new DataVector are
-     initialised to "newVal".
-
-     \param newSize - Input - New size for the vector.
-     \param newVal - Input - New initial value for all elements in the vector.
-     \param newBlockSize - Input - New block size for the vector.
-  */
-  void
-  resize(const size_type newSize,
-         const value_type newVal=0.0,
-         const size_type newBlockSize=1);
-
-  /**
-    \brief 
-    Populates the vector with the data from value.
-    This method currently throws an exception if the specified number of copies won't fit.
-    \warning This function does not attempt to perform shape checking.
-  */
-  void
-  copyFromArray(const escript::WrappedArray& value, size_type copies);
-
-  void 
-  copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies);
-
-
-  /**
-     \brief
-     Return the number of elements in this DataVector.
-  */
-  inline
-  size_type
-  size() const;
-
-  /**
-     \brief
-     DataVector assignment operator "=".
-     Assign the given DataVector object to this.
-  */
-  DataVector&
-  operator=(const DataVector& other);
+// Functions in DataTypes:: which manipulate DataVectors
+namespace DataTypes
+{
+  
+  // DataVectorTaipan was the previous default implementation:
+  //typedef DataVectorTaipan DataVector;
+  typedef escript::DataTypes::DataVectorAlt<real_t> RealVectorType;//!< Vector to store underlying data.
+  typedef escript::DataTypes::DataVectorAlt<cplx_t> CplxVectorType;
+
+   /**
+      \brief Display a single value (with the specified shape) from the data.
+
+     Despite its similar name this function behaves differently to pointToString.
+     There are no prefixes or (i,j,k) identifiers on each field, and each datapoint is
+     printed without newlines.
+     It also takes a raw pointer rather than a vector, so be careful what you pass it.
+
+     \param os - stream to write to
+     \param data - pointer to the start of the datapoint values
+     \param shape - shape of the datapoint
+     \param offset - start of the datapoint within data
+     \param needsep - Does this output need to start with a separator
+     \param sep - separator string to print between components
+   */
+   void
+   pointToStream(std::ostream& os, const RealVectorType::ElementType* data,const ShapeType& shape, int offset, bool needsep=true, const std::string& sep=",");
+
+   /**
+      \brief Display a single value (with the specified shape) from the data.
+
+     Despite its similar name this function behaves differently to pointToString.
+     There are no prefixes or (i,j,k) identifiers on each field, and each datapoint is
+     printed without newlines.
+     It also takes a raw pointer rather than a vector, so be careful what you pass it.
+
+     \param os - stream to write to
+     \param data - pointer to the start of the datapoint values
+     \param shape - shape of the datapoint
+     \param offset - start of the datapoint within data
+     \param needsep - Does this output need to start with a separator
+     \param sep - separator string to print between components
+   */
+   void
+   pointToStream(std::ostream& os, const CplxVectorType::ElementType* data,const ShapeType& shape, int offset, bool needsep=true, const std::string& sep=",");
+
+   /**
+      \brief Display a single value (with the specified shape) from the data.
+
+     \param data - vector containing the datapoint
+     \param shape - shape of the datapoint
+     \param offset - start of the datapoint within data
+     \param prefix - string to prepend to the output
+   */
+   std::string
+   pointToString(const RealVectorType& data,const ShapeType& shape, int offset, const std::string& prefix);
+
+
+   std::string
+   pointToString(const CplxVectorType& data,const ShapeType& shape, int offset, const std::string& prefix);
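+
+   /* Editorial usage sketch (not part of the upstream header; assumes ShapeType can be
+      built as a std::vector of dimension sizes, as its use elsewhere here suggests):
+
+         DataTypes::ShapeType shape;                 // a 2x3 datapoint
+         shape.push_back(2); shape.push_back(3);
+         DataTypes::RealVectorType v(DataTypes::noValues(shape), 0.0);
+         std::cout << DataTypes::pointToString(v, shape, 0, "A") << std::endl;
+
+      pointToString labels every component on its own line as "A (i,j) value", whereas
+      pointToStream writes the raw components separated by `sep` with no labels. */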
+
+   /**
+      \brief  Copy a point from one vector to another. Note: This version does not check to see if shapes are the same.
+
+   \param dest - vector to copy to
+   \param doffset - beginning of the target datapoint in dest
+   \param nvals - the number of values comprising the datapoint
+   \param src - vector to copy from
+   \param soffset - beginning of the datapoint in src
+   */
+   void copyPoint(RealVectorType& dest, vec_size_type doffset, vec_size_type nvals, const RealVectorType& src, vec_size_type soffset);
+
+   /**
+      \brief  Copy a point from one vector to another. Note: This version does not check to see if shapes are the same.
+
+   \param dest - vector to copy to
+   \param doffset - beginning of the target datapoint in dest
+   \param nvals - the number of values comprising the datapoint
+   \param src - vector to copy from
+   \param soffset - beginning of the datapoint in src
+   */
+   void copyPoint(CplxVectorType& dest, vec_size_type doffset, vec_size_type nvals, const CplxVectorType& src, vec_size_type soffset);
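+
+   /* Editorial usage sketch: to copy the p-th datapoint (nvals values per point) from
+      one point-by-point vector into another one would call
+         copyPoint(dest, p*nvals, nvals, src, p*nvals);
+      Both offsets are validated against the vector sizes and a DataException is thrown
+      if either datapoint does not fit. */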
+
+   /**
+    * \brief Copy data from a real vector to a complex vector.
+    * The complex vector will be resized as needed and any previous
+    * values will be replaced.
+   */
+   void fillComplexFromReal(const RealVectorType& r, CplxVectorType& c);
 
   /**
      \brief
-     DataVector equality comparison operator "==".
-     Return true if the given DataVector is equal to this.
+     Copy a data slice specified by the given region and offset from the
+     "other" vector into the "left" vector at the given offset.
+
+     \param left - vector to copy into
+     \param leftShape - shape of datapoints for the left vector
+     \param leftOffset - location within left to start copying to
+     \param other - vector to copy from
+     \param otherShape - shape of datapoints for the other vector
+     \param otherOffset - location within other vector to start copying from
+     \param region - Input -
+                      Region in other vector to copy data from.
   */
-  bool
-  operator==(const DataVector& other) const;
+   template <class VEC>
+   ESCRIPT_DLL_API
+   void
+   copySlice(VEC& left,
+             const ShapeType& leftShape,
+             typename VEC::size_type leftOffset,
+             const VEC& other,
+             const ShapeType& otherShape,
+             typename VEC::size_type otherOffset,
+             const RegionLoopRangeType& region)
+   {
+      //
+      // Make sure vectors are not empty
+
+      ESYS_ASSERT(left.size()!=0, "left data is empty.");
+      ESYS_ASSERT(other.size()!=0, "other data is empty.");
+
+      //
+      // Check the vector to be sliced from is compatible with the region to be sliced,
+      // and that the region to be sliced is compatible with this vector:
+      ESYS_ASSERT(checkOffset(leftOffset,left.size(),noValues(leftShape)),
+                 "offset incompatible with this vector.");
+      ESYS_ASSERT(otherOffset+noValues(leftShape)<=other.size(),
+                 "offset incompatible with other vector.");
+
+      ESYS_ASSERT(getRank(otherShape)==region.size(),
+                 "slice not same rank as vector to be sliced from.");
+
+      ESYS_ASSERT(noValues(leftShape)==noValues(getResultSliceShape(region)),
+                 "slice shape not compatible shape for this vector.");
+
+      //
+      // copy the values in the specified region of the other vector into this vector
+
+      // the following loops cannot be parallelised due to the numCopy counter
+      int numCopy=0;
+
+      switch (region.size()) {
+      case 0:
+         /* This case should never be encountered, as Python will never pass us an
+            empty region. It is here for completeness only and allows slicing of a scalar. */
+//          (*m_data)[leftOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex()];
+
+         left[leftOffset+numCopy]=other[otherOffset];
+         numCopy++;
+         break;
+      case 1:
+         for (int i=region[0].first;i<region[0].second;i++) {
+            left[leftOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i)];
+            numCopy++;
+         }
+         break;
+      case 2:
+         for (int j=region[1].first;j<region[1].second;j++) {
+            for (int i=region[0].first;i<region[0].second;i++) {
+/*               (*m_data)[leftOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j)];*/
+               left[leftOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j)];
+               numCopy++;
+            }
+         }
+         break;
+      case 3:
+         for (int k=region[2].first;k<region[2].second;k++) {
+            for (int j=region[1].first;j<region[1].second;j++) {
+               for (int i=region[0].first;i<region[0].second;i++) {
+//                  (*m_data)[leftOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j,k)];
+                  left[leftOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j,k)];
+                  numCopy++;
+               }
+            }
+         }
+         break;
+      case 4:
+         for (int l=region[3].first;l<region[3].second;l++) {
+            for (int k=region[2].first;k<region[2].second;k++) {
+               for (int j=region[1].first;j<region[1].second;j++) {
+                  for (int i=region[0].first;i<region[0].second;i++) {
+/*                     (*m_data)[leftOffset+numCopy]=(*other.m_data)[otherOffset+other.relIndex(i,j,k,l)];*/
+                     left[leftOffset+numCopy]=other[otherOffset+getRelIndex(otherShape,i,j,k,l)];
+                     numCopy++;
+                  }
+               }
+            }
+         }
+         break;
+      default:
+         std::stringstream mess;
+         mess << "Error - (copySlice) Invalid slice region rank: " << region.size();
+         throw DataException(mess.str());
+      }
+   }
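+
+   /* Editorial note: the region argument gives a half-open [first,second) index range
+      per dimension of the *other* vector, and the selected values are packed
+      contiguously into left starting at leftOffset.  For example, with
+      otherShape = (3,3) and region = {(1,3),(0,2)} the values other(1,0), other(2,0),
+      other(1,1), other(2,1) are copied (in that order) into
+      left[leftOffset .. leftOffset+3], so leftShape must satisfy noValues(leftShape)==4. */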
 
   /**
      \brief
-     DataVector inequality comparison operator "!=".
-     Return true if the given DataVector is not equal to this.
-  */
-  bool
-  operator!=(const DataVector& other) const;
-
-  /**
-    \brief
-    Return a reference to the element at position i in this DataVector.
-    Will throw an exception if an invalid index "i" is given.
-
-    NB: access to the element one past the end of the vector is permitted
-    in order to provide a facility equivalent to an end() pointer.
+     Copy data into a slice specified by the given region and offset in
+     the left vector from the other vector at the given offset.
+
+     \param left - vector to copy into
+     \param leftShape - shape of datapoints for the left vector
+     \param leftOffset - location within left to start copying to
+     \param other - vector to copy from
+     \param otherShape - shape of datapoints for the other vector
+     \param otherOffset - location within other vector to start copying from
+     \param region - Input -
+                      Region in the left vector to copy data to.
   */
-  inline
-  reference
-  operator[](const size_type i);
-
-  inline
-  const_reference
-  operator[](const size_type i) const;
-
-
- protected:
-
- private:
-
-  size_type m_size;
-  size_type m_dim;
-  size_type m_N;
-
-  //
-  // The container for the elements contained in this DataVector.
-  ValueType m_array_data;
-};
-
-/**
-  \brief
-  releases unused memory in the memory manager.
-*/
-                                                                                                                                                                                                     
-ESCRIPT_DLL_API void releaseUnusedMemory();
-                                                                                                                                                                                                     
-
-
-inline
-DataVector::size_type
-DataVector::size() const
-{
-  return m_size;
-}
-
-inline
-DataVector::reference
-DataVector::operator[](const DataVector::size_type i)
-{
-  EsysAssert(i<size(),"DataVector: invalid index specified. " << i << " of " << size());
-  return m_array_data[i];
-}
-
-inline
-DataVector::const_reference
-DataVector::operator[](const DataVector::size_type i) const
-{
-  EsysAssert(i<size(),"DataVector: invalid index specified. " << i << " of " << size());
-  return m_array_data[i];
+   template<typename VEC>
+   ESCRIPT_DLL_API
+   void
+   copySliceFrom(VEC& left,
+                 const ShapeType& leftShape,
+                 typename VEC::size_type leftOffset,
+                 const VEC& other,
+                 const ShapeType& otherShape,
+                 typename VEC::size_type otherOffset,
+                 const RegionLoopRangeType& region)
+   {
+      //
+      // Make sure vectors are not empty
+
+      ESYS_ASSERT(left.size()!=0, "this vector is empty.");
+      ESYS_ASSERT(other.size()!=0, "other vector is empty.");
+
+      //
+      // Check this vector is compatible with the region to be sliced,
+      // and that the region to be sliced is compatible with the other vector:
+
+      ESYS_ASSERT(checkOffset(otherOffset,other.size(),noValues(otherShape)),
+                 "offset incompatible with other vector.");
+      ESYS_ASSERT(leftOffset+noValues(otherShape)<=left.size(),
+                 "offset incompatible with this vector.");
+
+      ESYS_ASSERT(getRank(leftShape)==region.size(),
+                 "slice not same rank as this vector.");
+
+      ESYS_ASSERT(getRank(otherShape)==0 || noValues(otherShape)==noValues(getResultSliceShape(region)),
+                 "slice shape not compatible shape for other vector.");
+
+      //
+      // copy the values in the other vector into the specified region of this vector
+
+      // allow for case where other vector is a scalar
+      if (getRank(otherShape)==0) {
+
+         // the following loops cannot be parallelised due to the numCopy counter
+         int numCopy=0;
+
+         switch (region.size()) {
+         case 0:
+            /* This case should never be encountered, as Python will never pass us an
+               empty region. It is here for completeness only and allows slicing of a scalar. */
+            //(*m_data)[leftOffset+relIndex()]=(*other.m_data)[otherOffset];
+            left[leftOffset]=other[otherOffset];
+            numCopy++;
+            break;
+         case 1:
+            for (int i=region[0].first;i<region[0].second;i++) {
+               left[leftOffset+getRelIndex(leftShape,i)]=other[otherOffset];
+               numCopy++;
+            }
+            break;
+         case 2:
+            for (int j=region[1].first;j<region[1].second;j++) {
+               for (int i=region[0].first;i<region[0].second;i++) {
+                  left[leftOffset+getRelIndex(leftShape,i,j)]=other[otherOffset];
+                  numCopy++;
+               }
+            }
+            break;
+         case 3:
+            for (int k=region[2].first;k<region[2].second;k++) {
+               for (int j=region[1].first;j<region[1].second;j++) {
+                  for (int i=region[0].first;i<region[0].second;i++) {
+                     left[leftOffset+getRelIndex(leftShape,i,j,k)]=other[otherOffset];
+                     numCopy++;
+                  }
+               }
+            }
+            break;
+         case 4:
+            for (int l=region[3].first;l<region[3].second;l++) {
+               for (int k=region[2].first;k<region[2].second;k++) {
+                  for (int j=region[1].first;j<region[1].second;j++) {
+                     for (int i=region[0].first;i<region[0].second;i++) {
+                        left[leftOffset+getRelIndex(leftShape,i,j,k,l)]=other[otherOffset];
+                        numCopy++;
+                     }
+                  }
+               }
+            }
+            break;
+         default:
+            std::stringstream mess;
+            mess << "Error - (copySliceFrom) Invalid slice region rank: " << region.size();
+            throw DataException(mess.str());
+         }
+
+      } else {
+
+         // the following loops cannot be parallelised due to the numCopy counter
+         int numCopy=0;
+
+         switch (region.size()) {
+         case 0:
+            /* This case should never be encountered, as Python will never pass us an
+               empty region. It is here for completeness only and allows slicing of a scalar. */
+            //(*m_data)[leftOffset+relIndex()]=(*other.m_data)[otherOffset+numCopy];
+            left[leftOffset]=other[otherOffset+numCopy];
+            numCopy++;
+            break;
+         case 1:
+            for (int i=region[0].first;i<region[0].second;i++) {
+               left[leftOffset+getRelIndex(leftShape,i)]=other[otherOffset+numCopy];
+               numCopy++;
+            }
+            break;
+         case 2:
+            for (int j=region[1].first;j<region[1].second;j++) {
+               for (int i=region[0].first;i<region[0].second;i++) {
+                  left[leftOffset+getRelIndex(leftShape,i,j)]=other[otherOffset+numCopy];
+                  numCopy++;
+               }
+            }
+            break;
+         case 3:
+            for (int k=region[2].first;k<region[2].second;k++) {
+               for (int j=region[1].first;j<region[1].second;j++) {
+                  for (int i=region[0].first;i<region[0].second;i++) {
+                     left[leftOffset+getRelIndex(leftShape,i,j,k)]=other[otherOffset+numCopy];
+                     numCopy++;
+                  }
+               }
+            }
+            break;
+         case 4:
+            for (int l=region[3].first;l<region[3].second;l++) {
+               for (int k=region[2].first;k<region[2].second;k++) {
+                  for (int j=region[1].first;j<region[1].second;j++) {
+                     for (int i=region[0].first;i<region[0].second;i++) {
+                        left[leftOffset+getRelIndex(leftShape,i,j,k,l)]=other[otherOffset+numCopy];
+                        numCopy++;
+                     }
+                  }
+               }
+            }
+            break;
+         default:
+            std::stringstream mess;
+            mess << "Error - (copySliceFrom) Invalid slice region rank: " << region.size();
+            throw DataException(mess.str());
+         }
+
+      }
+
+   }
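+
+   /* Editorial note: copySliceFrom is the inverse of copySlice above; values read
+      consecutively from `other` are scattered into the given region of `left`.
+      The first branch handles the special case where `other` is rank 0, in which
+      case its single value is broadcast to every position in the region. */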
 }
 
 } // end of namespace
 
-#endif
+#endif // __ESCRIPT_DATAVECTOR_H__
+
diff --git a/escriptcore/src/DataVectorAlt.cpp b/escriptcore/src/DataVectorAlt.cpp
new file mode 100644
index 0000000..4e5dca3
--- /dev/null
+++ b/escriptcore/src/DataVectorAlt.cpp
@@ -0,0 +1,113 @@
+#include "DataVectorAlt.h"
+
+/* This file provides custom implementations of methods for the complex-valued DataVectorAlt.
+   It also explicitly instantiates the complex version of the template to ensure linkage.
+*/
+
+namespace escript
+{
+  
+namespace DataTypes
+{
+
+// Please make sure that any implementation changes here are reflected in the generic version in the .h file
+template<>
+void 
+DataVectorAlt<DataTypes::cplx_t>::copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies)
+{
+  const DataTypes::ShapeType& tempShape=value.getShape();
+  size_type len=DataTypes::noValues(tempShape);
+  if (offset+len*copies>size())
+  {
+     std::ostringstream ss;
+     ss << "Error - not enough room for that DataPoint at that offset. (";
+     ss << "offset=" << offset << " + " << " len=" << len << " >= " << size();
+     throw DataException(ss.str());
+  }
+  size_type si=0,sj=0,sk=0,sl=0;
+  switch (value.getRank())
+  {
+  case 0:	
+	for (size_type z=0;z<copies;++z)
+	{
+	   m_array_data[offset+z]=value.getEltC();
+	}
+	break;
+  case 1:
+	for (size_type z=0;z<copies;++z)
+	{
+	   for (size_t i=0;i<tempShape[0];++i)
+	   {
+	      m_array_data[offset+i]=value.getEltC(i);
+	   }
+	   offset+=len;
+	}
+	break;
+  case 2:
+	si=tempShape[0];
+	sj=tempShape[1];
+	for (size_type z=0;z<copies;++z)
+	{
+           for (size_type i=0;i<si;i++)
+	   {
+              for (size_type j=0;j<sj;j++)
+	      {
+                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j)]=value.getEltC(i,j);
+              }
+           }
+	   offset+=len;
+	}
+	break;
+  case 3:
+	si=tempShape[0];
+	sj=tempShape[1];
+	sk=tempShape[2];
+	for (size_type z=0;z<copies;++z) 
+	{
+          for (size_type i=0;i<si;i++)
+	  {
+            for (size_type j=0;j<sj;j++)
+	    {
+              for (size_type k=0;k<sk;k++)
+	      {
+                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k)]=value.getEltC(i,j,k);
+              }
+            }
+          }
+	  offset+=len;
+	}
+	break;
+  case 4:
+	si=tempShape[0];
+	sj=tempShape[1];
+	sk=tempShape[2];
+	sl=tempShape[3];
+	for (size_type z=0;z<copies;++z)
+	{
+          for (size_type i=0;i<si;i++)
+	  {
+            for (size_type j=0;j<sj;j++)
+	    {
+              for (size_type k=0;k<sk;k++)
+	      {
+                 for (size_type l=0;l<sl;l++)
+		 {
+                    m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k,l)]=value.getEltC(i,j,k,l);
+                 }
+              }
+            }
+          }
+	  offset+=len;
+	}
+	break;
+  default:
+	std::ostringstream oss;
+	oss << "Error - unknown rank. Rank=" << value.getRank();
+	throw DataException(oss.str());
+  }
+}
+
+template class DataVectorAlt<DataTypes::cplx_t>;
+
+}	// end namespace
+}	// end namespace
diff --git a/escriptcore/src/DataVectorAlt.h b/escriptcore/src/DataVectorAlt.h
new file mode 100644
index 0000000..00030d1
--- /dev/null
+++ b/escriptcore/src/DataVectorAlt.h
@@ -0,0 +1,485 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+#ifndef __ESCRIPT_DATAVECTORALT_H__
+#define __ESCRIPT_DATAVECTORALT_H__
+
+#include "DataTypes.h"
+#include "system_dep.h"
+#include "Assert.h"
+#include "DataException.h"
+#include "WrappedArray.h"
+
+#include <sstream>
+
+namespace escript
+{
+
+namespace DataTypes
+{
+
+template <class T>
+class ESCRIPT_DLL_API DataVectorAlt {
+
+ public:
+
+  //
+  // The type of the elements stored in the vector.
+  typedef T ElementType;
+
+  //
+  // Various types exported to clients of this class.
+  
+  typedef const ElementType * const_pointer;
+  typedef ElementType          value_type;
+  typedef DataTypes::vec_size_type size_type;
+  typedef ElementType &        reference;
+  typedef const ElementType &  const_reference;
+
+  /**
+     \brief
+     Default constructor for DataVectorAlt.
+
+     Description:
+     Constructs an empty DataVectorAlt object.
+  */
+  DataVectorAlt();
+
+  /**
+     \brief
+     Copy constructor for DataVectorAlt.
+
+     Description:
+     Constructs a DataVectorAlt object which is a copy of the
+     given DataVectorAlt object.
+  */
+  DataVectorAlt(const DataVectorAlt<T>& other);
+
+  /**
+     \brief
+     Constructor for DataVectorAlt.
+
+     Description:
+     Constructs a DataVectorAlt object of length "size" with all elements
+     initialised to "val".
+
+     \param size - Input - Number of elements in the vector.
+     \param val - Input - Initial value for all elements in the vector. Default is 0.0.
+     \param blockSize - Input - size of blocks within the vector, overall vector
+                size must be a precise multiple of the block size. Default is 1.
+
+     In escript::Data, blocksize corresponds to the number of elements required to hold all
+     the data-points for a sample, ie: the product of the dimensions of a data-point and the
+     number of data-points per sample. Size is the total number of elements required to hold
+     all elements for all data-points in the given object, ie: number of samples * blocksize.
+  */
+  DataVectorAlt(const size_type size,
+             const value_type val=0.0,
+             const size_type blockSize=1);
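+
+  /* Editorial example of the size/blockSize relationship described above: a Data
+     object with 10 samples, each holding 4 data-points of shape (3,), would use
+     blockSize = 4*3 = 12 and size = 10*12 = 120.  size must always be a whole
+     multiple of blockSize; resize() enforces this. */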
+
+  /**
+     \brief
+     Default destructor for DataVectorAlt.
+
+     Description:
+     Destroys the current DataVectorAlt object.
+  */
+  ~DataVectorAlt();
+
+  /**
+     \brief
+     Resize the DataVectorAlt to the given length "newSize".
+     All current data is lost. All elements in the new DataVectorAlt are
+     initialised to "newVal".
+
+     \param newSize - Input - New size for the vector.
+     \param newVal - Input - New initial value for all elements in the vector.
+     \param newBlockSize - Input - New block size for the vector.
+  */
+  void
+  resize(const size_type newSize,
+         const value_type newVal=0.0,
+         const size_type newBlockSize=1);
+
+  /**
+    \brief 
+    Populates the vector with the data from value.
+    This method currently throws an exception if the specified number of copies won't fit.
+    \warning This function does not attempt to perform shape checking.
+  */
+  void
+  copyFromArray(const WrappedArray& value, size_type copies);
+
+  
+  // Please make sure that any implementation changes here are reflected in the specialised 
+  // version in the .cpp file
+  void 
+  copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies);
+
+
+  /**
+     \brief
+     Return the number of elements in this DataVectorAlt.
+  */
+  inline
+  size_type
+  size() const;
+
+  /**
+     \brief
+     DataVectorAlt assignment operator "=".
+     Assign the given DataVectorAlt object to this.
+  */
+  DataVectorAlt&
+  operator=(const DataVectorAlt<T>& other);
+
+  /**
+     \brief
+     DataVectorAlt equality comparison operator "==".
+     Return true if the given DataVectorAlt is equal to this.
+  */
+  bool
+  operator==(const DataVectorAlt<T>& other) const;
+
+  /**
+     \brief
+     DataVectorAlt inequality comparison operator "!=".
+     Return true if the given DataVectorAlt is not equal to this.
+  */
+  bool
+  operator!=(const DataVectorAlt<T>& other) const;
+
+  /**
+    \brief
+    Return a reference to the element at position i in this DataVectorAlt.
+    Will throw an exception if an invalid index "i" is given.
+
+    NB: access to the element one past the end of the vector is permitted
+    in order to provide a facility equivalent to an end() pointer.
+  */
+  inline
+  reference
+  operator[](const size_type i);
+
+  inline
+  const_reference
+  operator[](const size_type i) const;
+
+
+ protected:
+
+ private:
+
+  size_type m_size;   // total number of elements
+  size_type m_dim;    // elements per block (sample)
+  size_type m_N;      // number of blocks (samples)
+
+  ElementType* m_array_data;
+};
+
+
+template <class T>
+inline
+typename DataVectorAlt<T>::size_type
+DataVectorAlt<T>::size() const
+{
+  return m_size;
+}
+
+template <class T>
+inline
+typename DataVectorAlt<T>::reference
+DataVectorAlt<T>::operator[](const DataVectorAlt::size_type i)
+{
+  ESYS_ASSERT(i<size(), "DataVectorAlt: invalid index specified, " << i << " of " << size());
+  return m_array_data[i];
+}
+
+template <class T>
+inline
+typename DataVectorAlt<T>::const_reference
+DataVectorAlt<T>::operator[](const DataVectorAlt::size_type i) const
+{
+  ESYS_ASSERT(i<size(), "DataVectorAlt: invalid index specified. " << i << " of " << size());
+  return m_array_data[i];
+}
+
+
+  
+template <class T>
+DataTypes::DataVectorAlt<T>::DataVectorAlt() :
+  m_size(0),
+  m_dim(0),
+  m_N(0),
+  m_array_data(0)
+{
+}
+
+template <class T>
+DataTypes::DataVectorAlt<T>::DataVectorAlt(const DataVectorAlt& other) :
+  m_size(other.m_size),
+  m_dim(other.m_dim),
+  m_N(other.m_N),
+  m_array_data(0)
+{
+  m_array_data=reinterpret_cast<T*>(malloc(sizeof(T)*m_size));  
+  int i;
+  #pragma omp parallel for private(i) schedule(static)
+  for (i=0; i<m_size; i++) {
+    m_array_data[i] = other.m_array_data[i];
+  }
+}
+
+template <class T>
+DataTypes::DataVectorAlt<T>::DataVectorAlt(const DataVectorAlt<T>::size_type size,
+                       const DataVectorAlt<T>::value_type val,
+                       const DataVectorAlt<T>::size_type blockSize) :
+  m_size(size),
+  m_dim(blockSize),
+  m_array_data(0)
+{
+  resize(size, val, blockSize);
+}
+
+template <class T>
+DataTypes::DataVectorAlt<T>::~DataVectorAlt()
+{
+  // clear data members
+  m_size = -1;
+  m_dim = -1;
+  m_N = -1;
+  if (m_array_data!=0)
+  {
+      free(m_array_data);
+  }
+  m_array_data=0;
+}
+
+template <class T>
+void
+DataVectorAlt<T>::resize(const DataVectorAlt<T>::size_type newSize,
+                   const DataVectorAlt<T>::value_type newValue,
+                   const DataVectorAlt<T>::size_type newBlockSize)
+{
+        // The < 1 is to catch both ==0 and negatives
+  if ( newBlockSize < 1) {
+    std::ostringstream oss;
+    oss << "DataVectorAlt: invalid blockSize specified (" << newBlockSize << ')';    
+    throw DataException(oss.str());
+  }
+
+  if ( newSize < 0 ) {
+    std::ostringstream oss;
+    oss << "DataVectorAlt: invalid new size specified (" << newSize << ')';
+    throw DataException(oss.str());
+  }
+  if ( (newSize % newBlockSize) != 0) {
+    std::ostringstream oss;
+    oss << "DataVectorAlt: newSize is not a multiple of blockSize: (" << newSize << ", " << newBlockSize<< ')';
+    throw DataException(oss.str());
+  }
+
+  m_size = newSize;
+  m_dim = newBlockSize;
+  m_N = newSize / newBlockSize;
+
+  if (m_array_data!=0)
+  {
+     free(m_array_data);
+  } 
+  m_array_data=reinterpret_cast<T*>(malloc(sizeof(T)*m_size));  
+  int i;
+  #pragma omp parallel for private(i) schedule(static)
+  for (i=0; i<m_size; i++) {
+    m_array_data[i] = newValue;
+  }
+}
+
+template <class T>
+DataVectorAlt<T>&
+DataVectorAlt<T>::operator=(const DataVectorAlt& other)
+{
+  assert(m_size >= 0);
+
+
+  m_size = other.m_size;
+  m_dim = other.m_dim;
+  m_N = other.m_N;
+
+  if (m_array_data!=0)
+  {
+      free(m_array_data);
+  }
+  m_array_data=reinterpret_cast<T*>(malloc(sizeof(T)*m_size));
+  int i;
+  #pragma omp parallel for private(i) schedule(static)
+  for (i=0; i<m_size; i++) {
+    m_array_data[i] = other.m_array_data[i];
+  }
+
+  return *this;
+}
+
+template <class T>
+bool
+DataVectorAlt<T>::operator==(const DataVectorAlt& other) const
+{
+  assert(m_size >= 0);
+
+  if (m_size!=other.m_size) {
+    return false;
+  }
+  if (m_dim!=other.m_dim) {
+    return false;
+  }
+  if (m_N!=other.m_N) {
+    return false;
+  }
+  for (int i=0; i<m_size; i++) {
+    if (m_array_data[i] != other.m_array_data[i]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <class T>
+bool
+DataVectorAlt<T>::operator!=(const DataVectorAlt& other) const
+{
+  return !(*this==other);
+}
+
+template <class T>
+void 
+DataVectorAlt<T>::copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies)
+{
+  const DataTypes::ShapeType& tempShape=value.getShape();
+  size_type len=DataTypes::noValues(tempShape);
+  if (offset+len*copies>size())
+  {
+     std::ostringstream ss;
+     ss << "Error - not enough room for that DataPoint at that offset. (";
+     ss << "offset=" << offset << " + " << " len=" << len << " >= " << size();
+     throw DataException(ss.str());
+  }
+  size_type si=0,sj=0,sk=0,sl=0;
+  switch (value.getRank())
+  {
+  case 0:       
+        for (size_type z=0;z<copies;++z)
+        {
+           m_array_data[offset+z]=value.getElt();
+        }
+        break;
+  case 1:
+        for (size_type z=0;z<copies;++z)
+        {
+           for (size_t i=0;i<tempShape[0];++i)
+           {
+              m_array_data[offset+i]=value.getElt(i);
+           }
+           offset+=len;
+        }
+        break;
+  case 2:
+        si=tempShape[0];
+        sj=tempShape[1];
+        for (size_type z=0;z<copies;++z)
+        {
+           for (size_type i=0;i<si;i++)
+           {
+              for (size_type j=0;j<sj;j++)
+              {
+                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j)]=value.getElt(i,j);
+              }
+           }
+           offset+=len;
+        }
+        break;
+  case 3:
+        si=tempShape[0];
+        sj=tempShape[1];
+        sk=tempShape[2];
+        for (size_type z=0;z<copies;++z) 
+        {
+          for (size_type i=0;i<si;i++)
+          {
+            for (size_type j=0;j<sj;j++)
+            {
+              for (size_type k=0;k<sk;k++)
+              {
+                 m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k)]=value.getElt(i,j,k);
+              }
+            }
+          }
+          offset+=len;
+        }
+        break;
+  case 4:
+        si=tempShape[0];
+        sj=tempShape[1];
+        sk=tempShape[2];
+        sl=tempShape[3];
+        for (size_type z=0;z<copies;++z)
+        {
+          for (size_type i=0;i<si;i++)
+          {
+            for (size_type j=0;j<sj;j++)
+            {
+              for (size_type k=0;k<sk;k++)
+              {
+                 for (size_type l=0;l<sl;l++)
+                 {
+                    m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k,l)]=value.getElt(i,j,k,l);
+                 }
+              }
+            }
+          }
+          offset+=len;
+        }
+        break;
+  default:
+        std::ostringstream oss;
+        oss << "Error - unknown rank. Rank=" << value.getRank();
+        throw DataException(oss.str());
+  }
+}
+
+template <class T>
+void
+DataVectorAlt<T>::copyFromArray(const WrappedArray& value, size_type copies)
+{
+  DataTypes::ShapeType tempShape=value.getShape();
+  DataVectorAlt<T>::size_type nelements=DataTypes::noValues(tempShape)*copies;
+  if (m_array_data!=0)
+  {
+    free(m_array_data);
+  }
+  m_array_data=reinterpret_cast<T*>(malloc(sizeof(T)*nelements));
+  m_size=nelements;     // total number of elements
+  m_dim=m_size;         // elements per sample
+  m_N=1;                        // number of samples
+  copyFromArrayToOffset(value,0,copies);
+}
+
+
+
+} // end of namespace
+} // end of namespace
+
+#endif // __ESCRIPT_DATAVECTORALT_H__
diff --git a/escriptcore/src/DataVectorOps.cpp b/escriptcore/src/DataVectorOps.cpp
new file mode 100644
index 0000000..9912871
--- /dev/null
+++ b/escriptcore/src/DataVectorOps.cpp
@@ -0,0 +1,999 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "DataVectorOps.h"
+#include "DataTypes.h"
+
+#include <sstream>
+
+namespace
+{
+const int SUCCESS=0;
+const int BADRANK=1;
+const int NOTSQUARE=2;
+const int SHAPEMISMATCH=3;
+const int NOINVERSE=4;
+const int NEEDLAPACK=5;
+const int ERRFACTORISE=6;
+const int ERRINVERT=7;
+}
+
+namespace escript
+{
+
+  void
+  matMult(const DataTypes::RealVectorType& left, 
+	  const DataTypes::ShapeType& leftShape,
+	  DataTypes::RealVectorType::size_type leftOffset,
+          const DataTypes::RealVectorType& right,
+   	  const DataTypes::ShapeType& rightShape,
+	  DataTypes::RealVectorType::size_type rightOffset,
+          DataTypes::RealVectorType& result,
+	  const DataTypes::ShapeType& resultShape)
+   {
+      using namespace escript::DataTypes;
+      using namespace std; 
+
+      int leftRank=getRank(leftShape);
+      int rightRank=getRank(rightShape);
+      int resultRank=getRank(resultShape);
+      if (leftRank==0 || rightRank==0) {
+         stringstream temp;
+         temp << "Error - (matMult) Invalid for rank 0 objects.";
+         throw DataException(temp.str());
+      }
+
+      if (leftShape[leftRank-1] != rightShape[0]) {
+         stringstream temp;
+         temp << "Error - (matMult) Dimension: " << leftRank 
+              << ", size: " << leftShape[leftRank-1] 
+              << " of LHS and dimension: 1, size: " << rightShape[0]
+              << " of RHS don't match.";
+         throw DataException(temp.str());
+      }
+
+      int outputRank = leftRank+rightRank-2;
+
+      if (outputRank < 0) {
+         stringstream temp;
+         temp << "Error - (matMult) LHS and RHS cannot be multiplied "
+              << "as they have incompatible rank.";
+         throw DataException(temp.str());
+      }
+
+      if (outputRank != resultRank) {
+         stringstream temp;
+         temp << "Error - (matMult) Rank of result array is: " 
+              << resultRank 
+              << " it must be: " << outputRank;
+         throw DataException(temp.str());
+      }
+
+      for (int i=0; i<(leftRank-1); i++) {
+         if (leftShape[i] != resultShape[i]) {
+            stringstream temp;
+            temp << "Error - (matMult) Dimension: " << i 
+                 << " of LHS and result array don't match.";
+            throw DataException(temp.str());
+         }
+      }
+
+      for (int i=1; i<rightRank; i++) {
+         if (rightShape[i] != resultShape[i+leftRank-2]) {
+            stringstream temp;
+            temp << "Error - (matMult) Dimension: " << i
+                 << ", size: " << rightShape[i]
+                 << " of RHS and dimension: " << i+leftRank-2
+                 << ", size: " << resultShape[i+leftRank-2]
+                 << " of result array don't match.";
+            throw DataException(temp.str());
+         }
+      }
+
+      switch (leftRank) {
+
+      case 1:
+         switch (rightRank) {
+         case 1:
+            result[0]=0;
+            for (int i=0;i<leftShape[0];i++) {
+               result[0]+=left[i+leftOffset]*right[i+rightOffset];
+            }
+            break;
+         case 2:
+            for (int i=0;i<resultShape[0];i++) {
+               result[i]=0;
+               for (int j=0;j<rightShape[0];j++) {
+                  result[i]+=left[j+leftOffset]*right[getRelIndex(rightShape,j,i)+rightOffset];
+               }
+            }
+            break;
+         default:
+            stringstream temp; temp << "Error - (matMult) Invalid rank. Programming error.";
+            throw DataException(temp.str());
+            break;
+         }
+         break;
+
+      case 2:
+         switch (rightRank) {
+         case 1:
+            result[0]=0;
+            for (int i=0;i<leftShape[0];i++) {
+               result[i]=0;
+               for (int j=0;j<leftShape[1];j++) {
+                  result[i]+=left[leftOffset+getRelIndex(leftShape,i,j)]*right[j+rightOffset];
+               }
+            }
+	    break;
+         case 2:
+            for (int i=0;i<resultShape[0];i++) {
+               for (int j=0;j<resultShape[1];j++) {
+                  result[getRelIndex(resultShape,i,j)]=0;
+                  for (int jR=0;jR<rightShape[0];jR++) {
+                     result[getRelIndex(resultShape,i,j)]+=left[leftOffset+getRelIndex(leftShape,i,jR)]*right[rightOffset+getRelIndex(rightShape,jR,j)];
+                  }
+               }
+            }
+            break;
+         default:
+            stringstream temp; temp << "Error - (matMult) Invalid rank. Programming error.";
+            throw DataException(temp.str());
+            break;
+         }
+         break;
+
+      default:
+         stringstream temp; temp << "Error - (matMult) Not supported for rank: " << leftRank;
+         throw DataException(temp.str());
+         break;
+      }
+
+   }
+
+
+   DataTypes::ShapeType
+   determineResultShape(const DataTypes::ShapeType& left,
+                       const DataTypes::ShapeType& right)
+   {
+      DataTypes::ShapeType result;
+      for (int i=0; i<(DataTypes::getRank(left)-1); i++) {
+         result.push_back(left[i]);
+      }
+      for (int i=1; i<DataTypes::getRank(right); i++) {
+         result.push_back(right[i]);
+      }
+      return result;
+   }
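+
+   /* Editorial example: determineResultShape concatenates all but the last dimension
+      of the left shape with all but the first dimension of the right shape, matching
+      the outputRank = leftRank+rightRank-2 rule used by matMult above.
+      E.g. left shape (2,3) and right shape (3,4) give a result shape of (2,4). */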
+
+
+
+
+void matrixInverseError(int err)
+{
+    switch (err)
+    {
+    case 0: break;	// not an error
+    case BADRANK: throw DataException("matrix_inverse: input and output must be rank 2.");
+    case NOTSQUARE: throw DataException("matrix_inverse: matrix must be square.");
+    case SHAPEMISMATCH: throw DataException("matrix_inverse: programmer error input and output must be the same shape.");
+    case NOINVERSE: throw DataException("matrix_inverse: argument not invertible.");
+    case NEEDLAPACK:throw DataException("matrix_inverse: matrices larger than 3x3 require lapack support."); 
+    case ERRFACTORISE: throw DataException("matrix_inverse: argument not invertible (factorise stage).");
+    case ERRINVERT: throw DataException("matrix_inverse: argument not invertible (inverse stage).");
+    default:
+	throw DataException("matrix_inverse: unknown error.");
+    }
+}
+
+
+
+// Copied from the python version in util.py
+int
+matrix_inverse(const DataTypes::RealVectorType& in, 
+	    const DataTypes::ShapeType& inShape,
+            DataTypes::RealVectorType::size_type inOffset,
+            DataTypes::RealVectorType& out,
+	    const DataTypes::ShapeType& outShape,
+            DataTypes::RealVectorType::size_type outOffset,
+	    int count,
+	    LapackInverseHelper& helper)
+{
+    using namespace DataTypes;
+    using namespace std;
+    int inRank=getRank(inShape);
+    int outRank=getRank(outShape);
+    int size=DataTypes::noValues(inShape);
+    if ((inRank!=2) || (outRank!=2))
+    {
+	return BADRANK;		
+    }
+    if (inShape[0]!=inShape[1])
+    {
+	return NOTSQUARE; 		
+    }
+    if (inShape!=outShape)
+    {
+	return SHAPEMISMATCH;	
+    }
+    if (inShape[0]==1)
+    {
+	for (int i=0;i<count;++i)
+	{
+	    if (in[inOffset+i]!=0)
+	    {
+	    	out[outOffset+i]=1/in[inOffset+i];
+	    }
+	    else
+	    {
+		return NOINVERSE;
+	    }
+	}
+    }
+    else if (inShape[0]==2)
+    {
+	int step=0;
+	for (int i=0;i<count;++i)
+	{	
+          double A11=in[inOffset+step+getRelIndex(inShape,0,0)];
+          double A12=in[inOffset+step+getRelIndex(inShape,0,1)];
+          double A21=in[inOffset+step+getRelIndex(inShape,1,0)];
+          double A22=in[inOffset+step+getRelIndex(inShape,1,1)];
+          double D = A11*A22-A12*A21;
+	  if (D!=0)
+	  {
+          	D=1/D;
+		out[outOffset+step+getRelIndex(inShape,0,0)]= A22*D;
+         	out[outOffset+step+getRelIndex(inShape,1,0)]=-A21*D;
+          	out[outOffset+step+getRelIndex(inShape,0,1)]=-A12*D;
+          	out[outOffset+step+getRelIndex(inShape,1,1)]= A11*D;
+	  }
+	  else
+	  {
+		return NOINVERSE;
+	  }
+	  step+=size;
+	}
+    }
+    else if (inShape[0]==3)
+    {
+	int step=0;
+	for (int i=0;i<count;++i)
+	{	
+          double A11=in[inOffset+step+getRelIndex(inShape,0,0)];
+          double A21=in[inOffset+step+getRelIndex(inShape,1,0)];
+          double A31=in[inOffset+step+getRelIndex(inShape,2,0)];
+          double A12=in[inOffset+step+getRelIndex(inShape,0,1)];
+          double A22=in[inOffset+step+getRelIndex(inShape,1,1)];
+          double A32=in[inOffset+step+getRelIndex(inShape,2,1)];
+          double A13=in[inOffset+step+getRelIndex(inShape,0,2)];
+          double A23=in[inOffset+step+getRelIndex(inShape,1,2)];
+          double A33=in[inOffset+step+getRelIndex(inShape,2,2)];
+          double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
+	  if (D!=0)
+	  {
+		D=1/D;
+          	out[outOffset+step+getRelIndex(inShape,0,0)]=(A22*A33-A23*A32)*D;
+          	out[outOffset+step+getRelIndex(inShape,1,0)]=(A31*A23-A21*A33)*D;
+          	out[outOffset+step+getRelIndex(inShape,2,0)]=(A21*A32-A31*A22)*D;
+          	out[outOffset+step+getRelIndex(inShape,0,1)]=(A13*A32-A12*A33)*D;
+          	out[outOffset+step+getRelIndex(inShape,1,1)]=(A11*A33-A31*A13)*D;
+          	out[outOffset+step+getRelIndex(inShape,2,1)]=(A12*A31-A11*A32)*D;
+          	out[outOffset+step+getRelIndex(inShape,0,2)]=(A12*A23-A13*A22)*D;
+          	out[outOffset+step+getRelIndex(inShape,1,2)]=(A13*A21-A11*A23)*D;
+          	out[outOffset+step+getRelIndex(inShape,2,2)]=(A11*A22-A12*A21)*D;
+          }
+	  else
+	  {
+		return NOINVERSE;
+	  }
+	  step+=size;
+	}
+    }
+    else	// inShape[0] >3  (or negative but that can hopefully never happen)
+    {
+#ifndef ESYS_HAVE_LAPACK
+	return NEEDLAPACK;
+#else
+	int step=0;
+	
+	
+	for (int i=0;i<count;++i)
+	{
+		// need to make a copy since LAPACK overwrites its input
+		for (int j=0;j<size;++j)
+		{
+		    out[outOffset+step+j]=in[inOffset+step+j];
+		}
+		double* arr=&(out[outOffset+step]);
+		int res=helper.invert(arr);
+		if (res!=0)
+		{
+		    return res;
+		}
+		step+=size;
+	}
+#endif
+    }
+    return SUCCESS;
+}
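+
+/* Editorial note: matrix_inverse computes 1x1, 2x2 and 3x3 inverses directly from the
+   cofactor/adjugate formulas above; larger matrices require LAPACK support via
+   LapackInverseHelper.  Callers are expected to pass any nonzero return code to
+   matrixInverseError(), which converts it into the corresponding DataException. */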
+
+
+// --------------------------------------------------------
+
+template <>
+void
+binaryOpVectorTagged(DataTypes::RealVectorType& res,				// where result is to be stored
+	  const typename DataTypes::RealVectorType::size_type samplesToProcess,	// number of samples to be updated in the result
+	  const typename DataTypes::RealVectorType::size_type DPPSample,	// number of datapoints per sample
+	  const typename DataTypes::RealVectorType::size_type DPSize,		// datapoint size
+		
+	  const DataTypes::RealVectorType& left, 				// LHS of calculation
+	  const bool leftscalar,
+	  const DataTypes::RealVectorType& right, 				// RHS of the calculation
+	  const bool rightscalar,		
+	  const bool lefttagged,			// true if left object is the tagged one
+	  const DataTagged& tagsource,			// where to get tag offsets from
+	  escript::ES_optype operation)		// operation to perform	  
+{
+  typename DataTypes::RealVectorType::size_type lstep=leftscalar?1:DPSize;
+  typename DataTypes::RealVectorType::size_type rstep=rightscalar?1:DPSize;
+  typename DataTypes::RealVectorType::size_type limit=samplesToProcess*DPPSample;
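+
+  /* Editorial note on the indexing below: for each datapoint i, the DataTagged operand
+     takes its base offset from tagsource.getPointOffset(), while the other operand
+     simply advances by lstep/rstep values per datapoint.  An operand whose datapoints
+     are scalars (leftscalar/rightscalar) steps by a single value and reuses it for
+     every component of the result via the j*(!scalar) index trick. */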
+  switch (operation)
+  {
+    case ADD:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]+right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;
+    case POW:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=pow(left[leftbase+j*(!leftscalar)],right[rightbase+j*(!rightscalar)]);
+	  }
+	
+      }
+      break;      
+    case SUB:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]-right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case MUL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]*right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case DIV:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]/right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case LESS:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]<right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case GREATER:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]>right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case GREATER_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]>=right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    case LESS_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<limit;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);	// only one of these
+	  typename DataTypes::RealVectorType::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));	// will apply
+	  
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<DPSize;++j)
+	  {
+	      res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]<=right[rightbase+j*(!rightscalar)];
+	  }
+	
+      }
+      break;      
+    default:
+      throw DataException("Unsupported binary operation");    
+  }  
+}
+
+template <>
+void
+binaryOpVectorRightScalar(DataTypes::RealVectorType& res,				// where result is to be stored
+	  typename DataTypes::RealVectorType::size_type resOffset,		// offset in the result vector to start storing results
+	  const typename DataTypes::RealVectorType::size_type samplesToProcess,	// number of samples to be updated in the result
+	  const typename DataTypes::RealVectorType::size_type sampleSize,		// number of values in each sample
+	  const DataTypes::RealVectorType& left, 				// LHS of calculation
+	  typename DataTypes::RealVectorType::size_type leftOffset,		// where to start reading LHS values
+	  const DataTypes::real_t* right, 			// RHS of the calculation
+	  const bool rightreset,			// true if RHS is providing a single sample of 1 value only
+	  escript::ES_optype operation,		// operation to perform
+	  bool singleleftsample)			// set to false for normal operation
+{
+  size_t substep=(rightreset?0:1);  
+  switch (operation)
+  {
+    case ADD:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]+*rpos;
+	  }
+      }
+      break;
+    case POW:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=pow(left[leftbase+j],*rpos);
+	  }
+      }
+      break;      
+    case SUB:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]-*rpos;
+	  }
+      }
+      break;      
+    case MUL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j] * *rpos;
+	  }
+      }
+      break;      
+    case DIV:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]/ *rpos;
+	  }
+      }
+      break;      
+    case LESS:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]<*rpos;
+	  }
+      }
+      break;      
+    case GREATER:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]>*rpos;
+	  }
+      }
+      break;      
+    case GREATER_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]>=*rpos;
+	  }
+      }
+      break;      
+    case LESS_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+	  const DataTypes::real_t* rpos=right+(rightreset?0:i*substep);	
+	  
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]<=*rpos;
+	  }
+      }
+      break;      
+    default:
+      throw DataException("Unsupported binary operation");    
+  }  
+}
+
+
+template <>
+void
+binaryOpVectorLeftScalar(DataTypes::RealVectorType& res,				// where result is to be stored
+	  typename DataTypes::RealVectorType::size_type resOffset,		// offset in the result vector to start storing results
+	  const typename DataTypes::RealVectorType::size_type samplesToProcess,	// number of samples to be updated in the result
+	  const typename DataTypes::RealVectorType::size_type sampleSize,		// number of values in each sample
+	  const DataTypes::real_t* left, 				// LHS of calculation
+          const bool leftreset,				// true if LHS is providing a single sample of 1 value only
+	  const DataTypes::RealVectorType& right, 				// RHS of the calculation
+	  typename DataTypes::RealVectorType::size_type rightOffset,		// where to start reading RHS values
+	  escript::ES_optype operation,		// operation to perform
+	  bool singlerightsample)			// right consists of a single sample
+{
+  size_t substep=(leftreset?0:1);
+  switch (operation)
+  {
+    case ADD:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos+right[rightbase+j];
+	  }	
+      }
+      break;
+    case POW:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=pow(*lpos,right[rightbase+j]);
+	  }	
+      }
+      break;      
+    case SUB:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos-right[rightbase+j];
+	  }	
+      }
+      break;      
+    case MUL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos*right[rightbase+j];
+	  }	
+      }
+      break;      
+    case DIV:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos/right[rightbase+j];
+	  }	
+      }
+      break;      
+    case LESS:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos<right[rightbase+j];
+	  }	
+      }
+      break;      
+    case GREATER:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos>right[rightbase+j];
+	  }	
+      }
+      break;      
+    case GREATER_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos>=right[rightbase+j];
+	  }	
+      }
+      break;      
+    case LESS_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+	  const DataTypes::real_t* lpos=left+(leftreset?0:i*substep);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=*lpos<=right[rightbase+j];
+	  }	
+      }
+      break;      
+    default:
+      throw DataException("Unsupported binary operation");    
+  }  
+}
+
+template <>
+void
+binaryOpVector(DataTypes::RealVectorType& res,				// where result is to be stored
+	  typename DataTypes::RealVectorType::size_type resOffset,		// offset in the result vector to start storing results
+	  const typename DataTypes::RealVectorType::size_type samplesToProcess,	// number of samples to be updated in the result
+	  const typename DataTypes::RealVectorType::size_type sampleSize,		// number of values in each sample
+	  const DataTypes::RealVectorType& left, 				// LHS of calculation
+	  typename DataTypes::RealVectorType::size_type leftOffset,		// where to start reading LHS values
+	  const bool leftreset,				// Is LHS only supplying a single sample instead of a bunch of them
+	  const DataTypes::RealVectorType& right, 				// RHS of the calculation
+	  typename DataTypes::RealVectorType::size_type rightOffset,		// where to start reading RHS values
+	  const bool rightreset,			// Is RHS only supplying a single sample instead of a bunch of them
+	  escript::ES_optype operation)		// operation to perform
+{
+  switch (operation)
+  {
+    case ADD:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]+right[rightbase+j];
+	  }
+	
+      }
+      break;
+    case POW:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=pow(left[leftbase+j],right[rightbase+j]);
+	  }
+	
+      }
+      break;      
+    case SUB:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]-right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case MUL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]*right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case DIV:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]/right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case LESS:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]<right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case GREATER:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]>right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case GREATER_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]>=right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    case LESS_EQUAL:
+      #pragma omp parallel for
+      for (typename DataTypes::RealVectorType::size_type i=0;i<samplesToProcess;++i)
+      {
+	  typename DataTypes::RealVectorType::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+	  typename DataTypes::RealVectorType::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+	  for (typename DataTypes::RealVectorType::size_type j=0;j<sampleSize;++j)
+	  {
+	      res[i*sampleSize+resOffset+j]=left[leftbase+j]<=right[rightbase+j];
+	  }
+	
+      }
+      break;      
+    default:
+      throw DataException("Unsupported binary operation");    
+  }  
+}
+
+  /**
+     \brief
+     computes an hermitian matrix from your square matrix A: (A + adjoint(A)) / 2
+
+     \param in - vector containing the matrix A
+     \param inShape - shape of the matrix A
+     \param inOffset - the beginning of A within the vector in
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing ev in vector ev
+  */
+  void
+   hermitian(const DataTypes::CplxVectorType& in, 
+	    const DataTypes::ShapeType& inShape,
+            DataTypes::CplxVectorType::size_type inOffset,
+            DataTypes::CplxVectorType& ev, 
+	    const DataTypes::ShapeType& evShape,
+            DataTypes::CplxVectorType::size_type evOffset)
+  {
+   if (DataTypes::getRank(inShape) == 2) {
+     int i0, i1;
+     int s0=inShape[0];
+     int s1=inShape[1];
+     for (i0=0; i0<s0; i0++) {
+       for (i1=0; i1<s1; i1++) {
+         ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] + std::conj(in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)])) / 2.0;
+       }
+     }
+    }
+    else if (DataTypes::getRank(inShape) == 4) {
+      int i0, i1, i2, i3;
+      int s0=inShape[0];
+      int s1=inShape[1];
+      int s2=inShape[2];
+      int s3=inShape[3];
+      for (i0=0; i0<s0; i0++) {
+        for (i1=0; i1<s1; i1++) {
+          for (i2=0; i2<s2; i2++) {
+            for (i3=0; i3<s3; i3++) {
+              ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] + std::conj(in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)])) / 2.0;
+            }
+          }
+        }
+      }
+    }
+   }
+
+  /**
+     \brief
+     computes an antihermitian matrix from your square matrix A: (A - adjoint(A)) / 2
+
+     \param in - vector containing the matrix A
+     \param inShape - shape of the matrix A
+     \param inOffset - the beginning of A within the vector in
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing ev in vector ev
+  */
+   void
+   antihermitian(const DataTypes::CplxVectorType& in, 
+ 	    const DataTypes::ShapeType& inShape,
+             typename DataTypes::CplxVectorType::size_type inOffset,
+             DataTypes::CplxVectorType& ev, 
+ 	    const DataTypes::ShapeType& evShape,
+             typename DataTypes::CplxVectorType::size_type evOffset)  
+   {
+    if (DataTypes::getRank(inShape) == 2) {
+      int i0, i1;
+      int s0=inShape[0];
+      int s1=inShape[1];
+      for (i0=0; i0<s0; i0++) {
+        for (i1=0; i1<s1; i1++) {
+          ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] - std::conj(in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)])) / 2.0;
+        }
+      }
+    }
+   else if (DataTypes::getRank(inShape) == 4) {
+     int i0, i1, i2, i3;
+     int s0=inShape[0];
+     int s1=inShape[1];
+     int s2=inShape[2];
+     int s3=inShape[3];
+     for (i0=0; i0<s0; i0++) {
+       for (i1=0; i1<s1; i1++) {
+         for (i2=0; i2<s2; i2++) {
+           for (i3=0; i3<s3; i3++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] - std::conj(in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)])) / 2.0;
+           }
+         }
+       }
+     }
+   }
+  }
+
+
+}    // end namespace
+
diff --git a/escriptcore/src/DataVectorOps.h b/escriptcore/src/DataVectorOps.h
new file mode 100644
index 0000000..15de86e
--- /dev/null
+++ b/escriptcore/src/DataVectorOps.h
@@ -0,0 +1,1452 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_DATAMATHS_H__
+#define __ESCRIPT_DATAMATHS_H__
+
+#include "DataAbstract.h"
+#include "DataException.h"
+#include "ArrayOps.h"
+#include "LapackInverseHelper.h"
+#include "DataTagged.h"
+#include <complex>
+/**
+\file DataVectorOps.h
+\brief Describes binary operations performed on DataVector.
+
+
+For operations on DataReady see BinaryDataReadyOp.h.
+For operations on double* see ArrayOps.h.
+*/
+
+
+namespace escript
+{
+
+/**
+In order to properly identify the datapoints, in most cases, the vector, shape and offset of the point must all be supplied.
+Note that vector in this context refers to a data vector storing datapoints, not a mathematical vector. (However, datapoints within the data vector could represent scalars, vectors, matrices, ...).
+*/
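+
+// A minimal, illustrative sketch of how a single datapoint is addressed with a
+// vector, a shape and an offset (names and sizes here are hypothetical, not part
+// of the interface below):
+//
+//     DataTypes::ShapeType shape = {3, 3};                 // each datapoint is a 3x3 matrix
+//     DataTypes::RealVectorType::size_type point = 5;      // we want the 6th datapoint
+//     DataTypes::RealVectorType::size_type offset = point * DataTypes::noValues(shape);
+//     // vec[offset + DataTypes::getRelIndex(shape, i, j)] is entry (i,j) of that datapoint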
+
+
+ /**
+     \brief
+     Perform a matrix multiply of the given views.
+
+     NB: Only multiplies together the two given datapoints,
+     would need to call this over all data-points to multiply the entire
+     Data objects involved.
+
+     \param left,right - vectors containing the datapoints
+     \param leftShape,rightShape - shapes of datapoints in the vectors
+     \param leftOffset,rightOffset - beginnings of datapoints in the vectors
+     \param result - Vector to store the resulting datapoint in
+     \param resultShape - expected shape of the resulting datapoint
+  */
+  ESCRIPT_DLL_API
+  void
+  matMult(const DataTypes::RealVectorType& left,
+          const DataTypes::ShapeType& leftShape,
+          DataTypes::RealVectorType::size_type leftOffset,
+          const DataTypes::RealVectorType& right,
+          const DataTypes::ShapeType& rightShape,
+          DataTypes::RealVectorType::size_type rightOffset,
+          DataTypes::RealVectorType& result,
+          const DataTypes::ShapeType& resultShape);
+// Hmmmm why is there no offset for the result??
+
+
+
+
+  /**
+     \brief
+     Determine the shape of the result array for a matrix multiplication
+     of the given views.
+
+     \param left,right - shapes of the left and right matrices
+     \return the shape of the matrix which would result from multiplying left and right
+  */
+  ESCRIPT_DLL_API
+  DataTypes::ShapeType
+  determineResultShape(const DataTypes::ShapeType& left,
+                       const DataTypes::ShapeType& right);
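+  // Illustrative note (not from the original source): for an ordinary
+  // matrix-matrix product, a left shape of (2,3) and a right shape of (3,4)
+  // would be expected to produce a result shape of (2,4).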
+
+
+   /**
+      \brief
+      computes a symmetric matrix from your square matrix A: (A + transpose(A)) / 2
+
+     \param in - vector containing the matrix A
+     \param inShape - shape of the matrix A
+     \param inOffset - the beginning of A within the vector in
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing ev in vector ev
+  */
+  template<typename VEC>
+  inline
+  void
+  symmetric(const VEC& in,
+            const DataTypes::ShapeType& inShape,
+            typename VEC::size_type inOffset,
+            VEC& ev,
+            const DataTypes::ShapeType& evShape,
+            typename VEC::size_type evOffset)
+  {
+   if (DataTypes::getRank(inShape) == 2) {
+     int i0, i1;
+     int s0=inShape[0];
+     int s1=inShape[1];
+     for (i0=0; i0<s0; i0++) {
+       for (i1=0; i1<s1; i1++) {
+         ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] + in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)]) / 2.0;
+       }
+     }
+    }
+    else if (DataTypes::getRank(inShape) == 4) {
+      int i0, i1, i2, i3;
+      int s0=inShape[0];
+      int s1=inShape[1];
+      int s2=inShape[2];
+      int s3=inShape[3];
+      for (i0=0; i0<s0; i0++) {
+        for (i1=0; i1<s1; i1++) {
+          for (i2=0; i2<s2; i2++) {
+            for (i3=0; i3<s3; i3++) {
+              ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] + in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)]) / 2.0;
+            }
+          }
+        }
+      }
+    }
+   }
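+
+  // Worked example (illustrative only): for the rank-2 datapoint
+  //     A = [[1, 2],
+  //          [3, 4]]
+  // symmetric() writes (A + transpose(A)) / 2 = [[1.0, 2.5],
+  //                                              [2.5, 4.0]]
+  // into ev starting at evOffset.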
+
+  /**
+      \brief
+      computes an antisymmetric matrix from your square matrix A: (A - transpose(A)) / 2
+
+      \param in - vector containing the matrix A
+      \param inShape - shape of the matrix A
+      \param inOffset - the beginning of A within the vector in
+      \param ev - vector to store the output matrix
+      \param evShape - expected shape of the output matrix
+      \param evOffset - starting location for storing ev in vector ev
+   */
+   template<typename VEC>
+   inline
+   void
+   antisymmetric(const VEC& in,
+            const DataTypes::ShapeType& inShape,
+             typename VEC::size_type inOffset,
+             VEC& ev,
+            const DataTypes::ShapeType& evShape,
+             typename VEC::size_type evOffset)
+   {
+    if (DataTypes::getRank(inShape) == 2) {
+      int i0, i1;
+      int s0=inShape[0];
+      int s1=inShape[1];
+      for (i0=0; i0<s0; i0++) {
+        for (i1=0; i1<s1; i1++) {
+          ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)] - in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)]) / 2.0;
+        }
+      }
+    }
+   else if (DataTypes::getRank(inShape) == 4) {
+     int i0, i1, i2, i3;
+     int s0=inShape[0];
+     int s1=inShape[1];
+     int s2=inShape[2];
+     int s3=inShape[3];
+     for (i0=0; i0<s0; i0++) {
+       for (i1=0; i1<s1; i1++) {
+         for (i2=0; i2<s2; i2++) {
+           for (i3=0; i3<s3; i3++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = (in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)] - in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)]) / 2.0;
+           }
+         }
+       }
+     }
+   }
+  }
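+
+  // For the same example A as above, antisymmetric() would instead produce
+  // (A - transpose(A)) / 2 = [[0.0, -0.5],
+  //                           [0.5,  0.0]].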
+
+
+
+  /**
+     \brief
+     computes an hermitian matrix from your square matrix A: (A + adjoint(A)) / 2
+
+     \param in - vector containing the matrix A
+     \param inShape - shape of the matrix A
+     \param inOffset - the beginning of A within the vector in
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing ev in vector ev
+  */
+  void
+   hermitian(const DataTypes::CplxVectorType& in,
+            const DataTypes::ShapeType& inShape,
+            DataTypes::CplxVectorType::size_type inOffset,
+            DataTypes::CplxVectorType& ev,
+            const DataTypes::ShapeType& evShape,
+            DataTypes::CplxVectorType::size_type evOffset);
+
+  /**
+     \brief
+     computes an antihermitian matrix from your square matrix A: (A - adjoint(A)) / 2
+
+     \param in - vector containing the matrix A
+     \param inShape - shape of the matrix A
+     \param inOffset - the beginning of A within the vector in
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing ev in vector ev
+  */
+   void
+   antihermitian(const DataTypes::CplxVectorType& in,
+            const DataTypes::ShapeType& inShape,
+             typename DataTypes::CplxVectorType::size_type inOffset,
+             DataTypes::CplxVectorType& ev,
+            const DataTypes::ShapeType& evShape,
+             typename DataTypes::CplxVectorType::size_type evOffset);
+
+  /**
+     \brief
+     computes the trace of a matrix
+
+     \param in - vector containing the input matrix
+     \param inShape - shape of the input matrix
+     \param inOffset - the beginning of the input matrix within the vector "in"
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing the output matrix in vector ev
+     \param axis_offset - index of the first of the two consecutive axes that are summed over
+  */
+  template <class VEC>
+  inline
+  void
+  trace(const VEC& in,
+            const DataTypes::ShapeType& inShape,
+            typename VEC::size_type inOffset,
+            VEC& ev,
+            const DataTypes::ShapeType& evShape,
+            typename VEC::size_type evOffset,
+            int axis_offset)
+  {
+   for (int j=0;j<DataTypes::noValues(evShape);++j)
+   {
+      ev[evOffset+j]=0;
+   }
+   if (DataTypes::getRank(inShape) == 2) {
+     int s0=inShape[0]; // Python wrapper limits to square matrix
+     int i;
+     for (i=0; i<s0; i++) {
+       ev[evOffset/*+DataTypes::getRelIndex(evShape)*/] += in[inOffset+DataTypes::getRelIndex(inShape,i,i)];
+     }
+   }
+   else if (DataTypes::getRank(inShape) == 3) {
+     if (axis_offset==0) {
+       int s0=inShape[0];
+       int s2=inShape[2];
+       int i0, i2;
+       for (i0=0; i0<s0; i0++) {
+         for (i2=0; i2<s2; i2++) {
+           ev[evOffset+DataTypes::getRelIndex(evShape,i2)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i0,i2)];
+         }
+       }
+     }
+     else if (axis_offset==1) {
+       int s0=inShape[0];
+       int s1=inShape[1];
+       int i0, i1;
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           ev[evOffset+DataTypes::getRelIndex(evShape,i0)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i1)];
+         }
+       }
+     }
+   }
+   else if (DataTypes::getRank(inShape) == 4) {
+     if (axis_offset==0) {
+       int s0=inShape[0];
+       int s2=inShape[2];
+       int s3=inShape[3];
+       int i0, i2, i3;
+       for (i0=0; i0<s0; i0++) {
+         for (i2=0; i2<s2; i2++) {
+           for (i3=0; i3<s3; i3++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i2,i3)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i0,i2,i3)];
+           }
+         }
+       }
+     }
+     else if (axis_offset==1) {
+       int s0=inShape[0];
+       int s1=inShape[1];
+       int s3=inShape[3];
+       int i0, i1, i3;
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i3=0; i3<s3; i3++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i3)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i1,i3)];
+           }
+         }
+       }
+     }
+     else if (axis_offset==2) {
+       int s0=inShape[0];
+       int s1=inShape[1];
+       int s2=inShape[2];
+       int i0, i1, i2;
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] += in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i2)];
+           }
+         }
+       }
+     }
+   }
+  }
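+
+  // Worked example (illustrative only): for the rank-2 datapoint
+  //     A = [[1, 2],
+  //          [3, 4]]
+  // trace() stores the single value 1 + 4 = 5.  For a rank-3 input with
+  // axis_offset=0 it sums in[i,i,k] over i, giving one value per k.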
+
+
+  /**
+     \brief
+     Transpose each data point of this Data object around the given axis.
+
+     \param in - vector containing the input matrix
+     \param inShape - shape of the input matrix
+     \param inOffset - the beginning of the input matrix within the vector "in"
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing the output matrix in vector ev
+     \param axis_offset - the first axis_offset axes of the input become the trailing axes of the output (axis_offset=1 on a rank-2 object gives the ordinary matrix transpose)
+  */
+  ESCRIPT_DLL_API
+  template <class VEC>
+  inline
+  void
+  transpose(const VEC& in,
+            const DataTypes::ShapeType& inShape,
+            typename VEC::size_type inOffset,
+            VEC& ev,
+            const DataTypes::ShapeType& evShape,
+            typename VEC::size_type evOffset,
+            int axis_offset)
+  {
+   int inRank=DataTypes::getRank(inShape);
+   if ( inRank== 4) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int s2=evShape[2];
+     int s3=evShape[3];
+     int i0, i1, i2, i3;
+     if (axis_offset==1) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             for (i3=0; i3<s3; i3++) {
+               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i3,i0,i1,i2)];
+             }
+           }
+         }
+       }
+     }
+     else if (axis_offset==2) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             for (i3=0; i3<s3; i3++) {
+               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i3,i0,i1)];
+             }
+           }
+         }
+       }
+     }
+     else if (axis_offset==3) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             for (i3=0; i3<s3; i3++) {
+               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i2,i3,i0)];
+             }
+           }
+         }
+       }
+     }
+     else {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             for (i3=0; i3<s3; i3++) {
+               ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2,i3)];
+             }
+           }
+         }
+       }
+     }
+   }
+   else if (inRank == 3) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int s2=evShape[2];
+     int i0, i1, i2;
+     if (axis_offset==1) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i0,i1)];
+           }
+         }
+       }
+     }
+     else if (axis_offset==2) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i2,i0)];
+           }
+         }
+       }
+     }
+     else {
+       // Copy the matrix unchanged
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           for (i2=0; i2<s2; i2++) {
+             ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i2)];
+           }
+         }
+       }
+     }
+   }
+   else if (inRank == 2) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int i0, i1;
+     if (axis_offset==1) {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)];
+         }
+       }
+     }
+     else {
+       for (i0=0; i0<s0; i0++) {
+         for (i1=0; i1<s1; i1++) {
+           ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1)];
+         }
+       }
+     }
+   }
+   else if (inRank == 1) {
+     int s0=evShape[0];
+     int i0;
+     for (i0=0; i0<s0; i0++) {
+       ev[evOffset+DataTypes::getRelIndex(evShape,i0)] = in[inOffset+DataTypes::getRelIndex(inShape,i0)];
+     }
+   }
+   else if (inRank == 0) {
+     ev[evOffset/*+DataTypes::getRelIndex(evShape,)*/] = in[inOffset/*+DataTypes::getRelIndex(inShape,)*/];
+   }
+   else {
+      throw DataException("Error - DataArrayView::transpose can only be calculated for rank 0, 1, 2, 3 or 4 objects.");
+   }
+  }
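+
+  // Behavioural note (derived from the loops above): for a rank-2 input with
+  // axis_offset=1 this is the ordinary matrix transpose, ev[i0,i1] = in[i1,i0];
+  // for a rank-3 input with axis_offset=1, ev[i0,i1,i2] = in[i2,i0,i1].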
+
+  /**
+     \brief
+     swaps the components axis0 and axis1.
+
+     \param in - vector containing the input matrix
+     \param inShape - shape of the input matrix
+     \param inOffset - the beginning of the input matrix within the vector "in"
+     \param ev - vector to store the output matrix
+     \param evShape - expected shape of the output matrix
+     \param evOffset - starting location for storing the output matrix in vector ev
+     \param axis0 - axis index
+     \param axis1 - axis index
+  */
+  ESCRIPT_DLL_API
+  template <class VEC>
+  inline
+  void
+  swapaxes(const VEC& in,
+           const DataTypes::ShapeType& inShape,
+           typename VEC::size_type inOffset,
+           VEC& ev,
+           const DataTypes::ShapeType& evShape,
+           typename VEC::size_type evOffset,
+           int axis0,
+           int axis1)
+  {
+     int inRank=DataTypes::getRank(inShape);
+     if (inRank == 4) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int s2=evShape[2];
+     int s3=evShape[3];
+     int i0, i1, i2, i3;
+     if (axis0==0) {
+        if (axis1==1) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0,i2,i3)];
+                  }
+                }
+              }
+            }
+        } else if (axis1==2) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i1,i0,i3)];
+                  }
+                }
+              }
+            }
+
+        } else if (axis1==3) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i3,i1,i2,i0)];
+                  }
+                }
+              }
+            }
+        }
+     } else if (axis0==1) {
+        if (axis1==2) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i2,i1,i3)];
+                  }
+                }
+              }
+            }
+        } else if (axis1==3) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i3,i2,i1)];
+                  }
+                }
+              }
+            }
+        }
+     } else if (axis0==2) {
+        if (axis1==3) {
+            for (i0=0; i0<s0; i0++) {
+              for (i1=0; i1<s1; i1++) {
+                for (i2=0; i2<s2; i2++) {
+                  for (i3=0; i3<s3; i3++) {
+                    ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2,i3)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i1,i3,i2)];
+                  }
+                }
+              }
+            }
+        }
+     }
+
+   } else if ( inRank == 3) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int s2=evShape[2];
+     int i0, i1, i2;
+     if (axis0==0) {
+        if (axis1==1) {
+           for (i0=0; i0<s0; i0++) {
+             for (i1=0; i1<s1; i1++) {
+               for (i2=0; i2<s2; i2++) {
+                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0,i2)];
+               }
+             }
+           }
+        } else if (axis1==2) {
+           for (i0=0; i0<s0; i0++) {
+             for (i1=0; i1<s1; i1++) {
+               for (i2=0; i2<s2; i2++) {
+                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i2,i1,i0)];
+               }
+             }
+           }
+       }
+     } else if (axis0==1) {
+        if (axis1==2) {
+           for (i0=0; i0<s0; i0++) {
+             for (i1=0; i1<s1; i1++) {
+               for (i2=0; i2<s2; i2++) {
+                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1,i2)] = in[inOffset+DataTypes::getRelIndex(inShape,i0,i2,i1)];
+               }
+             }
+           }
+        }
+     }
+   } else if ( inRank == 2) {
+     int s0=evShape[0];
+     int s1=evShape[1];
+     int i0, i1;
+     if (axis0==0) {
+        if (axis1==1) {
+           for (i0=0; i0<s0; i0++) {
+             for (i1=0; i1<s1; i1++) {
+                 ev[evOffset+DataTypes::getRelIndex(evShape,i0,i1)] = in[inOffset+DataTypes::getRelIndex(inShape,i1,i0)];
+             }
+           }
+        }
+    }
+  } else {
+      throw DataException("Error - DataArrayView::swapaxes can only be calculated for rank 2, 3 or 4 objects.");
+  }
+ }
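+
+ // Example (derived from the loops above): for a rank-2 input with axis0=0 and
+ // axis1=1, swapaxes() is the ordinary matrix transpose, ev[i0,i1] = in[i1,i0].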
+
+  /**
+     \brief
+     solves a local eigenvalue problem
+
+     \param in - vector containing the input matrix
+     \param inShape - shape of the input matrix
+     \param inOffset - the beginning of the input matrix within the vector "in"
+     \param ev - vector to store the eigenvalues
+     \param evShape - expected shape of the eigenvalues
+     \param evOffset - starting location for storing the eigenvalues in vector ev
+  */
+  ESCRIPT_DLL_API
+  inline
+  void
+  eigenvalues(const DataTypes::RealVectorType& in,
+              const DataTypes::ShapeType& inShape,
+              typename DataTypes::RealVectorType::size_type inOffset,
+              DataTypes::RealVectorType& ev,
+              const DataTypes::ShapeType& evShape,
+              typename DataTypes::RealVectorType::size_type evOffset)
+  {
+   typename DataTypes::RealVectorType::ElementType in00,in10,in20,in01,in11,in21,in02,in12,in22;
+   typename DataTypes::RealVectorType::ElementType ev0,ev1,ev2;
+   int s=inShape[0];
+   if (s==1) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      eigenvalues1(in00,&ev0);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+
+   } else  if (s==2) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+      eigenvalues2(in00,(in01+in10)/2.,in11,&ev0,&ev1);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+
+   } else  if (s==3) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+      in20=in[inOffset+DataTypes::getRelIndex(inShape,2,0)];
+      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+      in21=in[inOffset+DataTypes::getRelIndex(inShape,2,1)];
+      in02=in[inOffset+DataTypes::getRelIndex(inShape,0,2)];
+      in12=in[inOffset+DataTypes::getRelIndex(inShape,1,2)];
+      in22=in[inOffset+DataTypes::getRelIndex(inShape,2,2)];
+      eigenvalues3(in00,(in01+in10)/2.,(in02+in20)/2.,in11,(in21+in12)/2.,in22,
+                 &ev0,&ev1,&ev2);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+      ev[evOffset+DataTypes::getRelIndex(evShape,2)]=ev2;
+
+   }
+  }
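+
+  // Note: the off-diagonal entries are symmetrised on the fly via (in01+in10)/2
+  // etc., so only the symmetric part of the datapoint contributes.  Worked
+  // example (illustrative only): for [[2,1],[1,2]] the stored eigenvalues are
+  // 1 and 3 (their ordering is whatever eigenvalues2() produces).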
+
+  inline
+  void
+  eigenvalues(const DataTypes::CplxVectorType& in,
+              const DataTypes::ShapeType& inShape,
+              typename DataTypes::CplxVectorType::size_type inOffset,
+              DataTypes::CplxVectorType& ev,
+              const DataTypes::ShapeType& evShape,
+              typename DataTypes::CplxVectorType::size_type evOffset)
+  {
+   typename DataTypes::CplxVectorType::ElementType in00,in10,in20,in01,in11,in21,in02,in12,in22;
+   typename DataTypes::CplxVectorType::ElementType ev0,ev1,ev2;
+   int s=inShape[0];
+   if (s==1) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      eigenvalues1(in00,&ev0);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+
+   } else  if (s==2) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+      eigenvalues2(in00,(in01+in10)/2.,in11,&ev0,&ev1);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+
+   } else  if (s==3) {
+     // this doesn't work yet
+//       in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+//       in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+//       in20=in[inOffset+DataTypes::getRelIndex(inShape,2,0)];
+//       in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+//       in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+//       in21=in[inOffset+DataTypes::getRelIndex(inShape,2,1)];
+//       in02=in[inOffset+DataTypes::getRelIndex(inShape,0,2)];
+//       in12=in[inOffset+DataTypes::getRelIndex(inShape,1,2)];
+//       in22=in[inOffset+DataTypes::getRelIndex(inShape,2,2)];
+//       eigenvalues3(in00,(in01+in10)/2.,(in02+in20)/2.,in11,(in21+in12)/2.,in22,
+//                  &ev0,&ev1,&ev2);
+//       ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+//       ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+//       ev[evOffset+DataTypes::getRelIndex(evShape,2)]=ev2;
+
+   }
+  }
+
+
+  /**
+     \brief
+     solves a local eigenvalue problem
+
+     \param in - vector containing the input matrix
+     \param inShape - shape of the input matrix
+     \param inOffset - the beginning of the input matrix within the vector "in"
+     \param ev - vector to store the eigenvalues
+     \param evShape - expected shape of the eigenvalues
+     \param evOffset - starting location for storing the eigenvalues in ev
+     \param V - vector to store the eigenvectors
+     \param VShape - expected shape of the eigenvectors
+     \param VOffset - starting location for storing the eigenvectors in V
+     \param tol - Input - eigenvalues with relative difference tol are treated as equal
+  */
+  ESCRIPT_DLL_API
+  inline
+  void
+  eigenvalues_and_eigenvectors(const DataTypes::RealVectorType& in, const DataTypes::ShapeType& inShape,
+                               DataTypes::RealVectorType::size_type inOffset,
+                               DataTypes::RealVectorType& ev, const DataTypes::ShapeType& evShape,
+                               DataTypes::RealVectorType::size_type evOffset,
+                               DataTypes::RealVectorType& V, const DataTypes::ShapeType& VShape,
+                               DataTypes::RealVectorType::size_type VOffset,
+                               const double tol=1.e-13)
+  {
+   double in00,in10,in20,in01,in11,in21,in02,in12,in22;
+   double V00,V10,V20,V01,V11,V21,V02,V12,V22;
+   double ev0,ev1,ev2;
+   int s=inShape[0];
+   if (s==1) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      eigenvalues_and_eigenvectors1(in00,&ev0,&V00,tol);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
+   } else  if (s==2) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+      eigenvalues_and_eigenvectors2(in00,(in01+in10)/2.,in11,
+                   &ev0,&ev1,&V00,&V10,&V01,&V11,tol);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
+      V[inOffset+DataTypes::getRelIndex(VShape,1,0)]=V10;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,1)]=V01;
+      V[inOffset+DataTypes::getRelIndex(VShape,1,1)]=V11;
+   } else  if (s==3) {
+      in00=in[inOffset+DataTypes::getRelIndex(inShape,0,0)];
+      in10=in[inOffset+DataTypes::getRelIndex(inShape,1,0)];
+      in20=in[inOffset+DataTypes::getRelIndex(inShape,2,0)];
+      in01=in[inOffset+DataTypes::getRelIndex(inShape,0,1)];
+      in11=in[inOffset+DataTypes::getRelIndex(inShape,1,1)];
+      in21=in[inOffset+DataTypes::getRelIndex(inShape,2,1)];
+      in02=in[inOffset+DataTypes::getRelIndex(inShape,0,2)];
+      in12=in[inOffset+DataTypes::getRelIndex(inShape,1,2)];
+      in22=in[inOffset+DataTypes::getRelIndex(inShape,2,2)];
+      eigenvalues_and_eigenvectors3(in00,(in01+in10)/2.,(in02+in20)/2.,in11,(in21+in12)/2.,in22,
+                 &ev0,&ev1,&ev2,
+                 &V00,&V10,&V20,&V01,&V11,&V21,&V02,&V12,&V22,tol);
+      ev[evOffset+DataTypes::getRelIndex(evShape,0)]=ev0;
+      ev[evOffset+DataTypes::getRelIndex(evShape,1)]=ev1;
+      ev[evOffset+DataTypes::getRelIndex(evShape,2)]=ev2;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,0)]=V00;
+      V[inOffset+DataTypes::getRelIndex(VShape,1,0)]=V10;
+      V[inOffset+DataTypes::getRelIndex(VShape,2,0)]=V20;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,1)]=V01;
+      V[inOffset+DataTypes::getRelIndex(VShape,1,1)]=V11;
+      V[inOffset+DataTypes::getRelIndex(VShape,2,1)]=V21;
+      V[inOffset+DataTypes::getRelIndex(VShape,0,2)]=V02;
+      V[inOffset+DataTypes::getRelIndex(VShape,1,2)]=V12;
+      V[inOffset+DataTypes::getRelIndex(VShape,2,2)]=V22;
+
+   }
+ }
+
+
+/**
+   Inline function definitions.
+*/
+
+template <class VEC>
+inline
+bool
+checkOffset(const VEC& data,
+            const DataTypes::ShapeType& shape,
+            typename VEC::size_type offset)
+{
+        return (data.size() >= (offset+DataTypes::noValues(shape)));
+}
+
+/**
+ * This assumes that all data involved have the same points per sample and same shape
+*/
+template <class ResVEC, class LVEC, class RSCALAR>
+void
+binaryOpVectorRightScalar(ResVEC& res,                          // where result is to be stored
+          typename ResVEC::size_type resOffset,         // offset in the result vector to start storing results
+          const typename ResVEC::size_type samplesToProcess,    // number of samples to be updated in the result
+          const typename ResVEC::size_type sampleSize,          // number of values in each sample
+          const LVEC& left,                             // LHS of calculation
+          typename LVEC::size_type leftOffset,          // where to start reading LHS values
+          const RSCALAR* right,                         // RHS of the calculation
+          const bool rightreset,                        // true if RHS is providing a single sample of 1 value only
+          escript::ES_optype operation,         // operation to perform
+          bool singleleftsample)                        // set to false for normal operation
+{
+    size_t substep=(rightreset?0:1);
+    switch (operation)
+    {
+        case ADD:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+              const RSCALAR* rpos=right+(rightreset?0:i*substep);
+
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]+*rpos;
+              }
+          }
+        }
+        break;
+        case POW:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+              const RSCALAR* rpos=right+(rightreset?0:i*substep);
+
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=pow(left[leftbase+j],*rpos);
+              }
+          }
+        }
+        break;
+        case SUB:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+              const RSCALAR* rpos=right+(rightreset?0:i*substep);
+
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]-*rpos;
+              }
+          }
+        }
+        break;
+        case MUL:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+              const RSCALAR* rpos=right+(rightreset?0:i*substep);
+
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j] * *rpos;
+              }
+          }
+        }
+        break;
+        case DIV:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(singleleftsample?0:i*sampleSize);
+              const RSCALAR* rpos=right+(rightreset?0:i*substep);
+
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]/ *rpos;
+              }
+          }
+        }
+        break;
+        default:
+            throw DataException("Unsupported binary operation");
+    }
+}
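+
+// Hedged usage sketch for the template above (names and sizes are hypothetical,
+// and ADD is assumed to be the ES_optype enumerator used in the switch):
+// add the scalar 2.0 to every value of `left`, given 10 samples of 4 values each.
+//
+//     DataTypes::real_t two = 2.0;
+//     binaryOpVectorRightScalar(res, 0, 10, 4, left, 0, &two,
+//                               true /*rightreset*/, ADD, false /*singleleftsample*/);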
+
+template<>
+void
+binaryOpVectorRightScalar(DataTypes::RealVectorType& res,                               // where result is to be stored
+          typename DataTypes::RealVectorType::size_type resOffset,              // offset in the result vector to start storing results
+          const typename DataTypes::RealVectorType::size_type samplesToProcess, // number of samples to be updated in the result
+          const typename DataTypes::RealVectorType::size_type sampleSize,               // number of values in each sample
+          const DataTypes::RealVectorType& left,                                // LHS of calculation
+          typename DataTypes::RealVectorType::size_type leftOffset,             // where to start reading LHS values
+          const DataTypes::real_t* right,                       // RHS of the calculation
+          const bool rightreset,                        // true if RHS is providing a single sample of 1 value only
+          escript::ES_optype operation,         // operation to perform
+          bool singleleftsample);
+
+/**
+ * This assumes that all data involved have the same points per sample and same shape
+*/
+template <class ResVEC, class LSCALAR, class RVEC>
+void
+binaryOpVectorLeftScalar(ResVEC& res,                           // where result is to be stored
+          typename ResVEC::size_type resOffset,         // offset in the result vector to start storing results
+          const typename ResVEC::size_type samplesToProcess,    // number of samples to be updated in the result
+          const typename ResVEC::size_type sampleSize,          // number of values in each sample
+          const LSCALAR* left,                          // LHS of calculation
+          const bool leftreset,                         // true if LHS is providing a single sample of 1 value only
+          const RVEC& right,                            // RHS of the calculation
+          typename RVEC::size_type rightOffset,         // where to start reading RHS values
+          escript::ES_optype operation,         // operation to perform
+          bool singlerightsample)                       // right consists of a single sample
+{
+    size_t substep=(leftreset?0:1);
+    switch (operation)
+    {
+        case ADD:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename RVEC::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+              const LSCALAR* lpos=left+(leftreset?0:i*substep);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=*lpos+right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case POW:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename RVEC::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+              const LSCALAR* lpos=left+(leftreset?0:i*substep);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=pow(*lpos,right[rightbase+j]);
+              }
+          }
+        }
+        break;
+        case SUB:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename RVEC::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+              const LSCALAR* lpos=left+(leftreset?0:i*substep);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=*lpos-right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case MUL:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename RVEC::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+              const LSCALAR* lpos=left+(leftreset?0:i*substep);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=*lpos*right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case DIV:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename RVEC::size_type rightbase=rightOffset+(singlerightsample?0:i*sampleSize);
+              const LSCALAR* lpos=left+(leftreset?0:i*substep);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=*lpos/right[rightbase+j];
+              }
+          }
+        }
+        break;
+        default:
+            throw DataException("Unsupported binary operation");
+    }
+}
+
+template <>
+void
+binaryOpVectorLeftScalar(DataTypes::RealVectorType& res,                                // where result is to be stored
+          typename DataTypes::RealVectorType::size_type resOffset,              // offset in the result vector to start storing results
+          const typename DataTypes::RealVectorType::size_type samplesToProcess, // number of samples to be updated in the result
+          const typename DataTypes::RealVectorType::size_type sampleSize,               // number of values in each sample
+          const DataTypes::real_t* left,                                // LHS of calculation
+          const bool leftreset,                         // true if LHS is providing a single sample of 1 value only
+          const DataTypes::RealVectorType& right,                               // RHS of the calculation
+          typename DataTypes::RealVectorType::size_type rightOffset,            // where to start reading RHS values
+          escript::ES_optype operation,         // operation to perform
+          bool singlerightsample);                      // right consists of a single sample
+
+/**
+ * This assumes that all data involved have the same points per sample and same shape
+*/
+template <class ResVEC, class LVEC, class RVEC>
+void
+binaryOpVector(ResVEC& res,                             // where result is to be stored
+          typename ResVEC::size_type resOffset,         // offset in the result vector to start storing results
+          const typename ResVEC::size_type samplesToProcess,    // number of samples to be updated in the result
+          const typename ResVEC::size_type sampleSize,          // number of values in each sample
+          const LVEC& left,                             // LHS of calculation
+          typename LVEC::size_type leftOffset,          // where to start reading LHS values
+          const bool leftreset,                         // Is LHS only supplying a single sample instead of a bunch of them
+          const RVEC& right,                            // RHS of the calculation
+          typename RVEC::size_type rightOffset,         // where to start reading RHS values
+          const bool rightreset,                        // Is RHS only supplying a single sample instead of a bunch of them
+          escript::ES_optype operation)         // operation to perform
+{
+    switch (operation)
+    {
+        case ADD:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+              typename RVEC::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]+right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case POW:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+              typename RVEC::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=pow(left[leftbase+j],right[rightbase+j]);
+              }
+          }
+        }
+        break;
+        case SUB:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+              typename RVEC::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]-right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case MUL:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+              typename RVEC::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]*right[rightbase+j];
+              }
+          }
+        }
+        break;
+        case DIV:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<samplesToProcess;++i)
+          {
+              typename LVEC::size_type leftbase=leftOffset+(leftreset?0:i*sampleSize);
+              typename RVEC::size_type rightbase=rightOffset+(rightreset?0:i*sampleSize);
+              for (typename ResVEC::size_type j=0;j<sampleSize;++j)
+              {
+                  res[i*sampleSize+resOffset+j]=left[leftbase+j]/right[rightbase+j];
+              }
+          }
+        }
+        break;
+        default:
+            throw DataException("Unsupported binary operation");
+    }
+}
+
+template <>
+void
+binaryOpVector(DataTypes::RealVectorType& res,                          // where result is to be stored
+          typename DataTypes::RealVectorType::size_type resOffset,              // offset in the result vector to start storing results
+          const typename DataTypes::RealVectorType::size_type samplesToProcess, // number of samples to be updated in the result
+          const typename DataTypes::RealVectorType::size_type sampleSize,               // number of values in each sample
+          const DataTypes::RealVectorType& left,                                // LHS of calculation
+          typename DataTypes::RealVectorType::size_type leftOffset,             // where to start reading LHS values
+          const bool leftreset,                         // Is LHS only supplying a single sample instead of a bunch of them
+          const DataTypes::RealVectorType& right,                               // RHS of the calculation
+          typename DataTypes::RealVectorType::size_type rightOffset,            // where to start reading RHS values
+          const bool rightreset,                        // Is RHS only supplying a single sample instead of a bunch of them
+          escript::ES_optype operation);                // operation to perform
+
+#define OPVECLAZYBODY(X)     \
+    for (size_t j=0;j<onumsteps;++j)\
+    {\
+      for (size_t i=0;i<numsteps;++i,res+=resultStep) \
+      { \
+          for (size_t s=0; s<chunksize; ++s)\
+          {\
+              res[s] = X;\
+          }\
+/*        tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X);*/ \
+          lroffset+=leftstep; \
+          rroffset+=rightstep; \
+      }\
+      lroffset+=oleftstep;\
+      rroffset+=orightstep;\
+    }
+
+/**
+ * This assumes that all data involved have the same points per sample and same shape
+ * This version is to be called from within DataLazy.
+ * It does not use OpenMP around its loops because it evaluates individual samples
+ * (which will be done within an enclosing OpenMP region).
+*/
+template <class ResELT, class LELT, class RELT>
+void
+binaryOpVectorLazyHelper(ResELT* res,
+                         const LELT* left,
+                         const RELT* right,
+                         const size_t chunksize,
+                         const size_t onumsteps,
+                         const size_t numsteps,
+                         const size_t resultStep,
+                         const size_t leftstep,
+                         const size_t rightstep,
+                         const size_t oleftstep,
+                         const size_t orightstep,
+                         size_t lroffset,
+                         size_t rroffset,
+                         escript::ES_optype operation)          // operation to perform
+{
+    switch (operation)
+    {
+        case ADD:
+            OPVECLAZYBODY((left[lroffset+s]+right[rroffset+s]));
+            break;
+        case POW:
+            OPVECLAZYBODY(pow(left[lroffset+s],right[rroffset+s]))
+            break;
+        case SUB:
+            OPVECLAZYBODY(left[lroffset+s]-right[rroffset+s])
+            break;
+        case MUL:
+            OPVECLAZYBODY(left[lroffset+s]*right[rroffset+s])
+            break;
+        case DIV:
+            OPVECLAZYBODY(left[lroffset+s]/right[rroffset+s])
+            break;
+        case LESS:
+            OPVECLAZYBODY(left[lroffset+s]<right[rroffset+s])
+            break;
+        case GREATER:
+            OPVECLAZYBODY(left[lroffset+s]>right[rroffset+s])
+            break;
+        case GREATER_EQUAL:
+            OPVECLAZYBODY(left[lroffset+s]>=right[rroffset+s])
+            break;
+        case LESS_EQUAL:
+            OPVECLAZYBODY(left[lroffset+s]<=right[rroffset+s])
+            break;
+        default:
+            ESYS_ASSERT(false, "Invalid operation. This should never happen!");
+            // I can't throw here because this will be called inside a parallel section
+    }
+}
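
For readability, here is what OPVECLAZYBODY expands to for the ADD case above. This is an editorial sketch of the generated loop, not part of the patch; it only restates the macro body with X substituted.

    // Sketch of the ADD expansion of OPVECLAZYBODY (editorial, not in the patch):
    // offsets advance by leftstep/rightstep per inner step and by
    // oleftstep/orightstep per outer block.
    for (size_t j=0; j<onumsteps; ++j)
    {
        for (size_t i=0; i<numsteps; ++i, res+=resultStep)
        {
            for (size_t s=0; s<chunksize; ++s)
                res[s] = left[lroffset+s] + right[rroffset+s];
            lroffset += leftstep;
            rroffset += rightstep;
        }
        lroffset += oleftstep;
        rroffset += orightstep;
    }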
+
+/**
+ * This assumes that all data involved have the same points per sample and same shape
+*/
+/* A single version covering all Tagged+Expanded interactions */
+template <class ResVEC, class LVEC, class RVEC>
+void
+binaryOpVectorTagged(ResVEC& res,                               // where result is to be stored
+          const typename ResVEC::size_type samplesToProcess,    // number of samples to be updated in the result
+          const typename ResVEC::size_type DPPSample, // number of datapoints per sample
+          const typename ResVEC::size_type DPSize,    // datapoint size
+          const LVEC& left,                           // LHS of calculation
+          bool leftscalar,
+          const RVEC& right,                          // RHS of the calculation
+          bool rightscalar,
+          bool lefttagged,                            // true if left object is the tagged one
+          const DataTagged& tagsource,                // where to get tag offsets from
+          escript::ES_optype operation)               // operation to perform
+{
+    typename ResVEC::size_type lstep=leftscalar?1:DPSize;
+    typename ResVEC::size_type rstep=rightscalar?1:DPSize;
+    typename ResVEC::size_type limit=samplesToProcess*DPPSample;
+    switch (operation)
+    {
+        case ADD:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<limit;++i)
+          {
+              typename LVEC::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);       // only one of these
+              typename RVEC::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));      // will apply
+
+              for (typename ResVEC::size_type j=0;j<DPSize;++j)
+              {
+                  res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]+right[rightbase+j*(!rightscalar)];
+              }
+          }
+        }
+        break;
+        case POW:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<limit;++i)
+          {
+              typename LVEC::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);       // only one of these
+              typename RVEC::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));      // will apply
+
+              for (typename ResVEC::size_type j=0;j<DPSize;++j)
+              {
+                  res[i*DPSize+j]=pow(left[leftbase+j*(!leftscalar)],right[rightbase+j*(!rightscalar)]);
+              }
+          }
+        }
+        break;
+        case SUB:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<limit;++i)
+          {
+              typename LVEC::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);       // only one of these
+              typename RVEC::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));      // will apply
+
+              for (typename ResVEC::size_type j=0;j<DPSize;++j)
+              {
+                  res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]-right[rightbase+j*(!rightscalar)];
+              }
+          }
+        }
+        break;
+        case MUL:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<limit;++i)
+          {
+              typename LVEC::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);       // only one of these
+              typename RVEC::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));      // will apply
+
+              for (typename ResVEC::size_type j=0;j<DPSize;++j)
+              {
+                  res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]*right[rightbase+j*(!rightscalar)];
+              }
+          }
+        }
+        break;
+        case DIV:
+        {
+#pragma omp parallel for
+          for (typename ResVEC::size_type i=0;i<limit;++i)
+          {
+              typename LVEC::size_type leftbase=(lefttagged?tagsource.getPointOffset(i/DPPSample,0):i*lstep);       // only one of these
+              typename RVEC::size_type rightbase=(lefttagged?i*rstep:tagsource.getPointOffset(i/DPPSample,0));      // will apply
+
+              for (typename ResVEC::size_type j=0;j<DPSize;++j)
+              {
+                  res[i*DPSize+j]=left[leftbase+j*(!leftscalar)]/right[rightbase+j*(!rightscalar)];
+              }
+          }
+        }
+        break;
+        default:
+            throw DataException("Unsupported binary operation");
+    }
+}
+
+template<>
+void
+binaryOpVectorTagged(DataTypes::RealVectorType& res,                            // where result is to be stored
+          const typename DataTypes::RealVectorType::size_type samplesToProcess, // number of samples to be updated in the result
+          const typename DataTypes::RealVectorType::size_type DPPSample,        // number of datapoints per sample
+          const typename DataTypes::RealVectorType::size_type DPSize,           // datapoint size
+          const DataTypes::RealVectorType& left,                                // LHS of calculation
+          const bool leftscalar,
+          const DataTypes::RealVectorType& right,                               // RHS of the calculation
+          const bool rightscalar,
+          const bool lefttagged,                        // true if left object is the tagged one
+          const DataTagged& tagsource,                  // where to get tag offsets from
+          escript::ES_optype operation);
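
A note on the indexing used in binaryOpVectorTagged above: multiplying the datapoint index by !leftscalar (or !rightscalar) collapses a scalar operand to a single value, so one loop body serves both the scalar and the full-datapoint case. A minimal editorial illustration with hypothetical values:

    // Editorial illustration only, not part of the patch.
    bool leftscalar = true;                    // left operand is a single value
    size_t leftbase = 2, j = 5;
    size_t idx = leftbase + j*(!leftscalar);   // == 2: every j reads the same scalar
    // with leftscalar == false the same expression gives leftbase + j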
+
+
+
+
+  /**
+     \brief
+     Perform the given data point reduction operation on the data point
+     specified by the given offset into the vector. Reduces all elements of
+     the data point using the given operation, returning the result as a
+     scalar. The operation must be a callable binary function (function
+     pointer or functor).
+
+     Called by escript::algorithm.
+
+     \param left - vector containing the datapoint
+     \param leftShape - shape of datapoints in the vector
+     \param offset - beginning of the datapoint within the vector
+     \param operation - Input - operation to apply; must be a callable binary function
+     \param initial_value - Input - starting value for the reduction
+  */
+template <class BinaryFunction>
+inline
+DataTypes::real_t
+reductionOpVector(const DataTypes::RealVectorType& left,
+                           const DataTypes::ShapeType& leftShape,
+                           DataTypes::RealVectorType::size_type offset,
+                           BinaryFunction operation,
+                           DataTypes::real_t initial_value)
+{
+  ESYS_ASSERT((left.size()>0)&&checkOffset(left,leftShape,offset),
+         "Couldn't perform reductionOp due to insufficient storage.");
+  DataTypes::real_t current_value=initial_value;
+  for (DataTypes::RealVectorType::size_type i=0;i<DataTypes::noValues(leftShape);i++) {
+    current_value=operation(current_value,left[offset+i]);
+  }
+  return current_value;
+}
+
+template <class BinaryFunction>
+inline
+DataTypes::real_t
+reductionOpVector(const DataTypes::CplxVectorType& left,
+                           const DataTypes::ShapeType& leftShape,
+                           DataTypes::CplxVectorType::size_type offset,
+                           BinaryFunction operation,
+                           DataTypes::real_t initial_value)
+{
+  ESYS_ASSERT((left.size()>0)&&checkOffset(left,leftShape,offset),
+         "Couldn't perform reductionOp due to insufficient storage.");
+  DataTypes::real_t current_value=initial_value;
+  for (DataTypes::CplxVectorType::size_type i=0;i<DataTypes::noValues(leftShape);i++) {
+    current_value=operation(current_value,left[offset+i]);
+  }
+  return current_value;
+}
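
A usage sketch for the real-valued reductionOpVector overload above. This is editorial, not part of the patch, and it assumes RealVectorType keeps the (size, value, blockSize) constructor of the old DataVector.

    // Editorial sketch, not part of the patch.
    DataTypes::ShapeType shape(1, 3);           // one rank-1 datapoint of 3 values
    DataTypes::RealVectorType vec(3, 0.0, 1);   // assumed constructor: size, value, blockSize
    vec[1] = 7.5;
    DataTypes::real_t biggest = reductionOpVector(vec, shape, 0,
        [](DataTypes::real_t a, DataTypes::real_t b) { return a > b ? a : b; },
        0.0);                                   // initial value for the reduction
    // biggest == 7.5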
+
+
+/**
+     \brief
+     computes the inverses of square (up to 3x3) matrices
+
+     \param in - vector containing the input matrices
+     \param inShape - shape of the input matrices
+     \param inOffset - the beginning of the input matrices within the vector "in"
+     \param out - vector to store the inverses
+     \param outShape - expected shape of the inverses
+     \param outOffset - starting location for storing the inverses in out
+     \param count - number of matrices to invert
+     \param helper - associated working storage
+
+     \exception DataException if input and output are not the correct shape or if any of the matrices are not invertible.
+     \return 0 on success, on failure the return value should be passed to matrixInverseError(int err).
+*/
+int
+matrix_inverse(const DataTypes::RealVectorType& in,
+            const DataTypes::ShapeType& inShape,
+            DataTypes::RealVectorType::size_type inOffset,
+            DataTypes::RealVectorType& out,
+            const DataTypes::ShapeType& outShape,
+            DataTypes::RealVectorType::size_type outOffset,
+            int count,
+            LapackInverseHelper& helper);
+
+/**
+   \brief
+   throws an appropriate exception based on failure of matrix_inverse.
+
+   \param err - error code returned from matrix_inverse
+   \warning do not call in a parallel region since it throws.
+*/
+void
+matrixInverseError(int err);
+
+/**
+   \brief returns true if the vector contains NaN
+
+*/
+inline
+bool
+vectorHasNaN(const DataTypes::RealVectorType& in, DataTypes::RealVectorType::size_type inOffset, size_t count)
+{
+        for (size_t z=inOffset;z<inOffset+count;++z)
+        {
+            if (nancheck(in[z]))
+            {
+                return true;
+            }
+        }
+        return false;
+}
+
+}  // end namespace escript
+
+#endif // __ESCRIPT_DATAMATHS_H__
+
diff --git a/escriptcore/src/DataVector.cpp b/escriptcore/src/DataVectorTaipan.cpp
similarity index 73%
copy from escriptcore/src/DataVector.cpp
copy to escriptcore/src/DataVectorTaipan.cpp
index b9bbbb1..3118d7c 100644
--- a/escriptcore/src/DataVector.cpp
+++ b/escriptcore/src/DataVectorTaipan.cpp
@@ -14,25 +14,26 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "DataVector.h"
 
-#include "Taipan.h"
 #include "DataException.h"
-#include <boost/python/extract.hpp>
 #include "DataTypes.h"
+#include "Taipan.h"
 #include "WrappedArray.h"
 
+#include <boost/python/extract.hpp>
 #include <cassert>
 
 using namespace std;
 using namespace escript;
 using namespace boost::python;
+using namespace DataTypes;
 
-namespace escript {
+namespace escript
+{
+
+namespace DataTypes
+{
 
 Taipan arrayManager;
 
@@ -42,7 +43,7 @@ void releaseUnusedMemory()
 }
 
 
-DataVector::DataVector() :
+DataVectorTaipan::DataVectorTaipan() :
   m_size(0),
   m_dim(0),
   m_N(0),
@@ -50,7 +51,7 @@ DataVector::DataVector() :
 {
 }
 
-DataVector::DataVector(const DataVector& other) :
+DataVectorTaipan::DataVectorTaipan(const DataVectorTaipan& other) :
   m_size(other.m_size),
   m_dim(other.m_dim),
   m_N(other.m_N),
@@ -64,9 +65,9 @@ DataVector::DataVector(const DataVector& other) :
   }
 }
 
-DataVector::DataVector(const DataVector::size_type size,
-                       const DataVector::value_type val,
-                       const DataVector::size_type blockSize) :
+DataVectorTaipan::DataVectorTaipan(const DataVectorTaipan::size_type size,
+                       const DataVectorTaipan::value_type val,
+                       const DataVectorTaipan::size_type blockSize) :
   m_size(size),
   m_dim(blockSize),
   m_array_data(0)
@@ -74,7 +75,7 @@ DataVector::DataVector(const DataVector::size_type size,
   resize(size, val, blockSize);
 }
 
-DataVector::~DataVector()
+DataVectorTaipan::~DataVectorTaipan()
 {
   // dispose of data array
   if (m_array_data!=0) {
@@ -89,27 +90,27 @@ DataVector::~DataVector()
 }
 
 void
-DataVector::resize(const DataVector::size_type newSize,
-                   const DataVector::value_type newValue,
-                   const DataVector::size_type newBlockSize)
+DataVectorTaipan::resize(const DataVectorTaipan::size_type newSize,
+                   const DataVectorTaipan::value_type newValue,
+                   const DataVectorTaipan::size_type newBlockSize)
 {
   assert(m_size >= 0);
 
 			// The < 1 is to catch both ==0 and negatives
   if ( newBlockSize < 1) {
     ostringstream oss;
-    oss << "DataVector: invalid blockSize specified (" << newBlockSize << ')';    
+    oss << "DataVectorTaipan: invalid blockSize specified (" << newBlockSize << ')';    
     throw DataException(oss.str());
   }
 
   if ( newSize < 0 ) {
     ostringstream oss;
-    oss << "DataVector: invalid new size specified (" << newSize << ')';
+    oss << "DataVectorTaipan: invalid new size specified (" << newSize << ')';
     throw DataException(oss.str());
   }
   if ( (newSize % newBlockSize) != 0) {
     ostringstream oss;
-    oss << "DataVector: newSize is not a multiple of blockSize: (" << newSize << ", " << newBlockSize<< ')';
+    oss << "DataVectorTaipan: newSize is not a multiple of blockSize: (" << newSize << ", " << newBlockSize<< ')';
     throw DataException(oss.str());
   }
 
@@ -129,8 +130,8 @@ DataVector::resize(const DataVector::size_type newSize,
   }
 }
 
-DataVector&
-DataVector::operator=(const DataVector& other)
+DataVectorTaipan&
+DataVectorTaipan::operator=(const DataVectorTaipan& other)
 {
   assert(m_size >= 0);
 
@@ -153,7 +154,7 @@ DataVector::operator=(const DataVector& other)
 }
 
 bool
-DataVector::operator==(const DataVector& other) const
+DataVectorTaipan::operator==(const DataVectorTaipan& other) const
 {
   assert(m_size >= 0);
 
@@ -175,15 +176,15 @@ DataVector::operator==(const DataVector& other) const
 }
 
 bool
-DataVector::operator!=(const DataVector& other) const
+DataVectorTaipan::operator!=(const DataVectorTaipan& other) const
 {
   return !(*this==other);
 }
 
 void 
-DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies)
+DataVectorTaipan::copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies)
 {
-  using DataTypes::ValueType;
+  using DataTypes::RealVectorType;
   const DataTypes::ShapeType& tempShape=value.getShape();
   size_type len=DataTypes::noValues(tempShape);
   if (offset+len*copies>size())
@@ -193,7 +194,7 @@ DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, s
      ss << "offset=" << offset << " + " << " len=" << len << " >= " << size();
      throw DataException(ss.str());
   }
-  size_t si=0,sj=0,sk=0,sl=0;
+  size_type si=0,sj=0,sk=0,sl=0;
   switch (value.getRank())
   {
   case 0:	
@@ -217,9 +218,9 @@ DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, s
 	sj=tempShape[1];
 	for (size_type z=0;z<copies;++z)
 	{
-           for (ValueType::size_type i=0;i<si;i++)
+           for (size_type i=0;i<si;i++)
 	   {
-              for (ValueType::size_type j=0;j<sj;j++)
+              for (size_type j=0;j<sj;j++)
 	      {
                  m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j)]=value.getElt(i,j);
               }
@@ -233,11 +234,11 @@ DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, s
 	sk=tempShape[2];
 	for (size_type z=0;z<copies;++z) 
 	{
-          for (ValueType::size_type i=0;i<si;i++)
+          for (size_type i=0;i<si;i++)
 	  {
-            for (ValueType::size_type j=0;j<sj;j++)
+            for (size_type j=0;j<sj;j++)
 	    {
-              for (ValueType::size_type k=0;k<sk;k++)
+              for (size_type k=0;k<sk;k++)
 	      {
                  m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k)]=value.getElt(i,j,k);
               }
@@ -253,13 +254,13 @@ DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, s
 	sl=tempShape[3];
 	for (size_type z=0;z<copies;++z)
 	{
-          for (ValueType::size_type i=0;i<si;i++)
+          for (size_type i=0;i<si;i++)
 	  {
-            for (ValueType::size_type j=0;j<sj;j++)
+            for (size_type j=0;j<sj;j++)
 	    {
-              for (ValueType::size_type k=0;k<sk;k++)
+              for (size_type k=0;k<sk;k++)
 	      {
-                 for (ValueType::size_type l=0;l<sl;l++)
+                 for (size_type l=0;l<sl;l++)
 		 {
                     m_array_data[offset+DataTypes::getRelIndex(tempShape,i,j,k,l)]=value.getElt(i,j,k,l);
                  }
@@ -278,14 +279,14 @@ DataVector::copyFromArrayToOffset(const WrappedArray& value, size_type offset, s
 
 
 void
-DataVector::copyFromArray(const WrappedArray& value, size_type copies)
+DataVectorTaipan::copyFromArray(const WrappedArray& value, size_type copies)
 {
-  using DataTypes::ValueType;
+  using DataTypes::RealVectorType;
   if (m_array_data!=0) {
     arrayManager.delete_array(m_array_data);
   }
   DataTypes::ShapeType tempShape=value.getShape();
-  DataVector::size_type nelements=DataTypes::noValues(tempShape)*copies;
+  DataVectorTaipan::size_type nelements=DataTypes::noValues(tempShape)*copies;
   m_array_data = arrayManager.new_array(1,nelements);
   m_size=nelements;	// total amount of elements
   m_dim=m_size;		// elements per sample
@@ -293,4 +294,7 @@ DataVector::copyFromArray(const WrappedArray& value, size_type copies)
   copyFromArrayToOffset(value,0,copies);
 }
 
-} // end of namespace
+} // namespace DataTypes
+
+} // namespace escript
+
diff --git a/escriptcore/src/DataVector.h b/escriptcore/src/DataVectorTaipan.h
similarity index 63%
copy from escriptcore/src/DataVector.h
copy to escriptcore/src/DataVectorTaipan.h
index 9488b16..6bc14a6 100644
--- a/escriptcore/src/DataVector.h
+++ b/escriptcore/src/DataVectorTaipan.h
@@ -15,33 +15,33 @@
 *****************************************************************************/
 
 
-#if !defined escript_DataVector_20050324_H
-#define escript_DataVector_20050324_H
+#if !defined escript_DataVectorTaipan_H
+#define escript_DataVectorTaipan_H
 #include "system_dep.h"
 
-#include "esysUtils/EsysAssert.h"
+#include "Assert.h"
+#include "DataTypes.h"
+#include "WrappedArray.h"
 
-#include <vector>
-#include <iostream>
-#include <fstream>
-
-namespace escript {
-
-class WrappedArray;
+namespace escript
+{
 
-/**
+namespace DataTypes
+{
+  
+  /**
    \brief
-   DataVector implements an arbitrarily long vector of data values.
-   DataVector is the underlying data container for Data objects.
+   DataVectorTaipan implements an arbitrarily long vector of data values.
+   DataVectorTaipan is the underlying data container for Data objects.
 
    Description:
-   DataVector provides an implementation of a vector of data values for use
+   DataVectorTaipan provides an implementation of a vector of data values for use
    by DataBlocks2D and DataArrayView. Hiding the vector in this container
    allows different implementations to be swapped in without disrupting the
    client classes.
 */
 
-class ESCRIPT_DLL_API DataVector {
+class ESCRIPT_DLL_API DataVectorTaipan {
 
  public:
 
@@ -51,11 +51,12 @@ class ESCRIPT_DLL_API DataVector {
 
   //
   // The underlying type used to implement the vector.
-  typedef ElementType *  ValueType;
-  typedef const ElementType * ConstValueType;
+  typedef ElementType *  VectorStorageType;
+
 
   //
   // Various types exported to clients of this class.
+  typedef const ElementType *  const_pointer;  
   typedef ElementType          value_type;
   typedef long                 size_type;
   typedef ElementType &        reference;
@@ -63,29 +64,29 @@ class ESCRIPT_DLL_API DataVector {
 
   /**
      \brief
-     Default constructor for DataVector.
+     Default constructor for DataVectorTaipan.
 
      Description:
-     Constructs an empty DataVector object.
+     Constructs an empty DataVectorTaipan object.
   */
-  DataVector();
+  DataVectorTaipan();
 
   /**
      \brief
-     Copy constructor for DataVector.
+     Copy constructor for DataVectorTaipan.
 
      Description:
-     Constructs a DataVector object which is a copy of the
-     given DataVector object.
+     Constructs a DataVectorTaipan object which is a copy of the
+     given DataVectorTaipan object.
   */
-  DataVector(const DataVector& other);
+  DataVectorTaipan(const DataVectorTaipan& other);
 
   /**
      \brief
-     Constructor for DataVector.
+     Constructor for DataVectorTaipan.
 
      Description:
-     Constructs a DataVector object of length "size" with all elements
+     Constructs a DataVectorTaipan object of length "size" with all elements
+     initialised to "val".
 
      \param size - Input - Number of elements in the vector.
@@ -98,23 +99,23 @@ class ESCRIPT_DLL_API DataVector {
      number of data-points per sample. Size is the total number of elements required to hold
      all elements for all data-points in the given object, ie: number of samples * blocksize.
   */
-  DataVector(const size_type size,
+  DataVectorTaipan(const size_type size,
              const value_type val=0.0,
              const size_type blockSize=1);
 
   /**
      \brief
-     Default destructor for DataVector.
+     Default destructor for DataVectorTaipan.
 
      Description:
-     Destroys the current DataVector object.
+     Destroys the current DataVectorTaipan object.
   */
-  ~DataVector();
+  ~DataVectorTaipan();
 
   /**
      \brief
-     Resize the DataVector to the given length "newSize".
-     All current data is lost. All elements in the new DataVector are
+     Resize the DataVectorTaipan to the given length "newSize".
+     All current data is lost. All elements in the new DataVectorTaipan are
      initialised to "newVal".
 
      \param newSize - Input - New size for the vector.
@@ -136,12 +137,12 @@ class ESCRIPT_DLL_API DataVector {
   copyFromArray(const escript::WrappedArray& value, size_type copies);
 
   void 
-  copyFromArrayToOffset(const WrappedArray& value, size_type offset, size_type copies);
+  copyFromArrayToOffset(const escript::WrappedArray& value, size_type offset, size_type copies);
 
 
   /**
      \brief
-     Return the number of elements in this DataVector.
+     Return the number of elements in this DataVectorTaipan.
   */
   inline
   size_type
@@ -149,31 +150,31 @@ class ESCRIPT_DLL_API DataVector {
 
   /**
      \brief
-     DataVector assignment operator "=".
-     Assign the given DataVector object to this.
+     DataVectorTaipan assignment operator "=".
+     Assign the given DataVectorTaipan object to this.
   */
-  DataVector&
-  operator=(const DataVector& other);
+  DataVectorTaipan&
+  operator=(const DataVectorTaipan& other);
 
   /**
      \brief
-     DataVector equality comparison operator "==".
-     Return true if the given DataVector is equal to this.
+     DataVectorTaipan equality comparison operator "==".
+     Return true if the given DataVectorTaipan is equal to this.
   */
   bool
-  operator==(const DataVector& other) const;
+  operator==(const DataVectorTaipan& other) const;
 
   /**
      \brief
-     DataVector inequality comparison operator "!=".
-     Return true if the given DataVector is not equal to this.
+     DataVectorTaipan inequality comparison operator "!=".
+     Return true if the given DataVectorTaipan is not equal to this.
   */
   bool
-  operator!=(const DataVector& other) const;
+  operator!=(const DataVectorTaipan& other) const;
 
   /**
     \brief
-    Return a reference to the element at position i in this DataVector.
+    Return a reference to the element at position i in this DataVectorTaipan.
     Will throw an exception if an invalid index "i" is given.
 
     NB: access to the element one past the end of the vector is permitted
@@ -197,10 +198,11 @@ class ESCRIPT_DLL_API DataVector {
   size_type m_N;
 
   //
-  // The container for the elements contained in this DataVector.
-  ValueType m_array_data;
+  // The container for the elements contained in this DataVectorTaipan.
+  VectorStorageType m_array_data;
 };
 
+
 /**
   \brief
   releases unused memory in the memory manager.
@@ -211,28 +213,32 @@ ESCRIPT_DLL_API void releaseUnusedMemory();
 
 
 inline
-DataVector::size_type
-DataVector::size() const
+DataVectorTaipan::size_type
+DataVectorTaipan::size() const
 {
   return m_size;
 }
 
 inline
-DataVector::reference
-DataVector::operator[](const DataVector::size_type i)
+DataVectorTaipan::reference
+DataVectorTaipan::operator[](const DataVectorTaipan::size_type i)
 {
-  EsysAssert(i<size(),"DataVector: invalid index specified. " << i << " of " << size());
+  ESYS_ASSERT(i<size(), "DataVectorTaipan: invalid index specified. " << i << " of " << size());
   return m_array_data[i];
 }
 
 inline
-DataVector::const_reference
-DataVector::operator[](const DataVector::size_type i) const
+DataVectorTaipan::const_reference
+DataVectorTaipan::operator[](const DataVectorTaipan::size_type i) const
 {
-  EsysAssert(i<size(),"DataVector: invalid index specified. " << i << " of " << size());
+  ESYS_ASSERT(i<size(),"DataVectorTaipan: invalid index specified. " << i << " of " << size());
   return m_array_data[i];
 }
 
+
+} // end of namespace 
 } // end of namespace
 
+
+
 #endif
diff --git a/escriptcore/src/Distribution.h b/escriptcore/src/Distribution.h
new file mode 100644
index 0000000..f262e03
--- /dev/null
+++ b/escriptcore/src/Distribution.h
@@ -0,0 +1,79 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_DISTRIBUTION_H__
+#define __ESCRIPT_DISTRIBUTION_H__
+
+#include <escript/DataTypes.h>
+
+namespace escript {
+
+struct Distribution;
+typedef boost::shared_ptr<Distribution> Distribution_ptr;
+typedef boost::shared_ptr<const Distribution> const_Distribution_ptr;
+
+/// Describes the distribution of a vector across processes.
+/// Process i has entries with global indices from first_component[i] up to
+/// (but not including) first_component[i+1].
+struct Distribution
+{
+    Distribution(JMPI mpiInfo, const DataTypes::IndexVector& firstComponent,
+                 DataTypes::index_t m = 1, DataTypes::index_t b = 0) :
+        mpi_info(mpiInfo)
+    {
+        first_component.resize(mpi_info->size + 1);
+        for (int i = 0; i < mpi_info->size+1; ++i)
+            first_component[i] = m * firstComponent[i] + b;
+    }
+
+    inline DataTypes::index_t getFirstComponent() const
+    {
+        return first_component[mpi_info->rank];
+    }
+
+    inline DataTypes::index_t getLastComponent() const
+    {
+        return first_component[mpi_info->rank+1];
+    }
+
+    inline DataTypes::dim_t getGlobalNumComponents() const
+    {
+        return getMaxGlobalComponents()-getMinGlobalComponents();
+    }
+
+    inline DataTypes::dim_t getMyNumComponents() const
+    {
+        return getLastComponent()-getFirstComponent();
+    }
+
+    inline DataTypes::dim_t getMinGlobalComponents() const
+    {
+        return first_component[0];
+    }
+
+    inline DataTypes::dim_t getMaxGlobalComponents() const
+    {
+        return first_component[mpi_info->size];
+    }
+
+    DataTypes::IndexVector first_component;
+    JMPI mpi_info;
+};
+
+} // namespace escript
+
+#endif // __ESCRIPT_DISTRIBUTION_H__
+
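A worked example of how the new Distribution struct describes a partition. This is an editorial sketch, not part of the patch; "mpi" stands for a JMPI handle and is hypothetical.

    // Editorial sketch, not part of the patch.  Suppose mpi->size == 3 and
    // mpi->rank == 1, i.e. three ranks and we are rank 1.
    DataTypes::IndexVector firstComponent = {0, 4, 8, 12};
    escript::Distribution dist(mpi, firstComponent);
    // dist.getFirstComponent()      == 4   (first global index owned by rank 1)
    // dist.getLastComponent()       == 8   (one past the last index owned)
    // dist.getMyNumComponents()     == 4
    // dist.getGlobalNumComponents() == 12
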
diff --git a/escriptcore/src/DomainException.cpp b/escriptcore/src/DomainException.cpp
deleted file mode 100644
index 8c7b659..0000000
--- a/escriptcore/src/DomainException.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "DomainException.h"
-
-using namespace escript;
-
-const std::string 
-DomainException::exceptionNameValue("DomainException");
-
-
-const std::string &
-DomainException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/escriptcore/src/DomainException.h b/escriptcore/src/DomainException.h
index 1a8e443..3f4918a 100644
--- a/escriptcore/src/DomainException.h
+++ b/escriptcore/src/DomainException.h
@@ -19,82 +19,18 @@
 #define escript_DomainException_20040608_H
 #include "system_dep.h"
 
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
-namespace escript
-{
-
-  /**
-  \brief
-  DomainException exception class.
-
-  Description:
-  DomainException exception class.
-  The class provides a public function returning the exception name
-  */
-  class DomainException : public esysUtils::EsysException
-  {
-
-  protected:
-
-    typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DomainException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DomainException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DomainException(const std::string &str) : Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    DomainException(const DomainException &other) : Parent(other)
-      {
-        updateMessage();
-      }
+namespace escript {
 
-    ESCRIPT_DLL_API
-    inline virtual DomainException &
-    operator=(const DomainException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-    /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~DomainException() THROW(NO_ARG) {}
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
+class DomainException : public EsysException
+{
+public:
+    DomainException(const std::string& str) : EsysException(str) {}
+    virtual ~DomainException() throw() {}
+};
 
 } // end of namespace
+
 #endif
+
diff --git a/escriptcore/src/ES_optype.cpp b/escriptcore/src/ES_optype.cpp
new file mode 100644
index 0000000..21ef739
--- /dev/null
+++ b/escriptcore/src/ES_optype.cpp
@@ -0,0 +1,80 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+#include "ES_optype.h"
+#include <string>
+
+using namespace escript;
+
+namespace
+{
+
+std::string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
+                        "sin","cos","tan",
+                        "asin","acos","atan","sinh","cosh","tanh","erf",
+                        "asinh","acosh","atanh",
+                        "log10","log","sign","abs","neg","pos","exp","sqrt",
+                        "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
+                        "symmetric","antisymmetric",
+                        "prod",
+                        "transpose", "trace",
+                        "swapaxes",
+                        "minval", "maxval",
+                        "condEval",
+                        "hermitian","antihermitian",
+			"real","imaginary","conjugate",
+			"<", ">", ">=", "<="
+};
+
+
+ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
+                        G_UNARY,G_UNARY,G_UNARY, //10
+                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
+                        G_UNARY,G_UNARY,G_UNARY,                                        // 20
+                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
+                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
+                        G_NP1OUT,G_NP1OUT,
+                        G_TENSORPROD,
+                        G_NP1OUT_P, G_NP1OUT_P,
+                        G_NP1OUT_2P,
+                        G_REDUCTION, G_REDUCTION,
+                        G_CONDEVAL,
+                        G_UNARY,G_UNARY,
+                        G_UNARY_R, G_UNARY_R, G_UNARY,
+			G_UNARY_R, G_UNARY_R, G_UNARY_R, G_UNARY_R
+};
+
+
+int ES_opcount=53;
+}
+
+// Return a string representing the operation
+const std::string&
+escript::opToString(ES_optype op)
+{
+  if (op<0 || op>=ES_opcount) 
+  {
+    op=UNKNOWNOP;
+  }
+  return ES_opstrings[op];
+}
+
+ES_opgroup
+escript::getOpgroup(ES_optype op)
+{
+  return opgroups[op];
+}
diff --git a/escriptcore/src/ES_optype.h b/escriptcore/src/ES_optype.h
new file mode 100644
index 0000000..604146e
--- /dev/null
+++ b/escriptcore/src/ES_optype.h
@@ -0,0 +1,108 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_ESOPTYPE_H__
+#define __ESCRIPT_ESOPTYPE_H__
+
+#include <string>
+
+namespace escript
+{
+
+// For the purposes of unit testing and maintaining sanity, it is important that this enum be contiguous
+enum ES_optype
+{
+	UNKNOWNOP=0,
+	IDENTITY=1,
+	ADD=2,
+	SUB=3,
+	MUL=4,
+	DIV=5,
+	POW=6,
+	SIN=POW+1,
+	COS=SIN+1,
+	TAN=SIN+2,
+	ASIN=SIN+3,
+	ACOS=SIN+4,
+	ATAN=SIN+5,
+	SINH=SIN+6,
+	COSH=SIN+7,
+	TANH=SIN+8,
+	ERF=SIN+9,
+	ASINH=SIN+10,
+	ACOSH=SIN+11,
+	ATANH=SIN+12,
+	LOG10=ATANH+1,
+	LOG=LOG10+1,
+	SIGN=LOG10+2,
+	ABS=LOG10+3,
+	NEG=LOG10+4,
+	POS=LOG10+5,
+	EXP=LOG10+6,
+	SQRT=LOG10+7,
+	RECIP=LOG10+8,
+	GZ=RECIP+1,
+	LZ=GZ+1,	// <0
+	GEZ=GZ+2,	// >=0
+	LEZ=GZ+3,	// <=0
+	NEZ=GZ+4,	// !=0
+	EZ=GZ+5,
+	SYM=EZ+1,
+	NSYM=SYM+1,
+	PROD=NSYM+1,
+	TRANS=PROD+1,
+	TRACE=TRANS+1,
+	SWAP=TRACE+1,
+	MINVAL=SWAP+1,
+	MAXVAL=MINVAL+1,
+	CONDEVAL=MAXVAL+1,
+	HER=CONDEVAL+1,		// hermitian
+	NHER=HER+1,              // antihermitian
+	REAL=NHER+1,
+	IMAG=REAL+1,
+	CONJ=IMAG+1,
+	LESS=CONJ+1,		// a<b
+	GREATER=LESS+1,
+	GREATER_EQUAL=GREATER+1,
+	LESS_EQUAL=GREATER_EQUAL+1
+};
+
+const std::string&
+opToString(ES_optype op);
+
+enum ES_opgroup
+{
+   G_UNKNOWN,
+   G_IDENTITY,
+   G_BINARY,            // pointwise operations with two arguments
+   G_UNARY,             // pointwise operations with one argument
+   G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
+   G_UNARY_R,		// pointwise operations with one argument, always real output
+   G_NP1OUT,            // non-pointwise op with one output
+   G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
+   G_TENSORPROD,        // general tensor product
+   G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
+   G_REDUCTION,         // non-pointwise unary op with a scalar output
+   G_CONDEVAL
+};
+
+
+
+ES_opgroup
+getOpgroup(ES_optype op);
+}
+
+#endif
\ No newline at end of file
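
For reference, the two helpers simply index the ES_opstrings and opgroups tables defined in ES_optype.cpp above. An editorial sketch of the expected results (not part of the patch):

    // Editorial sketch, not part of the patch.
    escript::opToString(escript::ADD);    // "+"
    escript::opToString(escript::LESS);   // "<"
    escript::getOpgroup(escript::POW);    // G_BINARY
    escript::getOpgroup(escript::SIN);    // G_UNARY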
diff --git a/escriptcore/src/EscriptParams.cpp b/escriptcore/src/EscriptParams.cpp
index 398b449..fb506cd 100644
--- a/escriptcore/src/EscriptParams.cpp
+++ b/escriptcore/src/EscriptParams.cpp
@@ -1,5 +1,4 @@
 
-
 /*****************************************************************************
 *
 * Copyright (c) 2003-2016 by The University of Queensland
@@ -15,220 +14,179 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "EscriptParams.h"
-#include <cstring>
+#include "EsysException.h"
+#include "EsysMPI.h"
+
+#include <cmath> // to test if we know how to check for nan
 #include <boost/python/tuple.hpp>
-#include <cmath>                        // to test if we know how to check for nan
 
-#include "esysUtils/EsysException.h"
-#include "esysUtils/Esys_MPI.h"
+namespace bp = boost::python;
 
-namespace escript
-{
+namespace escript {
 
-EscriptParams escriptParams;                // externed in header file
+EscriptParams escriptParams; // externed in header file
 
 
 EscriptParams::EscriptParams()
 {
-   too_many_lines=80;
-   autolazy=0;
-   too_many_levels=70;
-   lazy_str_fmt=0;
-   lazy_verbose=0;
-#ifdef USE_NETCDF
-   has_netcdf=1;
-#else   
-   has_netcdf=0;
-#endif   
-#ifdef USE_LAPACK
-   lapack_support=1;
+    // These #defines are for performance testing only. In general we do not
+    // want the default values tweaked via compiler options; a Python
+    // interface is provided for that.
+#ifdef FAUTOLAZYON
+    autoLazy = 1;
 #else
-   lapack_support=0;
+    autoLazy = 0;
 #endif
+    lazyStrFmt = 0;
+    lazyVerbose = 0;
+#ifdef FRESCOLLECTON
+    resolveCollective = 1;
+#else
+    resolveCollective = 0;
+#endif
+    tooManyLevels = 9;	// this is fairly arbitrary
+    tooManyLines = 80;
 
-    gmsh = gmsh_mpi = 0;
-#if defined(GMSH) || defined(GMSH_MPI)
-    gmsh = 1;
+    // now populate feature set
+#ifdef ESYS_HAVE_CUDA
+    features.insert("cuda");
 #endif
-    //only mark gmsh as mpi if escript built with mpi, otherwise comm_spawns
-    //might just fail terribly
-#if defined(GMSH_MPI) && defined(ESYS_MPI)
-    gmsh_mpi = 1;
+#ifdef ESYS_HAVE_DUDLEY
+    features.insert("dudley");
+#endif
+#ifdef ESYS_HAVE_FINLEY
+    features.insert("finley");
+#endif
+#ifdef ESYS_HAVE_LAPACK
+    features.insert("lapack");
+#endif
+#ifdef ESYS_HAVE_MKL
+    features.insert("mkl");
 #endif
-
 #ifdef ESYS_MPI
-    amg_disabled=true;
-#else
-    amg_disabled=false;
+    features.insert("mpi");
 #endif
-
-    temp_direct_solver=false;   // This variable is to be removed once proper
-                                // SolverOptions support is in place
-#ifdef MKL
-    temp_direct_solver=true;
+#ifdef isnan
+    features.insert("NAN_CHECK");
 #endif
-#ifdef USE_UMFPACK
-    temp_direct_solver=true;
+#ifdef ESYS_HAVE_NETCDF
+    features.insert("netcdf");
 #endif
-#ifdef PASTIX
-    temp_direct_solver=true;
+#ifdef _OPENMP
+    features.insert("openmp");
 #endif
-
-                        // These #defs are for performance testing only
-                        // in general, I don't want people tweaking the
-                        // default value using compiler options
-                        // I've provided a python interface for that
-#ifdef FAUTOLAZYON
-   autolazy=1;
+#ifdef ESYS_HAVE_PASO
+    features.insert("paso");
+#endif
+#ifdef ESYS_HAVE_RIPLEY
+    features.insert("ripley");
+#endif
+#ifdef ESYS_HAVE_SILO
+    features.insert("silo");
 #endif
-#ifdef FAUTOLAZYOFF
-   autolazy=0;
+#ifdef ESYS_HAVE_SPECKLEY
+    features.insert("speckley");
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    features.insert("trilinos");
+#endif
+#ifdef ESYS_HAVE_UMFPACK
+    features.insert("umfpack");
+#endif
+#ifdef ESYS_HAVE_WEIPA
+    features.insert("weipa");
+#endif
+#ifdef ESYS_HAVE_BOOST_IO
+    features.insert("unzip");
+#endif
+#ifdef ESYS_INDEXTYPE_LONG
+    features.insert("longindex");
 #endif
 
-#ifdef FRESCOLLECTON
-   resolve_collective=1;
+    //TODO: these should be replaced by a runtime check in python
+#ifdef ESYS_HAVE_GMSH
+    features.insert("gmsh");
 #endif
-#ifdef FRESCOLLECTOFF
-   resolve_collective=0;
+#ifdef ESYS_GMSH_MPI
+    features.insert("gmsh_mpi");
 #endif
 }
 
-int 
-EscriptParams::getInt(const char* name, int sentinel) const
+int EscriptParams::getInt(const std::string& name, int sentinel) const
 {
-   if (!strcmp(name,"TOO_MANY_LINES"))
-   {
-        return too_many_lines;
-   }
-   if (!strcmp(name,"AUTOLAZY"))
-   {
-        return autolazy;
-   }
-   if (!strcmp(name,"TOO_MANY_LEVELS"))
-   {
-        return too_many_levels;
-   }
-   if (!strcmp(name,"RESOLVE_COLLECTIVE"))
-   {
-        return resolve_collective;
-   }
-   if (!strcmp(name,"LAZY_STR_FMT"))
-   {
-        return lazy_str_fmt;
-   }
-   if (!strcmp(name,"LAPACK_SUPPORT"))
-   {
-        return lapack_support;
-   }
-   if (!strcmp(name, "NAN_CHECK"))
-   {
-#ifdef isnan        
-        return 1;
-#else
-        return 0;
-#endif
-   }
-   if (!strcmp(name,"LAZY_VERBOSE"))
-   {
-        return lazy_verbose;
-   }
-   if (!strcmp(name, "DISABLE_AMG"))
-   {
-        return amg_disabled;
-   }
-   if (!strcmp(name, "MPIBUILD"))
-   {
-#ifdef ESYS_MPI           
-        return 1;
-#else
-        return 0;
-#endif
-   }
-   if (!strcmp(name, "PASO_DIRECT"))
-   {
-        // This is not in the constructor because escriptparams could be constructed 
-        // before main (and hence no opportunity to call INIT)
-        #ifdef ESYS_MPI
-            int size;
-            if (MPI_Comm_size(MPI_COMM_WORLD, &size)!=MPI_SUCCESS)        // This would break in a subworld
-            {
-                temp_direct_solver=false;        
-            }
-            if (size>1)
-            {
-                temp_direct_solver=false;
-            }
-        #endif   
-        return temp_direct_solver;
-   }
-    if (!strcmp(name, "NETCDF_BUILD"))
-    {
-       return has_netcdf; 
-    }
-    if (!strcmp(name, "GMSH_SUPPORT"))
-        return gmsh;
-    if (!strcmp(name, "GMSH_MPI"))
-        return gmsh_mpi;
-   return sentinel;
+    if (name == "AUTOLAZY")
+        return autoLazy;
+    else if (name == "LAZY_STR_FMT")
+        return lazyStrFmt;
+    else if (name == "LAZY_VERBOSE")
+        return lazyVerbose;
+    else if (name == "RESOLVE_COLLECTIVE")
+        return resolveCollective;
+    else if (name == "TOO_MANY_LEVELS")
+        return tooManyLevels;
+    else if (name == "TOO_MANY_LINES")
+        return tooManyLines;
+
+    return sentinel;
 }
-  
-void 
-EscriptParams::setInt(const char* name, int value)
+
+void
+EscriptParams::setInt(const std::string& name, int value)
 {
-   // Note: there is no way to modify the LAPACK_SUPPORT variable ATM
-    if (!strcmp(name,"TOO_MANY_LINES"))
-        too_many_lines=value;
-    else if (!strcmp(name,"AUTOLAZY"))
-        autolazy=!(value==0);        // set to 1 or zero
-    else if (!strcmp(name,"TOO_MANY_LEVELS"))
-        too_many_levels=value;
-    else if (!strcmp(name,"RESOLVE_COLLECTIVE"))
-        resolve_collective=value;
-    else if (!strcmp(name,"LAZY_STR_FMT"))
-        lazy_str_fmt=value;
-    else if (!strcmp(name,"LAZY_VERBOSE"))
-        lazy_verbose=value;
+    if (name == "AUTOLAZY")
+        autoLazy = value;
+    else if (name == "LAZY_STR_FMT")
+        lazyStrFmt = value;
+    else if (name == "LAZY_VERBOSE")
+        lazyVerbose = value;
+    else if (name == "RESOLVE_COLLECTIVE")
+        resolveCollective = value;
+    else if (name == "TOO_MANY_LEVELS")
+        tooManyLevels = value;
+    else if (name == "TOO_MANY_LINES")
+        tooManyLines = value;
     else
-       throw esysUtils::EsysException("Invalid parameter name");
+        throw ValueError("Invalid parameter name - "+name);
 }
 
-void 
-setEscriptParamInt(const char* name, int value)
+bp::list EscriptParams::listEscriptParams() const
 {
-   escriptParams.setInt(name,value);
+   bp::list l;
+   l.append(bp::make_tuple("AUTOLAZY", autoLazy, "{0,1} Operations involving Expanded Data will create lazy results."));
+   l.append(bp::make_tuple("LAZY_STR_FMT", lazyStrFmt, "{0,1,2}(TESTING ONLY) change output format for lazy expressions."));
+   l.append(bp::make_tuple("LAZY_VERBOSE", lazyVerbose, "{0,1} Print a warning when expressions are resolved because they are too large."));
+   l.append(bp::make_tuple("RESOLVE_COLLECTIVE", resolveCollective, "(TESTING ONLY) {0,1} Collective operations will resolve their data."));
+   l.append(bp::make_tuple("TOO_MANY_LEVELS", tooManyLevels, "(TESTING ONLY) maximum levels allowed in an expression."));
+   l.append(bp::make_tuple("TOO_MANY_LINES", tooManyLines, "Maximum number of lines to output when printing data before printing a summary instead."));
+   return l;
 }
 
-
-int
-getEscriptParamInt(const char* name, int sentinel)
+bool EscriptParams::hasFeature(const std::string& name) const
 {
-   return escriptParams.getInt(name, sentinel);
+    if (name == "PASO_DIRECT") {
+        // This is not in the constructor because escriptparams could be
+        // constructed before main (and hence no opportunity to call INIT)
+#ifdef ESYS_MPI
+        int size;
+        if (MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS || size > 1)
+            return false;
+#endif
+        return hasFeature("paso") && (hasFeature("umfpack") || hasFeature("mkl"));
+    }
+
+    return features.count(name) > 0;
 }
 
-boost::python::list
-EscriptParams::listEscriptParams()
+bp::list EscriptParams::listFeatures() const
 {
-   using namespace boost::python;
-   boost::python::list l;
-   l.append(make_tuple("TOO_MANY_LINES", too_many_lines, "Maximum number of lines to output when printing data before printing a summary instead."));
-   l.append(make_tuple("AUTOLAZY", autolazy, "{0,1} Operations involving Expanded Data will create lazy results."));
-   l.append(make_tuple("RESOLVE_COLLECTIVE",resolve_collective ,"(TESTING ONLY) {0.1} Collective operations will resolve their data."));
-   l.append(make_tuple("TOO_MANY_LEVELS", too_many_levels, "(TESTING ONLY) maximum levels allowed in an expression."));
-   l.append(make_tuple("LAZY_STR_FMT", lazy_str_fmt, "{0,1,2}(TESTING ONLY) change output format for lazy expressions."));
-   l.append(make_tuple("LAZY_VERBOSE", lazy_verbose, "{0,1} Print a warning when expressions are resolved because they are too large."));
-   l.append(make_tuple("DISABLE_AMG", amg_disabled, "{0,1} AMG is disabled."));
-   l.append(make_tuple("NETCDF_BUILD", has_netcdf, "{0,1} Was this build made with netcdf libraries?"));
-   l.append(make_tuple("GMSH_SUPPORT", gmsh, "{0,1} Non-python GMSH support is available."));
-   l.append(make_tuple("GMSH_MPI", gmsh_mpi, "{0,1} Both GMSH and escript have MPI capabilities."));
+   bp::list l;
+   FeatureSet::const_iterator it;
+   for (it = features.begin(); it != features.end(); it++)
+       l.append(*it);
    return l;
 }
 
+} // end namespace
 
-
-
-}        // end namespace
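
For reference, a minimal sketch of how the reworked string-based parameter interface above can be used. The parameter names, the free functions and the "PASO_DIRECT" feature check all come from the code in this hunk; only the function name tuneLazyEvaluation is illustrative.

    #include "EscriptParams.h"
    #include <iostream>

    void tuneLazyEvaluation()
    {
        using namespace escript;
        setEscriptParamInt("AUTOLAZY", 1);   // lazy results for Expanded Data
        // known names return their current value; unknown names return the sentinel
        std::cout << "print limit: " << getEscriptParamInt("TOO_MANY_LINES") << "\n";
        if (escriptParams.hasFeature("PASO_DIRECT")) {
            // paso plus a direct solver (umfpack or mkl) is built in and we
            // are running on at most one MPI rank
        }
    }
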
diff --git a/escriptcore/src/EscriptParams.h b/escriptcore/src/EscriptParams.h
index 8205af8..65752a5 100644
--- a/escriptcore/src/EscriptParams.h
+++ b/escriptcore/src/EscriptParams.h
@@ -14,108 +14,99 @@
 *
 *****************************************************************************/
 
-#ifndef escript_EscriptParams_H
-#define escript_EscriptParams_H
-#include "system_dep.h"
+#ifndef __ESCRIPT_PARAMS_H__
+#define __ESCRIPT_PARAMS_H__
+
 #include <boost/python/list.hpp>
-#include "Data.h"    // for the operators
+
+#include <string>
+#include <unordered_set>
 
 namespace escript
 {
 
-class Data;
-class DataLazy;
-
 class EscriptParams
 {
+    typedef std::unordered_set<std::string> FeatureSet;
+
 public:
-  ESCRIPT_DLL_API
-  EscriptParams();
+    EscriptParams();
 
-  ESCRIPT_DLL_API
-  int getInt(const char* name, int sentinel=0) const;
-  
-  ESCRIPT_DLL_API
-  void setInt(const char* name, int value);
+    int getInt(const std::string& name, int sentinel = 0) const;
+    void setInt(const std::string& name, int value);
+    boost::python::list listEscriptParams() const;
 
-  boost::python::list
-  listEscriptParams();
+    inline int getAutoLazy() const { return autoLazy; }
+    inline int getLazyStrFmt() const { return lazyStrFmt; }
+    inline int getLazyVerbose() const { return lazyVerbose; }
+    inline int getResolveCollective() const { return resolveCollective; }
+    inline int getTooManyLevels() const { return tooManyLevels; }
+    inline int getTooManyLines() const { return tooManyLines; }
 
-private:
+    bool hasFeature(const std::string& name) const;
+    boost::python::list listFeatures() const;
 
-  // If we get more params we can replace this with a map
-	int too_many_lines;
-	int autolazy;
-	int too_many_levels;
-	int resolve_collective;
-	int lazy_str_fmt;
-	int lapack_support;
-	int lazy_verbose;
-	int amg_disabled;
-	int has_netcdf;
-	int gmsh;
-	int gmsh_mpi;
-	mutable int temp_direct_solver;
-
-protected: 
-  // This is to provide fast access for methods in Data.
-  // Its a little bit ugly, needing all those friends but I really want to
-  // limit outside access to the char* interface
-
-  int getTOO_MANY_LINES() {return too_many_lines;}
-  int getAUTOLAZY() { return autolazy;}
-  int getTOO_MANY_LEVELS() {return too_many_levels;}
-  int getRESOLVE_COLLECTIVE() {return resolve_collective;}
-  int getLAZY_STR_FMT() {return lazy_str_fmt;}
-  int getLAZY_VERBOSE() {return lazy_verbose;}
-
-  friend class escript::Data;
-  friend class escript::DataLazy;
-  friend Data operator+(const boost::python::api::object&, const escript::Data&);
-  friend Data operator-(const boost::python::api::object&, const escript::Data&);
-  friend Data operator*(const boost::python::api::object&, const escript::Data&);
-  friend Data operator/(const boost::python::api::object&, const escript::Data&);
-  friend Data operator+(const escript::Data&, const escript::Data&);
-  friend Data operator-(const escript::Data&, const escript::Data&);
-  friend Data operator*(const escript::Data&, const escript::Data&);
-  friend Data operator/(const escript::Data&, const escript::Data&);
-  friend Data operator+(const escript::Data&, const boost::python::api::object&);
-  friend Data operator-(const escript::Data&, const boost::python::api::object&);
-  friend Data operator*(const escript::Data&, const boost::python::api::object&);
-  friend Data operator/(const escript::Data&, const boost::python::api::object&);
-  friend Data C_GeneralTensorProduct(escript::Data& arg_0, escript::Data& arg_1,
-                     int axis_offset, int transpose);
-  friend Data condEval(escript::Data& mask, escript::Data& trueval, escript::Data& falseval);
+private:
+    FeatureSet features;
+    // the number of parameters is small enough to avoid a map for performance
+    // reasons
+    int autoLazy;
+    int lazyStrFmt;
+    int lazyVerbose;
+    int resolveCollective;
+    int tooManyLevels;
+    int tooManyLines;
 };
 
 
 extern EscriptParams escriptParams;
 
 /**
-  \brief Set the value of a named parameter.
-  See listEscriptParams() (showEscriptParams() in python) for available parameters.
+    \brief Set the value of a named parameter.
+    See listEscriptParams() for available parameters.
 */
-ESCRIPT_DLL_API
-void setEscriptParamInt(const char* name, int value);
+inline void setEscriptParamInt(const std::string& name, int value)
+{
+   escriptParams.setInt(name, value);
+}
 
 /**
-  \brief get the value of a named parameter.
-  See listEscriptParams() (showEscriptParams() in python) for available parameters.
+    \brief get the value of a named parameter.
+    See listEscriptParams() for available parameters.
 */
-ESCRIPT_DLL_API
-int getEscriptParamInt(const char* name, int sentinel=0);
+inline int getEscriptParamInt(const std::string& name, int sentinel=0)
+{
+    return escriptParams.getInt(name, sentinel);
+}
 
 /**
-  \brief describe available paramters.
-  \return a list of tuples (parameter name, value, description)
+    \brief describe available parameters.
+    \return a list of tuples (parameter name, value, description)
 */
-ESCRIPT_DLL_API
 inline boost::python::list listEscriptParams()
 {
-   return escriptParams.listEscriptParams();
+    return escriptParams.listEscriptParams();
 }
 
+/**
+    \brief returns true if escript was compiled with the feature `name`,
+           false otherwise.
+*/
+inline bool hasFeature(const std::string& name)
+{
+    return escriptParams.hasFeature(name);
+}
 
-
+/**
+    \brief returns a list of features escript was compiled with.
+    \return a boost python list of strings
+*/
+inline boost::python::list listFeatures()
+{
+    return escriptParams.listFeatures();
 }
-#endif
+
+} // namespace escript
+
+#endif // __ESCRIPT_PARAMS_H__
+
diff --git a/escriptcore/src/EsysException.h b/escriptcore/src/EsysException.h
new file mode 100644
index 0000000..b44e8f2
--- /dev/null
+++ b/escriptcore/src/EsysException.h
@@ -0,0 +1,97 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_ESYSEXCEPTION_H__
+#define __ESCRIPT_ESYSEXCEPTION_H__
+
+#include <exception>
+#include <string>
+
+namespace escript {
+
+/**
+   \brief The base class for escript exceptions.
+*/
+class EsysException : public std::exception
+{
+public:
+    /**
+      \brief
+      Constructor which creates an Exception with the given message
+     
+      @param message - Exception message.
+    */
+    EsysException(const std::string& message) : msg(message) {}
+
+    /// Destructor
+    virtual ~EsysException() throw() {}
+
+    /**
+      \brief
+      Returns a description of the exception.
+    */
+    inline virtual const char* what() const throw() { return msg.c_str(); }
+
+private:
+    //
+    // the exception message
+    std::string msg;
+};
+
+/**
+  \brief
+  An exception class for assertions within escript
+*/
+class AssertException : public EsysException
+{
+public:
+    AssertException(const std::string& str) : EsysException(str) {}
+};
+
+/**
+  \brief
+  An exception class for Input/Output errors
+*/
+class IOError : public EsysException
+{
+public:
+    IOError(const std::string& str) : EsysException(str) {}
+};
+
+/**
+  \brief
+  An exception class for features which are not (yet) implemented
+*/
+class NotImplementedError : public EsysException
+{
+public:
+    NotImplementedError(const std::string& str) : EsysException(str) {}
+};
+
+/**
+  \brief
+  An exception class that signals an invalid argument value
+*/
+class ValueError : public EsysException
+{
+public:
+    ValueError(const std::string& str) : EsysException(str) {}
+};
+
+} // namespace escript
+
+#endif // __ESCRIPT_ESYSEXCEPTION_H__
+
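
The flattened exception hierarchy above replaces the old per-class exceptionName() machinery; subclasses only forward a message string. A small sketch of the intended use, assuming nothing beyond the header just added (safeDivide is a made-up helper):

    #include "EsysException.h"

    double safeDivide(double a, double b)
    {
        if (b == 0.0)
            throw escript::ValueError("safeDivide: divisor must be non-zero");
        return a / b;
    }

    // Callers can catch the specific type, or escript::EsysException /
    // std::exception to handle every escript error in one place:
    //     try { safeDivide(1.0, 0.0); }
    //     catch (const escript::ValueError& e) { std::cerr << e.what() << "\n"; }
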
diff --git a/escriptcore/src/EsysMPI.cpp b/escriptcore/src/EsysMPI.cpp
new file mode 100644
index 0000000..70ff3c9
--- /dev/null
+++ b/escriptcore/src/EsysMPI.cpp
@@ -0,0 +1,242 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "EsysMPI.h"
+#include "EsysException.h"
+
+#include <vector>
+
+namespace escript
+{
+  
+using DataTypes::dim_t;
+using DataTypes::index_t;
+
+JMPI makeInfo(MPI_Comm comm, bool owncom)
+{
+    if (NoCOMM_WORLD::active() && comm==MPI_COMM_WORLD)
+        throw EsysException("Attempt to use the MPI_COMM_WORLD "
+                            "communicator when it is blocked.");
+    JMPI_* p = new JMPI_(comm, owncom);
+    return JMPI(p);
+}
+
+JMPI_::JMPI_(MPI_Comm mpicomm, bool owncom)
+        : comm(mpicomm), ownscomm(owncom), msg_tag_counter(0)
+{
+#ifdef ESYS_MPI
+    if (mpicomm != MPI_COMM_NULL) {
+        if (MPI_Comm_rank(comm, &rank) != MPI_SUCCESS ||
+                MPI_Comm_size(comm, &size) != MPI_SUCCESS) {
+            throw EsysException("JMPI::JMPI: error finding comm rank/size" );
+        }
+    } else {
+        rank = 0;
+        size = 0;
+    }
+#else
+    rank = 0;
+    size = 1;        
+#endif        
+}
+
+JMPI_::~JMPI_()
+{
+#ifdef ESYS_MPI
+    if (ownscomm && comm != MPI_COMM_NULL)
+        MPI_Comm_free(&comm);
+#endif
+}
+
+dim_t JMPI_::setDistribution(index_t min_id, index_t max_id,
+                             index_t* distribution)
+{
+    const dim_t N = max_id-min_id+1;
+    if (N > 0) {
+        const dim_t local_N = N/size;
+        const dim_t rest = N-local_N*size;
+        for (int p=0; p<size; ++p) {
+            if (p < rest) {
+                distribution[p]=min_id+(local_N+1)*p;
+            } else {
+                distribution[p]=min_id+rest+local_N*p;
+            }
+        }
+        distribution[size]=max_id+1;
+        if (rest==0) {
+            return local_N;
+        } else {
+            return local_N+1;
+        }
+    } else {
+        for (int p=0; p<size+1; ++p)
+            distribution[p]=min_id;
+        return 0;
+    }
+}
+
+void JMPI_::split(dim_t N, dim_t* local_N, index_t* offset) 
+{
+    *local_N = N/size;
+    dim_t rest = N-(*local_N)*size;
+    if (rank < rest) {
+        (*local_N)++;
+        *offset = (*local_N)*rank;
+    } else {
+        *offset = (*local_N)*rank + rest;
+    }
+}
+
+// Throw all values in and get the maximum --- used for error checking.
+// This used to be implemented as a simple AllReduce.
+// However, if there are other (overlapping) communicators in the system,
+// they don't react well to getting unexpected/untagged messages.
+// To avoid this, we do individual sends to the root which sends the
+// result back.
+bool checkResult(int res, int& mres, const JMPI& info)
+{
+    if (info->size==1) {
+        mres = res;
+        return true;
+    }
+#ifdef ESYS_MPI
+    const int leader = 0;
+    const int BIGTAG = getSubWorldTag();
+    if (info->rank != leader) {  
+        if (MPI_Send(&res, 1, MPI_INT, leader, BIGTAG, info->comm) != MPI_SUCCESS)
+            return false;
+        MPI_Status status;
+        if (MPI_Recv(&mres, 1, MPI_INT, leader, BIGTAG, info->comm, &status) != MPI_SUCCESS)
+            return false;
+    } else {
+        std::vector<MPI_Status> status(info->size - 1);
+        MPI_Request* reqs = new MPI_Request[info->size-1];
+        int* eres = new int[info->size-1];
+        for (int i=0; i<info->size-1; ++i) {
+            MPI_Irecv(eres+i, 1, MPI_INT, i+1, BIGTAG, info->comm, reqs+i);
+        }
+        if (MPI_Waitall(info->size-1, reqs, &status[0]) != MPI_SUCCESS) {
+            delete[] reqs;
+            delete[] eres;
+            return false;
+        }
+        // now we have them all, find the max
+        mres = res;
+        for (int i=0; i<info->size-1; ++i) {
+            if (mres < eres[i])
+                mres = eres[i];
+        }
+        delete[] eres;
+        // now we know what the result should be, send it to the others
+        for (int i=0; i<info->size-1; ++i)
+            MPI_Isend(&mres, 1, MPI_INT, i+1, BIGTAG, info->comm, reqs+i);
+
+        if (MPI_Waitall(info->size-1, reqs, &status[0]) != MPI_SUCCESS) {
+            delete[] reqs;
+            return false;
+        }
+        delete[] reqs;
+    }
+#endif
+    return true;
+}
+
+
+// Ensure that any ranks with an empty src argument end up with the string
+// from one of the other ranks.
+// Without MPI, it makes dest point at a copy of src.
+// The expected use case for this code is to ship error messages between
+// ranks, so it is not written to be speedy.
+bool shipString(const char* src, char** dest, MPI_Comm& comm)
+{
+#ifdef ESYS_MPI  
+    int rank=0;
+    if (MPI_Comm_rank(comm, &rank) != MPI_SUCCESS)
+        return false; // we have no reason to believe MPI works anymore
+    
+    int slen = strlen(src);
+    // everybody needs to tell everyone if they have a string
+    // send your rank if you have a non-empty string else
+    // send -1
+    int in = (slen ? rank : -1);
+    int out;
+    if (MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MAX, comm)!=MPI_SUCCESS)
+        return false;
+
+    // should not be called under these conditions, but no rank had a string
+    if (out==-1) {
+        *dest = new char[1];
+        *dest[0] = '\0';
+        return true;
+    }
+    // since we will be using broadcast, we need to tell everyone how big the
+    // string is going to be with an additional bcast
+    
+    if (MPI_Bcast(&slen, 1, MPI_INT, out, comm) != MPI_SUCCESS)
+        return false;
+
+    // now broadcast that string to everyone
+    if (rank==out) {
+        // I could const_cast src but instead I'll make a copy
+        *dest = new char[slen+1];
+        strcpy(*dest, src);
+
+        // this guy should just send the string
+        if (MPI_Bcast(*dest, slen+1, MPI_CHAR, out, comm) != MPI_SUCCESS)
+            return false;
+
+        return true;
+    } else {
+        *dest = new char[slen+1];
+        if (MPI_Bcast(*dest, slen+1, MPI_CHAR, out, comm)!=MPI_SUCCESS)
+            return false;
+
+        return true;
+    }
+#else
+    *dest = new char[strlen(src)+1];
+    strcpy(*dest, src);
+    return true;
+#endif
+}
+
+namespace 
+{
+    // true if a split world call is currently running and MPI_COMM_WORLD
+    // should not be allowed by default
+    bool nocommworldplease=false;
+}
+
+NoCOMM_WORLD::NoCOMM_WORLD()
+{
+    if (nocommworldplease)
+        throw EsysException("NoCOMM_WORLD does not nest.");
+
+    nocommworldplease=true;
+}
+
+NoCOMM_WORLD::~NoCOMM_WORLD()
+{
+    nocommworldplease=false;
+}  
+
+bool NoCOMM_WORLD::active()
+{
+    return nocommworldplease;
+}
+
+} // namespace escript
+
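
checkResult() above deliberately avoids MPI_Allreduce so that overlapping communicators never see unexpected, untagged messages; every rank contributes an error code and everyone receives the maximum. A hedged sketch of how a caller might drive it (doLocalWork is a hypothetical helper, everything else is declared in the new files above):

    #include "EsysMPI.h"
    #include "EsysException.h"

    int doLocalWork();   // hypothetical helper: returns 0 on success, >0 on error

    void runStep()
    {
        escript::JMPI info = escript::makeInfo(MPI_COMM_WORLD);
        int globalStatus = 0;
        // every rank passes its own status in; globalStatus comes back as the maximum
        if (!escript::checkResult(doLocalWork(), globalStatus, info) || globalStatus != 0)
            throw escript::EsysException("at least one rank reported a failure");
    }
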
diff --git a/escriptcore/src/EsysMPI.h b/escriptcore/src/EsysMPI.h
new file mode 100644
index 0000000..8c8a270
--- /dev/null
+++ b/escriptcore/src/EsysMPI.h
@@ -0,0 +1,202 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_ESYSMPI_H__
+#define __ESCRIPT_ESYSMPI_H__
+
+#include <escript/DataTypes.h>
+
+#include <ctime>
+#include <sstream>
+
+#include <boost/shared_ptr.hpp>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#ifdef ESYS_MPI
+#include <mpi.h>
+
+#ifdef ESYS_INDEXTYPE_LONG
+#define MPI_DIM_T MPI_LONG
+#else
+#define MPI_DIM_T MPI_INT
+#endif
+
+#else
+   typedef int MPI_Comm;
+   typedef int MPI_Request;
+   typedef int MPI_Op;
+   typedef int MPI_Status;
+   #define MPI_INT 6
+   #define MPI_DOUBLE 11
+   #define MPI_COMM_WORLD 91
+   #define MPI_COMM_NULL 0
+
+   // MPI_Op replacements for non-MPI - these values are arbitrary
+   #define MPI_SUM 100
+   #define MPI_MIN 101
+   #define MPI_MAX 102
+
+   #define MPI_OP_NULL 17
+   // end MPI_op
+
+#endif // ESYS_MPI
+
+namespace escript {
+
+/** \brief tag reserved for use by SubWorld code
+    This value should be higher than the modulus used in JMPI_::setCounter.
+    Apart from that, its value is not particularly significant.
+*/
+inline int getSubWorldTag()
+{
+    return (('S'<< 24) + ('u' << 16) + ('b' << 8) + 'W')%1010201;
+}
+
+class JMPI_;
+
+typedef boost::shared_ptr<JMPI_> JMPI;
+
+/// creates a JMPI shared pointer from MPI communicator
+/// if owncom is true, the communicator is freed when mpi info is destroyed.
+JMPI makeInfo(MPI_Comm comm, bool owncom=false);
+
+class JMPI_
+{
+public:
+    ~JMPI_();
+
+    /// distributes the id range [min_id, max_id] over all ranks, writing each
+    /// rank's first id into distribution; returns the largest per-rank count
+    DataTypes::dim_t setDistribution(DataTypes::index_t min_id,
+                                     DataTypes::index_t max_id,
+                                     DataTypes::index_t* distribution);
+
+    /// splits N items across ranks, returning this rank's share in local_N
+    /// and the index of its first item in offset
+    void split(DataTypes::dim_t N, DataTypes::dim_t* local_N,
+               DataTypes::index_t* offset);
+
+    /// N = #CPUs, k is a CPU number that may be out of range or negative.
+    /// Returns the equivalent CPU number in 0...N-1.
+    inline int mod_rank(int k) const
+    {
+        int out=0;
+#ifdef ESYS_MPI
+        if (size > 1) {
+            const int q = k/size;
+            if (k > 0) {
+               out=k-size*q;
+            } else if (k < 0) {
+               out=k-size*(q-1);
+            }
+        }
+#endif
+        return out;
+    }
+
+    /// appends MPI rank to a file name if MPI size > 1
+    inline std::string appendRankToFileName(const std::string& fileName) const
+    {
+#ifdef ESYS_MPI
+        if (size > 1) {
+            std::stringstream ss;
+            ss << fileName << '.';
+            ss.fill('0');
+            ss.width(4);
+            ss << rank;
+            return ss.str();
+        }
+#endif
+        return fileName;
+    }
+
+    /// returns the current value of the message tag counter
+    inline int counter() const
+    {
+        return msg_tag_counter;
+    }
+
+    /// increments the message tag counter by `i`
+    inline void incCounter(int i=1)
+    {
+        msg_tag_counter+=i;
+        // there is no particular significance here other than being 7 digits
+        // and prime (because why not). It just needs to be big.
+        msg_tag_counter %= 1010201;
+    }
+
+    /// sets the message tag counter to `value`
+    inline void setCounter(int value)
+    {
+        msg_tag_counter = value%1010201;
+    }
+
+    /// returns true if this has a valid MPI communicator
+    inline bool isValid() const
+    {
+        return comm!=MPI_COMM_NULL;
+    }
+
+    int size;
+    int rank;
+    MPI_Comm comm;
+
+private:
+    JMPI_(MPI_Comm comm, bool owncomm);
+    friend JMPI makeInfo(MPI_Comm comm, bool owncom);
+
+    bool ownscomm;
+    int msg_tag_counter;
+};
+
+// Does not cope with nested calls
+class NoCOMM_WORLD
+{
+public:
+    NoCOMM_WORLD();
+    ~NoCOMM_WORLD();
+    static bool active();
+};
+
+/// Everyone puts in their error code and everyone gets the largest one
+bool checkResult(int input, int& output, const JMPI& comm);
+
+/// ensure that any ranks with an empty src argument end up with the
+/// string from one of the other ranks.
+/// With no MPI, it makes dest point at a copy of src.
+bool shipString(const char* src, char** dest, MPI_Comm& comm);
+
+/// returns the current ticks for timing
+inline double gettime()
+{
+    double out;
+#ifdef ESYS_MPI
+    out = MPI_Wtime();
+#else
+#ifdef _OPENMP 
+    out=omp_get_wtime();
+#else
+    out=((double) clock())/CLOCKS_PER_SEC;
+#endif
+#endif
+    return out;
+}
+
+} // namespace escript
+
+#endif // __ESCRIPT_ESYSMPI_H__
+
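
A short sketch of the JMPI helpers declared above, distributing work across ranks and deriving per-rank output names. The item count and file name are placeholders; split(), appendRankToFileName() and gettime() are taken from this header.

    #include "EsysMPI.h"

    void distributeWork()
    {
        escript::JMPI info = escript::makeInfo(MPI_COMM_WORLD);
        escript::DataTypes::dim_t localN = 0;
        escript::DataTypes::index_t offset = 0;
        info->split(1000, &localN, &offset);   // this rank owns items [offset, offset+localN)
        // "output.dat.0003" on rank 3 when size > 1, unchanged otherwise
        std::string fname = info->appendRankToFileName("output.dat");
        double t0 = escript::gettime();        // wall-clock reference for timing the local work
        (void)fname; (void)t0;
    }
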
diff --git a/esysUtils/src/esysExceptionTranslator.cpp b/escriptcore/src/ExceptionTranslators.cpp
similarity index 50%
rename from esysUtils/src/esysExceptionTranslator.cpp
rename to escriptcore/src/ExceptionTranslators.cpp
index 31a9a1f..2f34db1 100644
--- a/esysUtils/src/esysExceptionTranslator.cpp
+++ b/escriptcore/src/ExceptionTranslators.cpp
@@ -14,24 +14,34 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "first.h"
-#include "system_dep.h"
-#include "esysExceptionTranslator.h" 
-#include <iostream>
+#include "ExceptionTranslators.h" 
 
-using namespace std;
+namespace escript {
 
-namespace esysUtils {
+void AssertionErrorTranslator(const EsysException& e) 
+{
+    PyErr_SetString(PyExc_AssertionError, e.what());
+}
 
-void RuntimeErrorTranslator(EsysException const& e) 
-  {
-    PyErr_SetString(PyExc_RuntimeError,e.what());
-  }
+void IOErrorTranslator(const EsysException& e) 
+{
+    PyErr_SetString(PyExc_IOError, e.what());
+}
 
-void ValueErrorTranslator(EsysException const& e) 
+void NotImplementedErrorTranslator(const EsysException& e) 
 {
-    PyErr_SetString(PyExc_ValueError,e.what());
+    PyErr_SetString(PyExc_NotImplementedError, e.what());
+}
+
+void RuntimeErrorTranslator(const EsysException& e) 
+{
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+}
+
+void ValueErrorTranslator(const EsysException& e) 
+{
+    PyErr_SetString(PyExc_ValueError, e.what());
 }
 
 }  // end of namespace
+
diff --git a/escriptcore/src/ExceptionTranslators.h b/escriptcore/src/ExceptionTranslators.h
new file mode 100644
index 0000000..d3707d3
--- /dev/null
+++ b/escriptcore/src/ExceptionTranslators.h
@@ -0,0 +1,66 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESCRIPT_EXCEPTIONTRANSLATORS_H__
+#define __ESCRIPT_EXCEPTIONTRANSLATORS_H__
+
+#include "DataTypes.h"
+#include "EsysException.h"
+
+// put this within all boost module definitions so exceptions are translated
+// properly
+#define REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS \
+    register_exception_translator<escript::AssertException>(&escript::AssertionErrorTranslator);\
+    register_exception_translator<escript::IOError>(&escript::IOErrorTranslator);\
+    register_exception_translator<escript::NotImplementedError>(&escript::NotImplementedErrorTranslator);\
+    register_exception_translator<escript::ValueError>(&escript::ValueErrorTranslator)
+
+namespace escript {
+
+  /**
+     \brief
+     Function which translates an EsysException into a python AssertionError
+  */
+  void AssertionErrorTranslator(const EsysException& e);
+
+  /**
+     \brief
+     Function which translates an EsysException into a python IOError
+  */
+  void IOErrorTranslator(const EsysException& e);
+
+  /**
+     \brief
+     Function which translates an EsysException into a python NotImplementedError
+  */
+  void NotImplementedErrorTranslator(const EsysException& e);
+
+  /**
+     \brief
+     Function which translates an EsysException into a python RuntimeError
+  */
+  void RuntimeErrorTranslator(const EsysException& e);
+
+  /**
+     \brief
+     Function which translates an EsysException into a python ValueError
+  */
+  void ValueErrorTranslator(const EsysException& e);
+
+} // end of namespace
+
+#endif // __ESCRIPT_EXCEPTIONTRANSLATORS_H__
+
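
The REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS macro above is meant to be dropped into each Boost.Python module definition so C++ exceptions surface as the matching Python exception types. A hedged sketch; the module name is a placeholder and the macro assumes the boost::python names are in scope:

    #include <boost/python.hpp>
    #include "ExceptionTranslators.h"

    BOOST_PYTHON_MODULE(escript_example)
    {
        using namespace boost::python;
        REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
        // ... def() / class_<> registrations for the module follow here ...
    }
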
diff --git a/esysUtils/src/esysFileWriter.h b/escriptcore/src/FileWriter.h
similarity index 94%
rename from esysUtils/src/esysFileWriter.h
rename to escriptcore/src/FileWriter.h
index 24dea56..84483ab 100644
--- a/esysUtils/src/esysFileWriter.h
+++ b/escriptcore/src/FileWriter.h
@@ -14,16 +14,16 @@
 *
 *****************************************************************************/
 
-#ifndef __ESYS_FILEWRITER_H__
-#define __ESYS_FILEWRITER_H__
+#ifndef __ESCRIPT_FILEWRITER_H__
+#define __ESCRIPT_FILEWRITER_H__
+
+#include <escript/EsysMPI.h>
 
 #include <fstream>
 #include <iostream>
 #include <sstream>
 
-#include <esysUtils/Esys_MPI.h>
-
-namespace esysUtils {
+namespace escript {
 
 class FileWriter
 {
@@ -36,10 +36,6 @@ public:
             MPI_Comm_rank(mpiComm, &mpiRank);
             MPI_Comm_size(mpiComm, &mpiSize);
         }
-#else
-        (void)mpiRank;	// to prevent the compiler from complaining
-        (void)mpiSize;  // that these are unused
-        (void)mpiComm;
 #endif
     }
 
@@ -103,7 +99,7 @@ public:
                 char errorstr[MPI_MAX_ERROR_STRING];
                 int len;
                 MPI_Error_string(mpiErr, errorstr, &len);
-                std::cerr << "Error " << " opening " << filename
+                std::cerr << "Error opening " << filename
                           << " for parallel writing: " << errorstr << std::endl;
             } else {
                 success=true;
@@ -226,7 +222,7 @@ private:
 };
 
 
-} // namespace esysUtils
+} // namespace escript
 
-#endif //  __ESYS_FILEWRITER_H__
+#endif // __ESCRIPT_FILEWRITER_H__
 
diff --git a/escriptcore/src/FunctionSpace.cpp b/escriptcore/src/FunctionSpace.cpp
index 27a2055..db39e6f 100644
--- a/escriptcore/src/FunctionSpace.cpp
+++ b/escriptcore/src/FunctionSpace.cpp
@@ -14,14 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "FunctionSpace.h" 
-#include "FunctionSpaceException.h"
+
 #include "Data.h" 
 #include "DataFactory.h" 
+#include "FunctionSpaceException.h"
 #include "NullDomain.h"
 
 #include <iostream>
@@ -39,9 +36,7 @@ namespace escript {
   }
 
 
-
-namespace
-{
+namespace {
 //
 // Create a null domain for use with any default-constructed function space
 // NullDomain const FunctionSpace::nullDomainValue;
@@ -89,7 +84,7 @@ m_functionSpaceType(other.m_functionSpaceType)
 {
 }
 
-std::pair<int,dim_t>
+std::pair<int,DataTypes::dim_t>
 FunctionSpace::getDataShape() const
 {
   return m_domain->getDataShape(m_functionSpaceType);
@@ -145,13 +140,13 @@ FunctionSpace::toPyString() const
 
 
 int
-FunctionSpace::getTagFromSampleNo(dim_t sampleNo) const
+FunctionSpace::getTagFromSampleNo(DataTypes::dim_t sampleNo) const
 {
   return m_domain->getTagFromSampleNo(m_functionSpaceType,sampleNo);
 }
 
 int
-FunctionSpace::getTagFromDataPointNo(dim_t dataPointNo) const
+FunctionSpace::getTagFromDataPointNo(DataTypes::dim_t dataPointNo) const
 {
   //
   // Get the number of samples and data-points per sample
@@ -180,26 +175,26 @@ FunctionSpace::getTagFromDataPointNo(dim_t dataPointNo) const
   return(tagNo);
 }
 
-dim_t FunctionSpace::getReferenceIDFromDataPointNo(dim_t dataPointNo) const
+DataTypes::dim_t FunctionSpace::getReferenceIDFromDataPointNo(DataTypes::dim_t dataPointNo) const
 {
      //
      // Get the number of samples and data-points per sample
-     dim_t numSamples = getNumSamples();
-     int numDataPointsPerSample = getNumDPPSample();
-     const dim_t* referenceIDs= borrowSampleReferenceIDs();
-     dim_t numDataPoints = numSamples * numDataPointsPerSample;
+    DataTypes::dim_t numSamples = getNumSamples();
+    int numDataPointsPerSample = getNumDPPSample();
+    const DataTypes::dim_t* referenceIDs= borrowSampleReferenceIDs();
+    DataTypes::dim_t numDataPoints = numSamples * numDataPointsPerSample;
 
-     if (numDataPointsPerSample==0) {
+    if (numDataPointsPerSample==0) {
         throw DataException("FunctionSpace::getReferenceIDFromDataPointNo error: no data-points associated with this object.");
-     }
-     if (dataPointNo<0 || dataPointNo>numDataPoints) {
+    }
+    if (dataPointNo<0 || dataPointNo>numDataPoints) {
         throw DataException("FunctionSpace::getReferenceIDFromDataPointNo error: invalid data-point number supplied.");
-     }
-     dim_t sampleNo = dataPointNo / numDataPointsPerSample;
-     return referenceIDs[sampleNo];
+    }
+    DataTypes::dim_t sampleNo = dataPointNo / numDataPointsPerSample;
+    return referenceIDs[sampleNo];
 }
 
-const dim_t*
+const DataTypes::dim_t*
 FunctionSpace::borrowSampleReferenceIDs() const
 {
   return m_domain->borrowSampleReferenceIDs(m_functionSpaceType);
@@ -318,3 +313,4 @@ FunctionSpace::getApproximationOrder() const
 }
 
 }  // end of namespace
+
diff --git a/escriptcore/src/FunctionSpace.h b/escriptcore/src/FunctionSpace.h
index d101d76..ac56987 100644
--- a/escriptcore/src/FunctionSpace.h
+++ b/escriptcore/src/FunctionSpace.h
@@ -81,7 +81,7 @@ public:
       \brief Returns the shape of the data needed to represent the function
              space.
     */
-    std::pair<int,dim_t> getDataShape() const;
+    std::pair<int,DataTypes::dim_t> getDataShape() const;
 
     /**
       \brief Comparison operator.  Returns true if function spaces are equal.
@@ -99,18 +99,18 @@ public:
     /**
       \brief Returns the tag associated with the given sample number.
     */
-    int getTagFromSampleNo(dim_t sampleNo) const;
+    int getTagFromSampleNo(DataTypes::dim_t sampleNo) const;
 
     /**
       \brief Returns the tag associated with the given data-point number.
     */
-    int getTagFromDataPointNo(dim_t dataPointNo) const;
+    int getTagFromDataPointNo(DataTypes::dim_t dataPointNo) const;
 
     /**
       \brief Returns the reference number associated with the given data-point
              number.
     */
-    dim_t getReferenceIDFromDataPointNo(dim_t dataPointNo) const;
+    DataTypes::dim_t getReferenceIDFromDataPointNo(DataTypes::dim_t dataPointNo) const;
 
     /**
       \brief Returns the reference number associated with the given sample
@@ -119,7 +119,7 @@ public:
              to use sampleNo as an offset.
     */
     inline
-    dim_t getReferenceIDOfSample(dim_t sampleNo) const {
+    DataTypes::dim_t getReferenceIDOfSample(DataTypes::dim_t sampleNo) const {
         return borrowSampleReferenceIDs()[sampleNo];
     }
 
@@ -128,14 +128,14 @@ public:
              return true
     */
     inline
-    bool ownSample(dim_t sampleNo) const {
+    bool ownSample(DataTypes::dim_t sampleNo) const {
         return m_domain->ownSample(m_functionSpaceType, sampleNo);
     }
 
     /**
       \brief Returns a borrowed reference to the list of sample reference IDs
     */
-    const dim_t* borrowSampleReferenceIDs() const;
+    const DataTypes::dim_t* borrowSampleReferenceIDs() const;
 
     /**
       \brief Returns the spatial locations of the data points.
@@ -157,7 +157,7 @@ public:
       \brief Returns the number of samples.
     */
     inline
-    dim_t getNumSamples() const { return getDataShape().second; }
+    DataTypes::dim_t getNumSamples() const { return getDataShape().second; }
 
     /**
       \brief Returns the number of data points per sample.
diff --git a/escriptcore/src/FunctionSpaceException.cpp b/escriptcore/src/FunctionSpaceException.cpp
deleted file mode 100644
index 820aba0..0000000
--- a/escriptcore/src/FunctionSpaceException.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "FunctionSpaceException.h"
-
-
-using namespace escript;
-
-
-const std::string 
-FunctionSpaceException::exceptionNameValue("FunctionSpaceException");
-
-
-const std::string &
-FunctionSpaceException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/escriptcore/src/FunctionSpaceException.h b/escriptcore/src/FunctionSpaceException.h
index fa94c25..23a9e98 100644
--- a/escriptcore/src/FunctionSpaceException.h
+++ b/escriptcore/src/FunctionSpaceException.h
@@ -19,84 +19,19 @@
 #define escript_FunctionSpaceException_20040602_H
 #include "system_dep.h"
 
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
 namespace escript
 {
 
-  /**
-  \brief
-  FunctionSpaceException exception class.
-
-  Description:
-  FunctionSpaceException exception class.
-  The class provides a public function returning the exception name
-  */
-  class FunctionSpaceException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    FunctionSpaceException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    FunctionSpaceException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    FunctionSpaceException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    FunctionSpaceException(const FunctionSpaceException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESCRIPT_DLL_API
-    inline FunctionSpaceException &
-    operator=(const FunctionSpaceException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-
-    /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~FunctionSpaceException() THROW(NO_ARG) {}
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
+class FunctionSpaceException : public EsysException
+{
+public:
+    FunctionSpaceException(const std::string& str) : EsysException(str) {}
+    virtual ~FunctionSpaceException() throw() {}
+};
 
 } // end of namespace
+
 #endif
+
diff --git a/escriptcore/src/FunctionSpaceFactory.cpp b/escriptcore/src/FunctionSpaceFactory.cpp
index 4d25618..d390a2e 100644
--- a/escriptcore/src/FunctionSpaceFactory.cpp
+++ b/escriptcore/src/FunctionSpaceFactory.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "FunctionSpaceFactory.h"
 #include "AbstractContinuousDomain.h"
 #include "FunctionSpaceException.h"
diff --git a/escriptcore/src/FunctionSpaceFactory.h b/escriptcore/src/FunctionSpaceFactory.h
index e876b21..ea3f8ea 100644
--- a/escriptcore/src/FunctionSpaceFactory.h
+++ b/escriptcore/src/FunctionSpaceFactory.h
@@ -14,11 +14,10 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_FUNCTIONSPACEFACTORY_H__
+#define __ESCRIPT_FUNCTIONSPACEFACTORY_H__
 
-#if !defined  escript_FunctionSpaceFactory_20040604_H
-#define escript_FunctionSpaceFactory_20040604_H
 #include "system_dep.h"
-
 #include "AbstractDomain.h"
 #include "FunctionSpace.h"
 
@@ -102,4 +101,6 @@ namespace escript {
   ESCRIPT_DLL_API FunctionSpace diracDeltaFunctions(const AbstractDomain& domain);
 
 } // end of namespace
-#endif
+
+#endif // __ESCRIPT_FUNCTIONSPACEFACTORY_H__
+
diff --git a/esysUtils/src/IndexList.h b/escriptcore/src/IndexList.h
similarity index 64%
rename from esysUtils/src/IndexList.h
rename to escriptcore/src/IndexList.h
index 449bffb..ea2d4e2 100644
--- a/esysUtils/src/IndexList.h
+++ b/escriptcore/src/IndexList.h
@@ -14,40 +14,29 @@
 *
 *****************************************************************************/
 
+#ifndef __ESCRIPT_INDEXLIST_H__
+#define __ESCRIPT_INDEXLIST_H__
 
-/****************************************************************************/
-
-/*   esysUtils: IndexList                                                   */
-
-/****************************************************************************/
-
-/*   Author: Lutz Gross, l.gross at uq.edu.au */
-
-/****************************************************************************/
-
-#ifndef __ESYSUTILS_INDEXLIST_H__
-#define __ESYSUTILS_INDEXLIST_H__
-
-#include "types.h"
+#include <escript/DataTypes.h>
 
 // pre-reserving saves time under OpenMP. The 85 is a value taken over
 // from revision ~101 by jgs.
 #define ESYS_INDEXLIST_LENGTH 85
 
-namespace esysUtils {
+namespace escript {
 
 struct IndexList {
     IndexList() : n(0), extension(NULL) {}
     ~IndexList() { delete extension; }
 
-    index_t m_list[ESYS_INDEXLIST_LENGTH];
-    dim_t n;
+    DataTypes::index_t m_list[ESYS_INDEXLIST_LENGTH];
+    DataTypes::dim_t n;
     IndexList* extension;
 
     /// inserts row index into the IndexList in if it does not exist
-    inline void insertIndex(index_t index)
+    inline void insertIndex(DataTypes::index_t index)
     {
-        for (dim_t i=0; i<n; i++) {
+        for (DataTypes::dim_t i=0; i<n; i++) {
             if (m_list[i] == index)
                 return;
         }
@@ -61,10 +50,11 @@ struct IndexList {
     }
 
     /// counts the number of row indices in the IndexList in
-    inline dim_t count(index_t range_min, index_t range_max) const
+    inline DataTypes::dim_t count(DataTypes::index_t range_min,
+                                  DataTypes::index_t range_max) const
     {
-        dim_t out=0;
-        for (dim_t i=0; i < n; i++) {
+        DataTypes::dim_t out=0;
+        for (DataTypes::dim_t i=0; i < n; i++) {
             if (m_list[i] >= range_min && range_max > m_list[i])
                 ++out;
         }
@@ -74,11 +64,12 @@ struct IndexList {
     }
 
     /// index list to array
-    inline void toArray(index_t* array, index_t range_min, index_t range_max,
-                        index_t index_offset) const
+    inline void toArray(DataTypes::index_t* array,
+                    DataTypes::index_t range_min, DataTypes::index_t range_max,
+                    DataTypes::index_t index_offset) const
     {
-        index_t idx = 0;
-        for (dim_t i=0; i < n; i++) {
+        DataTypes::index_t idx = 0;
+        for (DataTypes::dim_t i=0; i < n; i++) {
             if (m_list[i] >= range_min && range_max > m_list[i]) {
                 array[idx] = m_list[i]+index_offset;
                 ++idx;
@@ -89,7 +80,7 @@ struct IndexList {
     }
 };
 
-} // namespace esysUtils
+} // namespace escript
 
-#endif // __ESYSUTILS_INDEXLIST_H__
+#endif // __ESCRIPT_INDEXLIST_H__
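
A brief sketch of the relocated IndexList in use, collecting the distinct column indices of one matrix row and packing them into a compact array. The concrete indices are arbitrary and the include path may differ from the one shown.

    #include "IndexList.h"   // escriptcore/src
    #include <vector>

    std::vector<escript::DataTypes::index_t> rowIndices()
    {
        escript::IndexList row;
        row.insertIndex(7);
        row.insertIndex(3);
        row.insertIndex(7);                               // duplicate, ignored
        escript::DataTypes::dim_t n = row.count(0, 10);   // indices in [0,10): 2
        std::vector<escript::DataTypes::index_t> out(n);
        row.toArray(&out[0], 0, 10, 0);                   // {7, 3}, insertion order
        return out;
    }
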
 
diff --git a/escriptcore/src/LapackInverseHelper.cpp b/escriptcore/src/LapackInverseHelper.cpp
index 8915da4..8538431 100644
--- a/escriptcore/src/LapackInverseHelper.cpp
+++ b/escriptcore/src/LapackInverseHelper.cpp
@@ -16,9 +16,9 @@
 
 #include "LapackInverseHelper.h"
 
-#ifdef USE_LAPACK
+#ifdef ESYS_HAVE_LAPACK
 
-#ifdef MKL_LAPACK
+#ifdef ESYS_MKL_LAPACK
 #include <mkl_lapack.h>
 #else	// assuming clapack
 extern "C"
@@ -43,11 +43,11 @@ LapackInverseHelper::LapackInverseHelper(int N)
 	work=0;
 	lwork=0;
 	this->N=N;
-#ifdef USE_LAPACK
+#ifdef ESYS_HAVE_LAPACK
 	piv=new int[N];
 	int blocksize=64;	// this is arbitrary. For implementations that require work array 
 				// maybe we should look into the Lapack ILAENV function
-#ifdef MKL_LAPACK
+#ifdef ESYS_MKL_LAPACK
 	int minus1=-1;
 	double dummyd=0;
 	int result=0;
@@ -80,10 +80,10 @@ LapackInverseHelper::~LapackInverseHelper()
 int 
 LapackInverseHelper::invert(double* matrix)
 {
-#ifndef USE_LAPACK
+#ifndef ESYS_HAVE_LAPACK
 	return NEEDLAPACK;
 #else
-#ifdef MKL_LAPACK
+#ifdef ESYS_MKL_LAPACK
 	int res=0;
 	int size=N;
 	dgetrf(&N,&N,matrix,&N,piv,&res);
diff --git a/escriptcore/src/LocalOps.h b/escriptcore/src/LocalOps.h
deleted file mode 100644
index 1d9400d..0000000
--- a/escriptcore/src/LocalOps.h
+++ /dev/null
@@ -1,563 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_LocalOps_H
-#define escript_LocalOps_H
-#include <cmath>
-#ifndef M_PI
-#   define M_PI           3.14159265358979323846  /* pi */
-#endif
-
-
-/**
-\file LocalOps.h 
-\brief Describes binary operations performed on double*.
-
-For operations on DataAbstract see BinaryOp.h.
-For operations on DataVector see DataMaths.h.
-*/
-
-namespace escript {
-
-/**
-\brief acts as a wrapper to isnan.
-\warning if compiler does not support FP_NAN this function will always return false.
-*/
-inline
-bool nancheck(double d)
-{
-    using namespace std;
-		// Q: so why not just test d!=d?
-		// A: Coz it doesn't always work [I've checked].
-		// One theory is that the optimizer skips the test.
-#if defined _isnan
-    return _isnan(d);
-#else
-    return isnan(d);	// isNan should be a function in C++ land
-#endif
-}
-
-/**
-\brief returns a NaN.
-\warning Should probably only used where you know you can test for NaNs
-*/
-inline
-double makeNaN()
-{
-#ifdef nan
-    return nan("");
-#else
-    return sqrt(-1.);
-#endif
-
-}
-
-
-/**
-   \brief
-   solves a 1x1 eigenvalue A*V=ev*V problem
-
-   \param A00 Input - A_00
-   \param ev0 Output - eigenvalue
-*/
-inline
-void eigenvalues1(const double A00,double* ev0) {
-
-   *ev0=A00;
-
-}
-/**
-   \brief
-   solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
-
-   \param A00 Input - A_00
-   \param A01 Input - A_01
-   \param A11 Input - A_11
-   \param ev0 Output - smallest eigenvalue
-   \param ev1 Output - largest eigenvalue
-*/
-inline
-void eigenvalues2(const double A00,const double A01,const double A11,
-                 double* ev0, double* ev1) {
-      const register double trA=(A00+A11)/2.;
-      const register double A_00=A00-trA;
-      const register double A_11=A11-trA;
-      const register double s=sqrt(A01*A01-A_00*A_11);
-      *ev0=trA-s;
-      *ev1=trA+s;
-}
-/**
-   \brief
-   solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
-
-   \param A00 Input - A_00
-   \param A01 Input - A_01
-   \param A02 Input - A_02
-   \param A11 Input - A_11
-   \param A12 Input - A_12
-   \param A22 Input - A_22
-   \param ev0 Output - smallest eigenvalue
-   \param ev1 Output - eigenvalue
-   \param ev2 Output - largest eigenvalue
-*/
-inline
-void eigenvalues3(const double A00, const double A01, const double A02,
-                                   const double A11, const double A12,
-                                                     const double A22,
-                 double* ev0, double* ev1,double* ev2) {
-
-      const register double trA=(A00+A11+A22)/3.;
-      const register double A_00=A00-trA;
-      const register double A_11=A11-trA;
-      const register double A_22=A22-trA;
-      const register double A01_2=A01*A01;
-      const register double A02_2=A02*A02;
-      const register double A12_2=A12*A12;
-      const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
-      if (p<=0.) {
-         *ev2=trA;
-         *ev1=trA;
-         *ev0=trA;
-
-      } else {
-         const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
-         const register double sq_p=sqrt(p/3.);
-         register double z=-q/(2*pow(sq_p,3));
-         if (z<-1.) {
-            z=-1.;
-         } else if (z>1.) {
-            z=1.;
-         }
-         const register double alpha_3=acos(z)/3.;
-         *ev2=trA+2.*sq_p*cos(alpha_3);
-         *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
-         *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
-      }
-}
-/**
-   \brief
-   solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
-
-   \param A00 Input - A_00
-   \param ev0 Output - eigenvalue
-   \param V00 Output - eigenvector
-   \param tol Input - tolerance to identify to eigenvalues
-*/
-inline
-void  eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
-{
-      eigenvalues1(A00,ev0);
-      *V00=1.;
-      return;
-}
-/**
-   \brief
-   returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
-
-   \param A00 Input - matrix component
-   \param A10 Input - matrix component
-   \param A01 Input - matrix component
-   \param A11 Input - matrix component
-   \param V0 Output - vector component
-   \param V1 Output - vector component
-*/
-inline
-void  vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
-                      double* V0, double*V1)
-{
-      register double absA00=fabs(A00);
-      register double absA10=fabs(A10);
-      register double absA01=fabs(A01);
-      register double absA11=fabs(A11);
-      register double m=absA11>absA10 ? absA11 : absA10;
-      if (absA00>m || absA01>m) {
-         *V0=-A01;
-         *V1=A00;
-      } else {
-         if (m<=0) {
-           *V0=1.;
-           *V1=0.;
-         } else {
-           *V0=A11;
-           *V1=-A10;
-         }
-     }
-}
-/**
-   \brief
-   returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
-   assuming that the kernel dimension is at least 1 and A00 is non zero.
-
-   \param A00 Input - matrix component
-   \param A10 Input - matrix component
-   \param A20 Input - matrix component
-   \param A01 Input - matrix component
-   \param A11 Input - matrix component
-   \param A21 Input - matrix component
-   \param A02 Input - matrix component
-   \param A12 Input - matrix component
-   \param A22 Input - matrix component
-   \param V0 Output - vector component
-   \param V1 Output - vector component
-   \param V2 Output - vector component
-*/
-inline
-void  vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
-                                const double A01,const double A11,const double A21,
-                                const double A02,const double A12,const double A22,
-                                double* V0,double* V1,double* V2)
-{
-    double TEMP0,TEMP1;
-    register const double I00=1./A00;
-    register const double IA10=I00*A10;
-    register const double IA20=I00*A20;
-    vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
-                    A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
-    *V0=-(A10*TEMP0+A20*TEMP1);
-    *V1=A00*TEMP0;
-    *V2=A00*TEMP1;
-}
-
-/**
-   \brief
-   solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
-   ordered by increasing value and eigen vectors are normalizeVector3d such that
-   length is zero and first non-zero component is positive.
-
-   \param A00 Input - A_00
-   \param A01 Input - A_01
-   \param A11 Input - A_11
-   \param ev0 Output - smallest eigenvalue
-   \param ev1 Output - eigenvalue
-   \param V00 Output - eigenvector componenent coresponding to ev0
-   \param V10 Output - eigenvector componenent coresponding to ev0
-   \param V01 Output - eigenvector componenent coresponding to ev1
-   \param V11 Output - eigenvector componenent coresponding to ev1
-   \param tol Input - tolerance to identify to eigenvalues
-*/
-inline
-void  eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
-                                    double* ev0, double* ev1,
-                                    double* V00, double* V10, double* V01, double* V11,
-                                    const double tol)
-{
-     double TEMP0,TEMP1;
-     eigenvalues2(A00,A01,A11,ev0,ev1);
-     const register double absev0=fabs(*ev0);
-     const register double absev1=fabs(*ev1);
-     register double max_ev=absev0>absev1 ? absev0 : absev1;
-     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
-        *V00=1.;
-        *V10=0.;
-        *V01=0.;
-        *V11=1.;
-     } else {
-        vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
-        const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
-        if (TEMP0<0.) {
-            *V00=-TEMP0*scale;
-            *V10=-TEMP1*scale;
-            if (TEMP1<0.) {
-               *V01=  *V10;
-               *V11=-(*V00);
-            } else {
-               *V01=-(*V10);
-               *V11= (*V00);
-            }
-        } else if (TEMP0>0.) {
-            *V00=TEMP0*scale;
-            *V10=TEMP1*scale;
-            if (TEMP1<0.) {
-               *V01=-(*V10);
-               *V11= (*V00);
-            } else {
-               *V01= (*V10);
-               *V11=-(*V00);
-            }
-        } else {
-           *V00=0.;
-           *V10=1;
-           *V11=0.;
-           *V01=1.;
-       }
-   }
-}
-/**
-   \brief
-   nomalizes a 3-d vector such that length is one and first non-zero component is positive.
-
-   \param V0 - vector componenent
-   \param V1 - vector componenent
-   \param V2 - vector componenent
-*/
-inline
-void  normalizeVector3(double* V0,double* V1,double* V2)
-{
-    register double s;
-    if (*V0>0) {
-        s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
-        *V0*=s;
-        *V1*=s;
-        *V2*=s;
-    } else if (*V0<0)  {
-        s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
-        *V0*=s;
-        *V1*=s;
-        *V2*=s;
-    } else {
-        if (*V1>0) {
-            s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
-            *V1*=s;
-            *V2*=s;
-        } else if (*V1<0)  {
-            s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
-            *V1*=s;
-            *V2*=s;
-        } else {
-            *V2=1.;
-        }
-    }
-}
-/**
-   \brief
-   solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
-   ordered by increasing value and eigen vectors are normalizeVector3d such that
-   length is zero and first non-zero component is positive.
-
-   \param A00 Input - A_00
-   \param A01 Input - A_01
-   \param A02 Input - A_02
-   \param A11 Input - A_11
-   \param A12 Input - A_12
-   \param A22 Input - A_22
-   \param ev0 Output - smallest eigenvalue
-   \param ev1 Output - eigenvalue
-   \param ev2 Output -
-   \param V00 Output - eigenvector componenent coresponding to ev0
-   \param V10 Output - eigenvector componenent coresponding to ev0
-   \param V20 Output -
-   \param V01 Output - eigenvector componenent coresponding to ev1
-   \param V11 Output - eigenvector componenent coresponding to ev1
-   \param V21 Output -
-   \param V02 Output -
-   \param V12 Output -
-   \param V22 Output -
-   \param tol Input - tolerance to identify to eigenvalues
-*/
-inline
-void  eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
-                                    const double A11, const double A12, const double A22,
-                                    double* ev0, double* ev1, double* ev2,
-                                    double* V00, double* V10, double* V20,
-                                    double* V01, double* V11, double* V21,
-                                    double* V02, double* V12, double* V22,
-                                    const double tol)
-{
-      register const double absA01=fabs(A01);
-      register const double absA02=fabs(A02);
-      register const double m=absA01>absA02 ? absA01 : absA02;
-      if (m<=0) {
-        double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
-        eigenvalues_and_eigenvectors2(A11,A12,A22,
-                                      &TEMP_EV0,&TEMP_EV1,
-                                      &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
-        if (A00<=TEMP_EV0) {
-            *V00=1.;
-            *V10=0.;
-            *V20=0.;
-            *V01=0.;
-            *V11=TEMP_V00;
-            *V21=TEMP_V10;
-            *V02=0.;
-            *V12=TEMP_V01;
-            *V22=TEMP_V11;
-            *ev0=A00;
-            *ev1=TEMP_EV0;
-            *ev2=TEMP_EV1;
-        } else if (A00>TEMP_EV1) {
-            *V02=1.;
-            *V12=0.;
-            *V22=0.;
-            *V00=0.;
-            *V10=TEMP_V00;
-            *V20=TEMP_V10;
-            *V01=0.;
-            *V11=TEMP_V01;
-            *V21=TEMP_V11;
-            *ev0=TEMP_EV0;
-            *ev1=TEMP_EV1;
-            *ev2=A00;
-        } else {
-            *V01=1.;
-            *V11=0.;
-            *V21=0.;
-            *V00=0.;
-            *V10=TEMP_V00;
-            *V20=TEMP_V10;
-            *V02=0.;
-            *V12=TEMP_V01;
-            *V22=TEMP_V11;
-            *ev0=TEMP_EV0;
-            *ev1=A00;
-            *ev2=TEMP_EV1;
-        }
-      } else {
-         eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
-         const double absev0=fabs(*ev0);
-         const double absev1=fabs(*ev1);
-         const double absev2=fabs(*ev2);
-         double max_ev=absev0>absev1 ? absev0 : absev1;
-         max_ev=max_ev>absev2 ? max_ev : absev2;
-         const double d_01=fabs((*ev0)-(*ev1));
-         const double d_12=fabs((*ev1)-(*ev2));
-         const double max_d=d_01>d_12 ? d_01 : d_12;
-         if (max_d<=tol*max_ev) {
-             *V00=1.;
-             *V10=0;
-             *V20=0;
-             *V01=0;
-             *V11=1.;
-             *V21=0;
-             *V02=0;
-             *V12=0;
-             *V22=1.;
-         } else {
-            const double S00=A00-(*ev0);
-            const double absS00=fabs(S00);
-            if (absS00>m) {
-                vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
-            } else if (absA02<m) {
-                vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
-            } else {
-                vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
-            }
-            normalizeVector3(V00,V10,V20);
-            const double T00=A00-(*ev2);
-            const double absT00=fabs(T00);
-            if (absT00>m) {
-                 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
-            } else if (absA02<m) {
-                 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
-            } else {
-                 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
-            }
-            const double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
-            *V02-=dot*(*V00);
-            *V12-=dot*(*V10);
-            *V22-=dot*(*V20);
-            normalizeVector3(V02,V12,V22);
-            *V01=(*V10)*(*V22)-(*V12)*(*V20);
-            *V11=(*V20)*(*V02)-(*V00)*(*V22);
-            *V21=(*V00)*(*V12)-(*V02)*(*V10);
-            normalizeVector3(V01,V11,V21);
-         }
-   }
-}
-
-// General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
-// SM is the product of the last axis_offset entries in arg_0.getShape().
-inline
-void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
-{
-  if (transpose == 0) {
-    for (int i=0; i<SL; i++) {
-      for (int j=0; j<SR; j++) {
-        double sum = 0.0;
-        for (int l=0; l<SM; l++) {
-	  sum += A[i+SL*l] * B[l+SM*j];
-        }
-        C[i+SL*j] = sum;
-      }
-    }
-  }
-  else if (transpose == 1) {
-    for (int i=0; i<SL; i++) {
-      for (int j=0; j<SR; j++) {
-        double sum = 0.0;
-        for (int l=0; l<SM; l++) {
-	  sum += A[i*SM+l] * B[l+SM*j];
-        }
-        C[i+SL*j] = sum;
-      }
-    }
-  }
-  else if (transpose == 2) {
-    for (int i=0; i<SL; i++) {
-      for (int j=0; j<SR; j++) {
-        double sum = 0.0;
-        for (int l=0; l<SM; l++) {
-	  sum += A[i+SL*l] * B[l*SR+j];
-        }
-        C[i+SL*j] = sum;
-      }
-    }
-  }
-}
-
-template <typename UnaryFunction>
-inline void tensor_unary_operation(const int size,
-			     const double *arg1,
-			     double * argRes,
-			     UnaryFunction operation)
-{
-  for (int i = 0; i < size; ++i) {
-    argRes[i] = operation(arg1[i]);
-  }
-  return;
-}
-
-template <typename BinaryFunction>
-inline void tensor_binary_operation(const int size,
-			     const double *arg1,
-			     const double *arg2,
-			     double * argRes,
-			     BinaryFunction operation)
-{
-  for (int i = 0; i < size; ++i) {
-    argRes[i] = operation(arg1[i], arg2[i]);
-  }
-  return;
-}
-
-template <typename BinaryFunction>
-inline void tensor_binary_operation(const int size,
-			     double arg1,
-			     const double *arg2,
-			     double *argRes,
-			     BinaryFunction operation)
-{
-  for (int i = 0; i < size; ++i) {
-    argRes[i] = operation(arg1, arg2[i]);
-  }
-  return;
-}
-
-template <typename BinaryFunction>
-inline void tensor_binary_operation(const int size,
-			     const double *arg1,
-			     double arg2,
-			     double *argRes,
-			     BinaryFunction operation)
-{
-  for (int i = 0; i < size; ++i) {
-    argRes[i] = operation(arg1[i], arg2);
-  }
-  return;
-}
-
-} // end of namespace
-#endif
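
For reference, the contract documented for the removed eigensolver above (eigenvalues in ascending order, eigenvectors of unit length whose first non-zero component is positive) can be illustrated with the closed form for a symmetric 2x2 matrix. This is a standalone sketch in plain C++, ordinary mathematics rather than escript code:

    // Standalone illustration (plain math, not escript): one eigenpair of a symmetric
    // 2x2 matrix, ordered and normalised the same way the removed solver documents.
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double a = 2.0, b = 1.0, c = 2.0;        // A = [[2,1],[1,2]]
        const double mean = 0.5*(a + c);
        const double r = std::sqrt(0.25*(a - c)*(a - c) + b*b);
        const double ev0 = mean - r, ev1 = mean + r;   // ascending: ev0 <= ev1
        // eigenvector for ev0: (b, ev0 - a), scaled to unit length,
        // sign chosen so the first non-zero entry is positive
        double v0 = b, v1 = ev0 - a;
        const double len = std::sqrt(v0*v0 + v1*v1);
        v0 /= len; v1 /= len;
        if (v0 < 0.0 || (v0 == 0.0 && v1 < 0.0)) { v0 = -v0; v1 = -v1; }
        std::printf("ev0=%g ev1=%g v=(%g, %g)\n", ev0, ev1, v0, v1);
        // prints: ev0=1 ev1=3 v=(0.707107, -0.707107)
        return 0;
    }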
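
The removed matrix_matrix_product above stores its operands flattened with the first index varying fastest, so entry A(i,l) sits at A[i + SL*l]. A minimal standalone sketch of that convention for the transpose==0 case (mm is an illustrative stand-in, not the escript function):

    #include <cstdio>

    // Same indexing as the removed helper: entry (i,l) of an SLxSM matrix is A[i + SL*l].
    static void mm(int SL, int SM, int SR, const double* A, const double* B, double* C)
    {
        for (int i = 0; i < SL; ++i)
            for (int j = 0; j < SR; ++j) {
                double sum = 0.0;
                for (int l = 0; l < SM; ++l)
                    sum += A[i + SL*l] * B[l + SM*j];
                C[i + SL*j] = sum;
            }
    }

    int main()
    {
        // A is 2x3 and B is 3x2, both flattened column by column.
        const double A[6] = {1, 4,  2, 5,  3, 6};     // A = [[1,2,3],[4,5,6]]
        const double B[6] = {7, 8, 9,  10, 11, 12};   // B = [[7,10],[8,11],[9,12]]
        double C[4];
        mm(2, 3, 2, A, B, C);
        std::printf("%g %g %g %g\n", C[0], C[1], C[2], C[3]);  // prints: 50 122 68 167
        return 0;
    }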
diff --git a/escriptcore/src/MPIDataReducer.cpp b/escriptcore/src/MPIDataReducer.cpp
index 73f8f5b..f3ed662 100644
--- a/escriptcore/src/MPIDataReducer.cpp
+++ b/escriptcore/src/MPIDataReducer.cpp
@@ -13,73 +13,66 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include "MPIDataReducer.h"
+#include "SplitWorldException.h"
 
-#include <sstream>
 #include <limits>
+#include <sstream>
 #include <boost/python/extract.hpp>
 #include <boost/scoped_array.hpp>
 
-#include "MPIDataReducer.h"
-#include "SplitWorldException.h"
-
 using namespace boost::python;
 using namespace escript;
 
+namespace {
 
-namespace escript
+void combineData(Data& d1, const Data& d2, MPI_Op op)
 {
+    if (op==MPI_SUM)
+    {
+        d1+=d2;
+    } 
+    else if (op==MPI_OP_NULL) 
+    {
+        throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
+    }
+}
+
+} // anonymous namespace
+
+
+namespace escript {
+
 Reducer_ptr makeDataReducer(std::string type)
 {
     MPI_Op op;
     if (type=="SUM")
     {
-	op=MPI_SUM;
+        op=MPI_SUM;
     }
     else if (type=="SET")
     {
-	op=MPI_OP_NULL;
+        op=MPI_OP_NULL;
     }
     else
     {
-	throw SplitWorldException("Unsupported operation for makeDataReducer.");
+        throw SplitWorldException("Unsupported operation for makeDataReducer.");
     }
     MPIDataReducer* m=new MPIDataReducer(op);
     return Reducer_ptr(m);    
 }
 
-}
-
-namespace
-{
-
-void combineData(Data& d1, const Data& d2, MPI_Op op)
-{
-    if (op==MPI_SUM)
-    {
-	d1+=d2;
-    } 
-    else if (op==MPI_OP_NULL) 
-    {
-	throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
-    }
-}
-
-}
-
 MPIDataReducer::MPIDataReducer(MPI_Op op)
   : reduceop(op), had_an_export_this_round(false)
 {
     valueadded=false;
     if ((op==MPI_SUM) || (op==MPI_OP_NULL))
     {
-	// deliberately left blank
+        // deliberately left blank
     }
     else
     {
-	throw SplitWorldException("Unsupported MPI_Op");
+        throw SplitWorldException("Unsupported MPI_Op");
     }
 }
 
@@ -98,7 +91,7 @@ std::string MPIDataReducer::description()
     std::string op="SUM";
     if (reduceop==MPI_OP_NULL)
     {
-	op="SET";
+        op="SET";
     }
     return "Reducer("+op+") for Data objects"; 
 }
@@ -108,15 +101,15 @@ bool MPIDataReducer::valueCompatible(boost::python::object v)
     extract<Data&> ex(v);
     if (!ex.check())
     {
-	return false;
+        return false;
     }
     if (dom.get()!=0)
     {
-	const Data& d=ex();
-	if (d.getDomain().get()!=dom.get())
-	{
-	    return false;	// the domains don't match
-	}
+        const Data& d=ex();
+        if (d.getDomain().get()!=dom.get())
+        {
+            return false;       // the domains don't match
+        }
     }
     return true;
 }
@@ -127,51 +120,51 @@ bool MPIDataReducer::reduceLocalValue(boost::python::object v, std::string& errs
     extract<Data&> ex(v);
     if (!ex.check())
     {
-	errstring="reduceLocalValue: expected Data object. Got something else.";
-	return false;
+        errstring="reduceLocalValue: expected Data object. Got something else.";
+        return false;
     }
     Data& d=ex();
     if (d.isEmpty())
     {
-	errstring="reduceLocalValue: Got an empty Data object. Not allowed to reduce those.";
-	return false;
+        errstring="reduceLocalValue: Got an empty Data object. Not allowed to reduce those.";
+        return false;
     }
     if ((d.getDomain()!=dom) && (dom.get()!=0))
     {
-	errstring="reduceLocalValue: Got a Data object, but it was not using the SubWorld's domain.";
-	return false;
+        errstring="reduceLocalValue: Got a Data object, but it was not using the SubWorld's domain.";
+        return false;
     }
-    d.expand();		// because I don't want to mess about with types of Data
-    if (!valueadded || !had_an_export_this_round)	// first value so answer becomes this one
+    d.expand();         // because I don't want to mess about with types of Data
+    if (!valueadded || !had_an_export_this_round)       // first value so answer becomes this one
     {
-	value=d;
-	dom=d.getDomain();
+        value=d;
+        dom=d.getDomain();
         had_an_export_this_round=true;
-	valueadded=true;
+        valueadded=true;
     }
     else
     {
-	if (reduceop==MPI_OP_NULL)
-	{
-	    if (had_an_export_this_round) 
-	    {
-		reset();
-		errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
-		return false;
-	    }
-	    value=d;
-	    dom=d.getDomain();
-	    had_an_export_this_round=true;
-	}
+        if (reduceop==MPI_OP_NULL)
+        {
+            if (had_an_export_this_round) 
+            {
+                reset();
+                errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
+                return false;
+            }
+            value=d;
+            dom=d.getDomain();
+            had_an_export_this_round=true;
+        }
         else
         { 
-	    had_an_export_this_round=true;
-	    if (d.getFunctionSpace()!=value.getFunctionSpace())
-	    {
-	        errstring="reduceLocalValue: FunctionSpaces for Data objects being combined must match.";
-	        return false;
-	    }
-	    combineData(value, d, reduceop);
+            had_an_export_this_round=true;
+            if (d.getFunctionSpace()!=value.getFunctionSpace())
+            {
+                errstring="reduceLocalValue: FunctionSpaces for Data objects being combined must match.";
+                return false;
+            }
+            combineData(value, d, reduceop);
         }
     }
     return true;
@@ -183,7 +176,7 @@ void MPIDataReducer::reset()
     value=Data();
 }
 
-bool MPIDataReducer::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring)
+bool MPIDataReducer::checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring)
 {
 #ifdef ESYS_MPI    
     // since they can't add it unless it is using the proper domain, we need to check 
@@ -194,33 +187,33 @@ bool MPIDataReducer::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::st
     // still need to incorporate domain version into this
     // or are domains not mutable in any way that matters?
     int* rbuff=new int[mpi_info->size*compat.size()];
-    boost::scoped_array<int> dummy(rbuff);	// to ensure cleanup
+    boost::scoped_array<int> dummy(rbuff);      // to ensure cleanup
     for (int i=0;i<mpi_info->size;++i)
     {
-	rbuff[i]=0;	// since this won't match any valid value we can use it as a failure check
+        rbuff[i]=0;     // since this won't match any valid value we can use it as a failure check
     }
     if (MPI_Allgather(&compat[0], compat.size(), MPI_UNSIGNED, rbuff, 
-	    compat.size(), MPI_UNSIGNED, mpi_info->comm)!=MPI_SUCCESS)
+            compat.size(), MPI_UNSIGNED, mpi_info->comm)!=MPI_SUCCESS)
     {
-	errstring="MPI failure in checkRemoteCompatibility.";
-	return false;
+        errstring="MPI failure in checkRemoteCompatibility.";
+        return false;
     }
     for (int i=0;i<(mpi_info->size-1);++i)
     {
-	if ((rbuff[i*compat.size()]==1) || (rbuff[(i+1)*compat.size()]==1))	// one of them doesn't have a value
-	{
-	    continue;
-	}
-	for (int j=0;j<compat.size();++j)
-	{
-	    if (rbuff[i*compat.size()+j]!=rbuff[(i+1)*compat.size()+j])
-	    {
-		std::ostringstream oss;
-		oss << "Incompatible value found for SubWorld " << i+1 << '.';
-		errstring=oss.str();
-		return false;	      
-	    } 
-	}
+        if ((rbuff[i*compat.size()]==1) || (rbuff[(i+1)*compat.size()]==1))     // one of them doesn't have a value
+        {
+            continue;
+        }
+        for (int j=0;j<compat.size();++j)
+        {
+            if (rbuff[i*compat.size()+j]!=rbuff[(i+1)*compat.size()+j])
+            {
+                std::ostringstream oss;
+                oss << "Incompatible value found for SubWorld " << i+1 << '.';
+                errstring=oss.str();
+                return false;         
+            } 
+        }
     }
     return true;
 #else
@@ -233,17 +226,17 @@ bool MPIDataReducer::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::st
 bool MPIDataReducer::reduceRemoteValues(MPI_Comm& comm)
 {
 #ifdef ESYS_MPI
-    DataTypes::ValueType& vr=value.getExpandedVectorReference();
+    DataTypes::RealVectorType& vr=value.getExpandedVectorReference();
     Data result(0, value.getDataPointShape(), value.getFunctionSpace(), true);
-    DataTypes::ValueType& rr=result.getExpandedVectorReference();
+    DataTypes::RealVectorType& rr=result.getExpandedVectorReference();
     if (reduceop==MPI_OP_NULL)
     {
-	reset();	// we can't be sure what the value should be
-	return false;		// this will stop bad things happening but won't give an informative error message
+        reset();        // we can't be sure what the value should be
+        return false;           // this will stop bad things happening but won't give an informative error message
     }
     if (MPI_Allreduce(&(vr[0]), &(rr[0]), vr.size(), MPI_DOUBLE, reduceop, comm)!=MPI_SUCCESS)
     {
-	return false;
+        return false;
     }
     value=result;
     return true;
@@ -259,48 +252,50 @@ bool MPIDataReducer::reduceRemoteValues(MPI_Comm& comm)
 //  [1]    Functionspace type code
 //  [2]    Only used for tagged --- gives the number of tags (which exist in the data object)
 //  [3..6] Components of the shape  
+//  [7]    Complexity: {0: real, 1:complex}
 void MPIDataReducer::getCompatibilityInfo(std::vector<unsigned>& params)
 {
-    params.resize(7);
-    for (int i=0;i<7;++i)
+    params.resize(8);
+    for (int i=0;i<8;++i)
     {
-	params[0]=0;
+        params[i]=0;
     }
     if (!valueadded)
     {
-	params[0]=1;
-	return;
+        params[0]=1;
+        return;
     }
     if (value.isConstant())
     {
-	params[0]=10;
+        params[0]=10;
     }
     else if (value.isTagged())
     {
-	params[0]=11;
+        params[0]=11;
     }
     else if (value.isExpanded())
     {
-	params[0]=12;
+        params[0]=12;
     }
-    else	// This could be DataEmpty or some other weirdness but we won't allow that
+    else        // This could be DataEmpty or some other weirdness but we won't allow that
     {
-	params[0]=0;	// invalid type to send
-	return;
+        params[0]=0;    // invalid type to send
+        return;
     }    
     params[1]=value.getFunctionSpace().getTypeCode();
     params[2]=static_cast<unsigned>(value.getNumberOfTaggedValues());    
     const DataTypes::ShapeType& s=value.getDataPointShape();
     for (int i=0;i<s.size();++i)
     {
-	params[3+i]=s[i];
-    }    
+        params[3+i]=s[i];
+    }
+    params[7]=value.isComplex();
 }
 
 
-	// Get a value for this variable from another process
-	// This is not a reduction and will replace any existing value
-bool MPIDataReducer::recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)
+// Get a value for this variable from another process
+// This is not a reduction and will replace any existing value
+bool MPIDataReducer::recvFrom(int localid, int source, JMPI& mpiinfo)
 {
 #ifdef ESYS_MPI 
       // first we need to find out what we are expecting
@@ -308,83 +303,108 @@ bool MPIDataReducer::recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysU
     MPI_Status stat;
     if (MPI_Recv(params, 7, MPI_UNSIGNED, source, PARAMTAG, mpiinfo->comm, &stat)!=MPI_SUCCESS)
     {
-	return false;
+        return false;
     }
-    if (params[0]<10)	// the sender somehow tried to send something invalid
+    if (params[0]<10)   // the sender somehow tried to send something invalid
     {
-	return false;
+        return false;
     }
       // now we put the shape object together
     escript::DataTypes::ShapeType s;
     for (int i=0;i<4;++i)
     {
-	if (params[3+i]>0)
-	{
-	    s.push_back(params[3+i]);
-	}
-	else
-	{
-	    break;
-	}
+        if (params[3+i]>0)
+        {
+            s.push_back(params[3+i]);
+        }
+        else
+        {
+            break;
+        }
     }
       // Now we need the FunctionSpace
     FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
     value=Data(0, s, fs, params[0]==12);
-    if (params[0]==11)	// The Data is tagged so we need to work out what tags we need
-    {
-	// TODO:  Need to ship the tags and names over but for now just make sure there
-	// are the same number of tags
-	value.tag();
-	
-	DataVector dv(DataTypes::noValues(s), 0, 1);
-	for (unsigned i=0;i<params[2];++i)
-	{
-	    value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
-	}
-	return false;	// because I don't trust this yet
+    if (params[0]==11)  // The Data is tagged so we need to work out what tags we need
+    {
+        // TODO:  Need to ship the tags and names over but for now just make sure there
+        // are the same number of tags
+        value.tag();
+        
+        DataTypes::RealVectorType dv(DataTypes::noValues(s), 0, 1);
+        for (unsigned i=0;i<params[2];++i)
+        {
+            value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
+        }
+        return false;   // because I don't trust this yet
     }
 #endif    
     return true;
 }
 
-	// Send a value to this variable to another process
-	// This is not a reduction and will replace any existing value    
-bool MPIDataReducer::sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo)
+// Send a value to this variable to another process
+// This is not a reduction and will replace any existing value    
+bool MPIDataReducer::sendTo(int localid, int target, JMPI& mpiinfo)
 {
       if (!valueadded)
       {
-	  return false;		// May be misinterpreted as an MPI failure
+          return false;         // May be misinterpreted as an MPI failure
       }
 #ifdef ESYS_MPI  
       // first step is to let the other world know what sort of thing it needs to make
       if (value.isLazy())
       {
-	  value.resolve();
+          value.resolve();
       }
       std::vector<unsigned> params;
       getCompatibilityInfo(params);
       if (MPI_Send(&params[0], 6, MPI_UNSIGNED, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
       {
-	  return false;
+          return false;
       }
-	// now we have informed the other end of what happened
-	// are we done or is there actually data to send
+      // now we have informed the other end of what happened
+      // are we done or is there actually data to send
       if (params[0]<10)
       {
-	  return false;
+          return false;
       }
-	// at this point, we know there is data to send
-      const DataAbstract::ValueType::value_type* vect=value.getDataRO();
-	// now the receiver knows how much data it should be receive
-	// need to make sure that we aren't trying to send data with no local samples
-      if (vect!=0)
+      
+      if (value.isComplex())
       {
-	  // MPI v3 has this first param as a const void* (as it should be)
-	  // Version on my machine expects void*
-	  if (MPI_Send(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
-	  {
-	      return false;
-	  }
+          DataTypes::cplx_t dummy=0;
+            // at this point, we know there is data to send
+          const DataTypes::cplx_t* vect=value.getDataRO(dummy);
+            // now the receiver knows how much data it should receive
+            // need to make sure that we aren't trying to send data with no local samples
+          if (vect!=0)
+          {
+              // MPI v3 has this first param as a const void* (as it should be)
+              // Version on my machine expects void*
+              // we don't require MPIv3 yet ... so we can't use MPI_CXX_DOUBLE_COMPLEX
+              // We'll try just sending twice as many doubles
+              //if (MPI_Send(const_cast<DataTypes::cplx_t*>(vect), value.getLength(), MPI_CXX_DOUBLE_COMPLEX, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
+              if (MPI_Send(const_cast<DataTypes::cplx_t*>(vect), 2*value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
+              {
+                  return false;
+              }
+          }
+      }
+      else
+      {
+          DataTypes::real_t dummy=0;
+            // at this point, we know there is data to send
+          const DataTypes::real_t* vect=value.getDataRO(dummy);
+            // now the receiver knows how much data it should receive
+            // need to make sure that we aren't trying to send data with no local samples
+          if (vect!=0)
+          {
+              // MPI v3 has this first param as a const void* (as it should be)
+              // Version on my machine expects void*
+              if (MPI_Send(const_cast<DataTypes::real_t*>(vect), value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
+              {
+                  return false;
+              }
+          }
       }
 #endif      
       return true;
@@ -397,101 +417,153 @@ boost::python::object MPIDataReducer::getPyObj()
 }
 
 
-	// send from proc 0 in the communicator to all others
-	// second argument is true if this rank is sending
+// send from proc 0 in the communicator to all others
+// second argument is true if this rank is sending
 bool MPIDataReducer::groupSend(MPI_Comm& comm, bool imsending)
 {
       if (dom.get()==0)
       {
-	  return 0;	// trying to avoid throwing here
-			// this will still cause a lockup if it happens
+          return 0;     // trying to avoid throwing here
+                        // this will still cause a lockup if it happens
       }
 #ifdef ESYS_MPI
       if (imsending)
       {
-	  // first step is to let the other world know what sort of thing it needs to make
-	  if (value.isLazy())
-	  {
-	      value.resolve();
-	  }
-	  std::vector<unsigned> params;
-	  getCompatibilityInfo(params);
-	  if (MPI_Bcast(&params[0], params.size(), MPI_UNSIGNED, 0,comm)!=MPI_SUCCESS)
-	  {
-	      return false;
-	  }
-	    // now we have informed the other end of what happened
-	    // are we done or is there actually data to send
-	  if (params[0]<10)
-	  {
-	      return false;
-	  }
-	    // at this point, we know there is data to send
-	  const DataAbstract::ValueType::value_type* vect=value.getDataRO();
-	    // now the receiver knows how much data it should be receive
-	    // need to make sure that we aren't trying to send data with no local samples
-	  if (vect!=0)
-	  {
-	      if (MPI_Bcast(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
-	      {
-		  return false;
-	      }
-	  }
+          // first step is to let the other world know what sort of thing it needs to make
+          if (value.isLazy())
+          {
+              value.resolve();
+          }
+          std::vector<unsigned> params;
+          getCompatibilityInfo(params);
+          if (MPI_Bcast(&params[0], params.size(), MPI_UNSIGNED, 0,comm)!=MPI_SUCCESS)
+          {
+              return false;
+          }
+            // now we have informed the other end of what happened
+            // are we done or is there actually data to send
+          if (params[0]<10)
+          {
+              return false;
+          }
+          
+          if (value.isComplex())
+          {
+              DataTypes::cplx_t dummy=0;
+                // at this point, we know there is data to send
+              const DataTypes::cplx_t* vect=value.getDataRO(dummy);
+                // now the receiver knows how much data it should receive
+                // need to make sure that we aren't trying to send data with no local samples
+              if (vect!=0)
+              {
+                  // we don't require MPIv3 yet ... so we can't use MPI_CXX_DOUBLE_COMPLEX
+                  // We'll try just sending twice as many doubles               
+                  //if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength(), MPI_CXX_DOUBLE_COMPLEX, 0, comm)!=MPI_SUCCESS)
+                  if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength()*2, MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
+                  {
+                      return false;
+                  }
+              }
+          }
+          else
+          {
+              DataTypes::real_t dummy=0;
+                // at this point, we know there is data to send
+              const DataTypes::real_t* vect=value.getDataRO(dummy);
+                // now the receiver knows how much data it should receive
+                // need to make sure that we aren't trying to send data with no local samples
+              if (vect!=0)
+              {
+                  if (MPI_Bcast(const_cast<DataTypes::real_t*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
+                  {
+                      return false;
+                  }
+              }
+          }
       }
-      else	// we are receiving
+      else      // we are receiving
       {
-	
-	    // first we need to find out what we are expecting
-	  unsigned params[7];
-	  if (MPI_Bcast(params, 7, MPI_UNSIGNED, 0, comm)!=MPI_SUCCESS)
-	  {
-	      return false;
-	  }
-	  if (params[0]<10)	// the sender somehow tried to send something invalid
-	  {
-	      return false;
-	  }
-	    // now we put the shape object together
-	  escript::DataTypes::ShapeType s;
-	  for (int i=0;i<4;++i)
-	  {
-	      if (params[3+i]>0)
-	      {
-		  s.push_back(params[3+i]);
-	      }
-	      else
-	      {
-		  break;
-	      }
-	  }
-	    // Now we need the FunctionSpace
-	  FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
-	  value=Data(0, s, fs, params[0]==12);
-	  if (params[0]==11)	// The Data is tagged so we need to work out what tags we need
-	  {
-	      // TODO:  Need to ship the tags and names over but for now just make sure there
-	      // are the same number of tags
-	      value.tag();
-	      
-	      DataVector dv(DataTypes::noValues(s), 0, 1);
-	      for (unsigned i=0;i<params[2];++i)
-	      {
-		  value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
-	      }
-	      return false;	// because I don't trust this yet
-	  }
-	  DataAbstract::ValueType::value_type* vect=&(value.getExpandedVectorReference()[0]);
-	  if (MPI_Bcast(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
-	  {
-	      return false;
-	  }
-	  valueadded=true;
+          bool createcplx=false;
+            // first we need to find out what we are expecting
+          unsigned params[8];
+          if (MPI_Bcast(params, 8, MPI_UNSIGNED, 0, comm)!=MPI_SUCCESS)
+          {
+              return false;
+          }
+          if (params[0]<10)     // the sender somehow tried to send something invalid
+          {
+              return false;
+          }
+            // now we put the shape object together
+          escript::DataTypes::ShapeType s;
+          for (int i=0;i<4;++i)
+          {
+              if (params[3+i]>0)
+              {
+                  s.push_back(params[3+i]);
+              }
+              else
+              {
+                  break;
+              }
+          }
+            // Now we need the FunctionSpace
+          FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
+          
+          if (createcplx)       // we need to make a complex data
+          {
+              value=Data(0, s, fs, params[0]==12);
+              value.complicate();
+              if (params[0]==11)        // The Data is tagged so we need to work out what tags we need
+              {
+                  // TODO:  Need to ship the tags and names over but for now just make sure there
+                  // are the same number of tags
+                  value.tag();
+                  
+                  DataTypes::CplxVectorType dv(DataTypes::noValues(s), 0, 1);
+                  for (unsigned i=0;i<params[2];++i)
+                  {
+                      value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
+                  }
+                  return false; // because I don't trust this yet
+              }
+              DataTypes::cplx_t* vect=&(value.getExpandedVectorReference(DataTypes::cplx_t(0))[0]);
+              //if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength(), MPI_CXX_DOUBLE_COMPLEX, 0, comm)!=MPI_SUCCESS)
+              if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength()*2, MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
+              {
+                  return false;
+              }     
+          }
+          else
+          {
+              
+              value=Data(0, s, fs, params[0]==12);
+              if (params[0]==11)        // The Data is tagged so we need to work out what tags we need
+              {
+                  // TODO:  Need to ship the tags and names over but for now just make sure there
+                  // are the same number of tags
+                  value.tag();
+                  
+                  DataTypes::RealVectorType dv(DataTypes::noValues(s), 0, 1);
+                  for (unsigned i=0;i<params[2];++i)
+                  {
+                      value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
+                  }
+                  return false; // because I don't trust this yet
+              }
+              DataTypes::real_t* vect=&(value.getExpandedVectorReference(0)[0]);
+              if (MPI_Bcast(const_cast<DataTypes::real_t*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
+              {
+                  return false;
+              }
+          }
+          valueadded=true;
       }
 #endif        
     return true;
 }
 
-	// We assume compatible values at this point
+// We assume compatible values at this point
 bool MPIDataReducer::groupReduce(MPI_Comm& com, char mystate)
 {
     throw SplitWorldException("groupReduce Not implemented yet.");
@@ -502,15 +574,15 @@ void MPIDataReducer::copyValueFrom(boost::shared_ptr<AbstractReducer>& src)
     MPIDataReducer* sr=dynamic_cast<MPIDataReducer*>(src.get());
     if (sr==0)
     {
-	throw SplitWorldException("Source and destination need to be the same reducer types.");
+        throw SplitWorldException("Source and destination need to be the same reducer types.");
     }
     if (sr->value.isEmpty())
     {
-	throw SplitWorldException("Attempt to copy DataEmpty.");
+        throw SplitWorldException("Attempt to copy DataEmpty.");
     }
     if (sr==this)
     {
-	throw SplitWorldException("Source and destination can not be the same variable.");
+        throw SplitWorldException("Source and destination can not be the same variable.");
     }
     value.copy(sr->value);    
     valueadded=true;
@@ -521,3 +593,5 @@ bool MPIDataReducer::canClash()
     return (reduceop==MPI_OP_NULL);
 }
 
+} // namespace escript
+
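
The reducers above now exchange a fixed 8-slot metadata header before any Data payload ([0] kind, [1] FunctionSpace type code, [2] tag count, [3..6] shape, [7] complexity). A standalone sketch of packing such a header (packHeader is illustrative, not part of escript):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Illustrative packing of the 8-slot header described in the patch above.
    // Unused shape slots stay 0, which is how the receiver finds the end of the shape.
    std::vector<unsigned> packHeader(unsigned kind, unsigned fsCode, unsigned ntags,
                                     const std::vector<unsigned>& shape, bool isComplex)
    {
        std::vector<unsigned> p(8, 0);
        p[0] = kind;        // 0 invalid/empty, 10 constant, 11 tagged, 12 expanded
        p[1] = fsCode;
        p[2] = ntags;
        for (std::size_t i = 0; i < shape.size() && i < 4; ++i)
            p[3 + i] = shape[i];
        p[7] = isComplex ? 1u : 0u;
        return p;
    }

    int main()
    {
        std::vector<unsigned> shape;
        shape.push_back(3); shape.push_back(3);       // a 3x3 data point shape
        const std::vector<unsigned> p = packHeader(12, 1, 0, shape, false);
        for (std::size_t i = 0; i < p.size(); ++i)
            std::printf("%u ", p[i]);                 // prints: 12 1 0 3 3 0 0 0
        std::printf("\n");
        return 0;
    }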
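
Where the patch sends complex Data it ships 2*getLength() MPI_DOUBLEs instead of relying on MPI_CXX_DOUBLE_COMPLEX, using the fact that std::complex<double> is laid out as a (real, imag) pair of doubles. A minimal standalone sketch of that trick (not escript code; run under mpirun with at least two ranks):

    #include <mpi.h>
    #include <complex>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        std::vector<std::complex<double> > buf(4);
        if (rank == 0)
            for (int i = 0; i < 4; ++i)
                buf[i] = std::complex<double>(i, -i);
        // 4 complex values travel as 8 doubles; memory layout is (re0, im0, re1, im1, ...)
        MPI_Bcast(&buf[0], static_cast<int>(2*buf.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
        if (rank != 0)
            std::printf("rank %d received %g%+gi\n", rank, buf[3].real(), buf[3].imag());
        MPI_Finalize();
        return 0;
    }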
diff --git a/escriptcore/src/MPIDataReducer.h b/escriptcore/src/MPIDataReducer.h
index 5c9f72e..44f36a5 100644
--- a/escriptcore/src/MPIDataReducer.h
+++ b/escriptcore/src/MPIDataReducer.h
@@ -13,13 +13,12 @@
 *
 *****************************************************************************/
 
-#ifndef __ESCRIPT_DATAREDUCER_H__
-#define __ESCRIPT_DATAREDUCER_H__
+#ifndef __ESCRIPT_MPIDATAREDUCER_H__
+#define __ESCRIPT_MPIDATAREDUCER_H__
 
-#include "esysUtils/Esys_MPI.h"
-#include "escript/Data.h"
-#include <boost/shared_ptr.hpp>
 #include "AbstractReducer.h"
+#include "Data.h"
+
 namespace escript
 {
 
@@ -28,17 +27,17 @@ class MPIDataReducer : public AbstractReducer
 {
 public:
     MPIDataReducer(MPI_Op op);
-    ~MPIDataReducer(){};
+    ~MPIDataReducer() {}
     
-        // This is not a constructor parameter because 
-        // if these are created outside the subworld, they won't have
-        // access to a domain yet.
-        // I also want SplitWorld to be able to set this
+    // This is not a constructor parameter because 
+    // if these are created outside the subworld, they won't have
+    // access to a domain yet.
+    // I also want SplitWorld to be able to set this
     void setDomain(escript::Domain_ptr d);
     bool valueCompatible(boost::python::object v);
     bool reduceLocalValue(boost::python::object v, std::string& errstring);
     void reset();
-    bool checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring);
+    bool checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring);
     
     void getCompatibilityInfo(std::vector<unsigned>& params);
     
@@ -50,11 +49,11 @@ public:
     
 	// Get a value for this variable from another process
 	// This is not a reduction and will replace any existing value
-    bool recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo);
+    bool recvFrom(int localid, int source, JMPI& mpiinfo);
 
 	// Send a value to this variable to another process
 	// This is not a reduction and will replace any existing value    
-    bool sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo);    
+    bool sendTo(int localid, int target, JMPI& mpiinfo);    
     virtual boost::python::object getPyObj();
 
 	// send from proc 0 in the communicator to all others
@@ -79,5 +78,5 @@ Reducer_ptr makeDataReducer(std::string type);
 
 }
 
-#endif // __ESCRIPT_DATAREDUCER_H__
+#endif // __ESCRIPT_MPIDATAREDUCER_H__
 
diff --git a/escriptcore/src/MPIScalarReducer.cpp b/escriptcore/src/MPIScalarReducer.cpp
index 40b1b9b..8b925fb 100644
--- a/escriptcore/src/MPIScalarReducer.cpp
+++ b/escriptcore/src/MPIScalarReducer.cpp
@@ -13,99 +13,95 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include "MPIScalarReducer.h"
+#include "SplitWorldException.h"
 
-#include <sstream>
 #include <limits>
+#include <sstream>
+
 #include <boost/python/extract.hpp>
 #include <boost/scoped_array.hpp>
 
-#include "MPIScalarReducer.h"
-#include "SplitWorldException.h"
-
 using namespace boost::python;
 using namespace escript;
 
 
-namespace escript
-{
+namespace escript {
 
 Reducer_ptr makeScalarReducer(std::string type)
 {
     MPI_Op op;
     if (type=="SUM")
     {
-	op=MPI_SUM;
+        op=MPI_SUM;
     }
     else if (type=="MAX")
     {
-	op=MPI_MAX;
+        op=MPI_MAX;
     }
     else if (type=="MIN")
     {
-	op=MPI_MIN;
+        op=MPI_MIN;
     }
     else if (type=="SET")
     {
-	op=MPI_OP_NULL;
+        op=MPI_OP_NULL;
     }
     else
     {
-	throw SplitWorldException("Unsupported operation for makeScalarReducer.");
+        throw SplitWorldException("Unsupported operation for makeScalarReducer.");
     }
     MPIScalarReducer* m=new MPIScalarReducer(op);
-    return Reducer_ptr(m);    
+    return Reducer_ptr(m);
 }
 
 
-}
+} // namespace escript
 
-namespace
-{
+namespace {
 
 void combineDouble(double& d1, const double d2, MPI_Op op)
 {
     if (op==MPI_SUM)
     {
-	d1+=d2;
-    }  
+        d1+=d2;
+    }
     else if (op==MPI_MAX)
     {
-	d1=(d2>d1)?d2:d1;
+        d1=(d2>d1)?d2:d1;
     }
     else if (op==MPI_MIN)
     {
-	d1=(d2<d1)?d2:d1;      
+        d1=(d2<d1)?d2:d1;
     }
-    else if (op==MPI_OP_NULL) 
+    else if (op==MPI_OP_NULL)
     {
-	throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
-    }    
-}
+        throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
+    }
 }
 
+} // anonymous namespace
+
 
 MPIScalarReducer::MPIScalarReducer(MPI_Op op)
   : reduceop(op), had_an_export_this_round(false)
 {
     valueadded=false;
-    if ((op==MPI_SUM) || (op==MPI_OP_NULL))	// why not switch? because we don't know MPI_Op is scalar
+    if ((op==MPI_SUM) || (op==MPI_OP_NULL))     // why not a switch? we can't assume MPI_Op is an integral type
     {
-	identity=0;
+        identity=0;
     }
     else if (op==MPI_MAX)
     {
-	identity=std::numeric_limits<double>::min();
+        identity=std::numeric_limits<double>::min();
     }
     else if (op==MPI_MIN)
     {
-	identity=std::numeric_limits<double>::max();
+        identity=std::numeric_limits<double>::max();
     }
-    else      
+    else
     {
-	throw SplitWorldException("Unsupported MPI_Op");
+        throw SplitWorldException("Unsupported MPI_Op");
     }
 }
 
@@ -119,25 +115,25 @@ std::string MPIScalarReducer::description()
     std::string op;
     if (reduceop==MPI_SUM)
     {
-	op="SUM";
+        op="SUM";
     }
     else if (reduceop==MPI_MAX)
     {
-	op="MAX";
-    } 
+        op="MAX";
+    }
     else if (reduceop==MPI_MIN)
     {
-	op="MIN";
+        op="MIN";
     }
     else if (reduceop==MPI_OP_NULL)
     {
-	op="SET";
-    }    
+        op="SET";
+    }
     else
     {
-	throw SplitWorldException("Unsupported MPI reduction operation");
+        throw SplitWorldException("Unsupported MPI reduction operation");
     }
-    return "Reducer("+op+") for double scalars"; 
+    return "Reducer("+op+") for double scalars";
 }
 
 void MPIScalarReducer::newRunJobs()
@@ -150,44 +146,43 @@ bool MPIScalarReducer::valueCompatible(boost::python::object v)
     extract<double> ex(v);
     if (!ex.check())
     {
-	return false;
+        return false;
     }
     return true;
 }
 
-
 bool MPIScalarReducer::reduceLocalValue(boost::python::object v, std::string& errstring)
 {
     extract<double> ex(v);
     if (!ex.check())
     {
-	errstring="reduceLocalValue: expected double value. Got something else.";
-	return false;
+        errstring="reduceLocalValue: expected double value. Got something else.";
+        return false;
     }
-    if (!valueadded || !had_an_export_this_round)	// first value so answer becomes this one
+    if (!valueadded || !had_an_export_this_round)
     {
-	value=ex();
-	valueadded=true;
+        // first value so answer becomes this one
+        value=ex();
+        valueadded=true;
         had_an_export_this_round=true;
     }
     else
     {
-	if (reduceop==MPI_OP_NULL)
-	{
-	    if (had_an_export_this_round) 
-	    {
-		reset();
-		errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
-		return false;
-	    }
-	    value=ex();
-	}
+        if (reduceop==MPI_OP_NULL)
+        {
+            if (had_an_export_this_round)
+            {
+                reset();
+                errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
+                return false;
+            }
+            value=ex();
+        }
         else
-        { 
-	    combineDouble(value, ex(), reduceop);
-        }      
+        {
+            combineDouble(value, ex(), reduceop);
+        }
         had_an_export_this_round=true;
-	
     }
     return true;
 }
@@ -198,25 +193,25 @@ void MPIScalarReducer::reset()
     value=0;
 }
 
-bool MPIScalarReducer::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring)
+bool MPIScalarReducer::checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring)
 {
     return true;
 }
 
-// By the time this function is called, we know that all the values 
+// By the time this function is called, we know that all the values
 // are compatible
 bool MPIScalarReducer::reduceRemoteValues(MPI_Comm& com)
 {
 #ifdef ESYS_MPI
     if (reduceop==MPI_OP_NULL)
-    {	
-	reset();
-	return false;		// this will stop bad things happening but won't give an informative error message
+    {
+        reset();
+        return false;           // this will stop bad things happening but won't give an informative error message
     }
     double rvalue;
     if (MPI_Allreduce(&value, &rvalue, 1, MPI_DOUBLE, reduceop, com)!=MPI_SUCCESS)
     {
-	return false;
+        return false;
     }
     value=rvalue;
     return true;
@@ -231,38 +226,34 @@ bool MPIScalarReducer::reduceRemoteValues(MPI_Comm& com)
 //  [0]    Type of Data:  {0 : error,  1: DataEmpty, 10: constant, 11:tagged, 12:expanded}
 //  [1]    Functionspace type code
 //  [2]    Only used for tagged --- gives the number of tags (which exist in the data object)
-//  [3..6] Components of the shape  
+//  [3..6] Components of the shape
 void MPIScalarReducer::getCompatibilityInfo(std::vector<unsigned>& params)
 {
-    params.resize(1);	// in case someone tries to do something with it
+    params.resize(1);   // in case someone tries to do something with it
 }
 
 
-	// Get a value for this variable from another process
-	// This is not a reduction and will replace any existing value
-bool MPIScalarReducer::recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)
+// Get a value for this variable from another process
+// This is not a reduction and will replace any existing value
+bool MPIScalarReducer::recvFrom(int localid, int source, JMPI& mpiinfo)
 {
-#ifdef ESYS_MPI  
+#ifdef ESYS_MPI
     MPI_Status stat;
     if (MPI_Recv(&value, 1, MPI_DOUBLE, source, PARAMTAG, mpiinfo->comm, &stat)!=MPI_SUCCESS)
-    {
-	return false;
-    }
-#endif    
+        return false;
+#endif
     return true;
 }
 
-	// Send a value to this variable to another process
-	// This is not a reduction and will replace any existing value    
-bool MPIScalarReducer::sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo)
+// Send a value to this variable to another process
+// This is not a reduction and will replace any existing value
+bool MPIScalarReducer::sendTo(int localid, int target, JMPI& mpiinfo)
 {
-#ifdef ESYS_MPI  
-      if (MPI_Send(&value, 1, MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
-      {
-	  return false;
-      }
-#endif      
-      return true;
+#ifdef ESYS_MPI
+    if (MPI_Send(&value, 1, MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
+        return false;
+#endif
+    return true;
 }
 
 double MPIScalarReducer::getDouble()
@@ -270,7 +261,6 @@ double MPIScalarReducer::getDouble()
     return value;
 }
 
-
 boost::python::object MPIScalarReducer::getPyObj()
 {
     boost::python::object o(value);
@@ -278,14 +268,13 @@ boost::python::object MPIScalarReducer::getPyObj()
 }
 
 #ifdef ESYS_MPI
-
-	// send from proc 0 in the communicator to all others
+// send from proc 0 in the communicator to all others
 bool MPIScalarReducer::groupSend(MPI_Comm& com, bool imsending)
 {
     if (MPI_Bcast(&value, 1, MPI_DOUBLE, 0, com)==MPI_SUCCESS)
     {
-	valueadded=true;
-	return true;
+        valueadded=true;
+        return true;
     }
     return false;
 }
@@ -294,26 +283,24 @@ bool MPIScalarReducer::groupReduce(MPI_Comm& com, char mystate)
 {
     double answer=0;
     if (reduceop==MPI_OP_NULL)
-    {
-	return false;
-    }
+        return false;
+
     if (MPI_Allreduce((mystate==reducerstatus::NEW)?&value:&identity, &answer, 1, MPI_DOUBLE, reduceop, com)==MPI_SUCCESS)
     {
-	value=answer;
-	valueadded=true;
-	return true;
+        value=answer;
+        valueadded=true;
+        return true;
     }
     return false;
 }
-
-#endif
+#endif // ESYS_MPI
 
 void MPIScalarReducer::copyValueFrom(boost::shared_ptr<AbstractReducer>& src)
 {
     MPIScalarReducer* sr=dynamic_cast<MPIScalarReducer*>(src.get());
     if (sr==0)
     {
-	throw SplitWorldException("Source and destination need to be the same reducer types.");
+        throw SplitWorldException("Source and destination need to be the same reducer types.");
     }
     value=sr->value;
     valueadded=true;
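
The scalar reducer's local step simply folds each exported double into the running value with the configured operation; a minimal standalone sketch of that folding (combine is an illustrative stand-in for combineDouble, not escript API):

    #include <cstdio>

    // Illustrative stand-in for the reducer's local combine step:
    // successive exports are folded into the running value with the chosen operation.
    enum Op { SUM, MAX, MIN };

    static void combine(double& acc, double v, Op op)
    {
        if (op == SUM)      acc += v;
        else if (op == MAX) acc = (v > acc) ? v : acc;
        else                acc = (v < acc) ? v : acc;
    }

    int main()
    {
        const double exports[3] = {4.0, -1.5, 2.25};
        double total = 0.0;            // 0 is the identity for SUM, as in the reducer
        double biggest = exports[0];   // seed MAX with the first exported value
        for (int i = 0; i < 3; ++i) combine(total, exports[i], SUM);
        for (int i = 1; i < 3; ++i) combine(biggest, exports[i], MAX);
        std::printf("sum=%g max=%g\n", total, biggest);   // prints: sum=4.75 max=4
        return 0;
    }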
diff --git a/escriptcore/src/MPIScalarReducer.h b/escriptcore/src/MPIScalarReducer.h
index b722306..214c654 100644
--- a/escriptcore/src/MPIScalarReducer.h
+++ b/escriptcore/src/MPIScalarReducer.h
@@ -16,10 +16,8 @@
 #ifndef __ESCRIPT_SCALARREDUCER_H__
 #define __ESCRIPT_SCALARREDUCER_H__
 
-#include "esysUtils/Esys_MPI.h"
-#include "escript/Data.h"
-#include <boost/shared_ptr.hpp>
 #include "AbstractReducer.h"
+
 namespace escript
 {
 
@@ -30,40 +28,42 @@ public:
     MPIScalarReducer(MPI_Op op);
     ~MPIScalarReducer(){};
     
-        // This is not a constructor parameter because 
-        // if these are created outside the subworld, they won't have
-        // access to a domain yet.
-        // I also want SplitWorld to be able to set this
-    void setDomain(escript::Domain_ptr d);
+    // This is not a constructor parameter because 
+    // if these are created outside the subworld, they won't have
+    // access to a domain yet.
+    // I also want SplitWorld to be able to set this
+    void setDomain(Domain_ptr d);
     bool valueCompatible(boost::python::object v);
     bool reduceLocalValue(boost::python::object v, std::string& errstring);
     void reset();
-    bool checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring);
+    bool checkRemoteCompatibility(JMPI& mpiInfo, std::string& errstring);
     
     void getCompatibilityInfo(std::vector<unsigned>& params);
     
-      // talk to corresponding processes in other subworlds
+    // talk to corresponding processes in other subworlds
     bool reduceRemoteValues(MPI_Comm& com);
     
       // human readable description
     std::string description();
     
-	// Get a value for this variable from another process
-	// This is not a reduction and will replace any existing value
-    bool recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo);
+    // Get a value for this variable from another process
+    // This is not a reduction and will replace any existing value
+    bool recvFrom(int localid, int source, JMPI& mpiinfo);
+
+    // Send a value to this variable to another process
+    // This is not a reduction and will replace any existing value    
+    bool sendTo(int localid, int target, JMPI& mpiinfo);    
 
-	// Send a value to this variable to another process
-	// This is not a reduction and will replace any existing value    
-    bool sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo);    
     double getDouble();
+
     virtual boost::python::object getPyObj(); 
     
-    	// send from proc 0 in the communicator to all others
+    // send from proc 0 in the communicator to all others
     bool groupSend(MPI_Comm& com, bool imsending);
 
     bool canClash();    
     
-	// reduction with some procs submitting identity values
+    // reduction with some procs submitting identity values
     bool groupReduce(MPI_Comm& com, char mystate);
     
     void copyValueFrom(boost::shared_ptr<AbstractReducer>& src);    
@@ -75,13 +75,11 @@ private:
     MPI_Op reduceop;
     double identity;
     bool had_an_export_this_round;
-    
 };
 
 
 Reducer_ptr makeScalarReducer(std::string type);
 
-
 }
 
 #endif // __ESCRIPT_SCALARREDUCER_H__
diff --git a/escriptcore/src/NonReducedVariable.cpp b/escriptcore/src/NonReducedVariable.cpp
index 6bf8a76..2fc7664 100644
--- a/escriptcore/src/NonReducedVariable.cpp
+++ b/escriptcore/src/NonReducedVariable.cpp
@@ -1,6 +1,6 @@
 /*****************************************************************************
 *
-* Copyright (c) 2015-2016 by The University of Queensland
+* Copyright (c) 2015-2016 by The University of Queensland
 * http://www.uq.edu.au
 *
 * Primary Business: Queensland, Australia
@@ -13,10 +13,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "NonReducedVariable.h"
 #include "SplitWorldException.h"
 
@@ -59,7 +55,7 @@ void NonReducedVariable::reset()
 
 // Since we aren't actually doing a check here, this call won't function
 // as a barrier like other implementations of this method do
-bool NonReducedVariable::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring)
+bool NonReducedVariable::checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring)
 {
     return true;
 }
@@ -79,12 +75,12 @@ std::string NonReducedVariable::description()
     return "Non-Reduced Variable.";
 }
 
-bool NonReducedVariable::recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)
+bool NonReducedVariable::recvFrom(int localid, int source, JMPI& mpiinfo)
 {
     return true;
 }
 
-bool NonReducedVariable::sendTo(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)
+bool NonReducedVariable::sendTo(int localid, int source, JMPI& mpiinfo)
 {
     return true;
 }
@@ -121,13 +117,13 @@ void NonReducedVariable::copyValueFrom(boost::shared_ptr<AbstractReducer>& src)
 }
 
 
-namespace escript
-{
+namespace escript {
+
 Reducer_ptr makeNonReducedVariable()
 {
     NonReducedVariable* m=new NonReducedVariable();
     return Reducer_ptr(m);
-
 }
 
 }
+
diff --git a/escriptcore/src/NonReducedVariable.h b/escriptcore/src/NonReducedVariable.h
index fd793d8..94addfe 100644
--- a/escriptcore/src/NonReducedVariable.h
+++ b/escriptcore/src/NonReducedVariable.h
@@ -16,10 +16,9 @@
 #ifndef __ESCRIPT_NONREDUCEDVARIABLE_H__
 #define __ESCRIPT_NONREDUCEDVARIABLE_H__
 
-#include "esysUtils/Esys_MPI.h"
-#include "escript/Data.h"
-#include <boost/shared_ptr.hpp>
 #include "AbstractReducer.h"
+#include "Data.h"
+
 namespace escript
 {
 
@@ -30,16 +29,16 @@ class NonReducedVariable : public AbstractReducer
 public:
     NonReducedVariable();
     ~NonReducedVariable();
-    
-        // This is not a constructor parameter because 
-        // if these are created outside the subworld, they won't have
-        // access to a domain yet.
-        // I also want SplitWorld to be able to set this
+
+    // This is not a constructor parameter because 
+    // if these are created outside the subworld, they won't have
+    // access to a domain yet.
+    // I also want SplitWorld to be able to set this
     void setDomain(escript::Domain_ptr d);
     bool valueCompatible(boost::python::object v);
     bool reduceLocalValue(boost::python::object v, std::string& errstring);
     void reset();
-    bool checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring);
+    bool checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring);
     
     void getCompatibilityInfo(std::vector<unsigned>& params);
     
@@ -49,20 +48,20 @@ public:
       // human readable description
     std::string description();
     
-	// Get a value for this variable from another process
-	// This is not a reduction and will replace any existing value
-    bool recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo);
+    // Get a value for this variable from another process
+    // This is not a reduction and will replace any existing value
+    bool recvFrom(int localid, int source, JMPI& mpiinfo);
 
-	// Send a value to this variable to another process
-	// This is not a reduction and will replace any existing value    
-    bool sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo);    
+    // Send a value to this variable to another process
+    // This is not a reduction and will replace any existing value    
+    bool sendTo(int localid, int target, JMPI& mpiinfo);    
     double getDouble();
     virtual boost::python::object getPyObj(); 
     
-    	// send from proc 0 in the communicator to all others
+        // send from proc 0 in the communicator to all others
     bool groupSend(MPI_Comm& com, bool imsending);
     
-	// reduction with some procs submitting identity values
+    // reduction with some procs submitting identity values
     bool groupReduce(MPI_Comm& com, char mystate);
     
     void copyValueFrom(boost::shared_ptr<AbstractReducer>& src);
@@ -72,11 +71,9 @@ private:
     boost::python::object identity;
 };
 
-
 Reducer_ptr makeNonReducedVariable();
 
-
-}
+} // namespace escript
 
 #endif // __ESCRIPT_NONREDUCEDVARIABLE_H__
 
diff --git a/escriptcore/src/NullDomain.cpp b/escriptcore/src/NullDomain.cpp
index 2b45ae6..b64bd5a 100644
--- a/escriptcore/src/NullDomain.cpp
+++ b/escriptcore/src/NullDomain.cpp
@@ -14,13 +14,9 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "DomainException.h"
 #include "NullDomain.h" 
 #include "Data.h"
+#include "DomainException.h"
 
 namespace escript {
 
@@ -31,7 +27,7 @@ int defaultList[2]={0,1}; // an array to return in borrowListOfTagsInUse();
 // Null domains only support 1 functionspace type.
 // The choice of -7 as the value is to prevent collision with other domain enums
 int NullDomain::NullDomainFS = -7;
-dim_t NullDomain::referenceID = dim_t(10); // arbitrary
+DataTypes::dim_t NullDomain::referenceID = DataTypes::dim_t(10); // arbitrary
 
 std::string NullDomain::getDescription() const 
 {
@@ -43,6 +39,11 @@ std::string NullDomain::functionSpaceTypeAsString(int functionSpaceType) const
     return "Default_FunctionSpace";
 }
 
+JMPI NullDomain::getMPI() const
+{
+    throw DomainException("NullDomain::getMPI() not supported.");
+}
+
 void NullDomain::interpolateOnDomain(Data& target,const Data& source) const
 {
    if (source.getFunctionSpace().getDomain().get()!=this)  
@@ -66,13 +67,13 @@ void NullDomain::interpolateAcross(Data& target, const Data& source) const
    throw DomainException("Error - interpolation to the NullDomain not supported.");
 }
 
-std::pair<int,dim_t> NullDomain::getDataShape(int functionSpaceCode) const
+std::pair<int,DataTypes::dim_t> NullDomain::getDataShape(int functionSpaceCode) const
 {
   //
   // return an arbitrary value
   // - I know it says arbitrary but it's not a good idea to change it now.
   // - some tests assume that the null domain holds a single value
-  return std::pair<int,dim_t>(1,1);
+  return std::pair<int,DataTypes::dim_t>(1,1);
 }
 
 bool NullDomain::operator==(const AbstractDomain& other) const
@@ -93,7 +94,7 @@ const int* NullDomain::borrowListOfTagsInUse(int functionSpaceCode) const
 escript::Data NullDomain::randomFill(const DataTypes::ShapeType& shape,
        const FunctionSpace& what, long seed, const boost::python::tuple& filter) const
 {
-    throw DataException("Attempted randomFill on NullDomain. NullDomains do not store values.");
+    throw DomainException("Attempted randomFill on NullDomain. NullDomains do not store values.");
 }
 void NullDomain::dump(std::string const&) const
 {
@@ -113,7 +114,7 @@ bool NullDomain::isCellOriented(int) const
     throwStandardException("NullDomain::isCellOriented");
     return false;
 }
-bool NullDomain::ownSample(int, index_t) const
+bool NullDomain::ownSample(int, DataTypes::index_t) const
 {
     throwStandardException("NullDomain::ownSample");
     return false;
@@ -184,3 +185,4 @@ void NullDomain::setToX(escript::Data&) const
 
 
 }  // end of namespace
+
diff --git a/escriptcore/src/NullDomain.h b/escriptcore/src/NullDomain.h
index c231efa..f787b85 100644
--- a/escriptcore/src/NullDomain.h
+++ b/escriptcore/src/NullDomain.h
@@ -18,7 +18,6 @@
 #define __ESCRIPT_NULLDOMAIN_H__
 
 #include "system_dep.h"
-
 #include "AbstractDomain.h"
 
 namespace escript {
@@ -41,11 +40,12 @@ class ESCRIPT_DLL_API NullDomain : public AbstractDomain
 {
 private:
     static int NullDomainFS;
-    static dim_t referenceID;
+    static DataTypes::dim_t referenceID;
         
 public:
     NullDomain() {}
 
+    virtual JMPI getMPI() const;
     virtual int getMPISize() const { return 1; }
     virtual int getMPIRank() const { return 0; }
     virtual void MPIBarrier() const {}
@@ -79,11 +79,11 @@ public:
     virtual int getReducedSolutionCode() const { return NullDomainFS; }
     virtual int getDiracDeltaFunctionsCode() const { return NullDomainFS; }
 
-    virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
+    virtual std::pair<int,DataTypes::dim_t> getDataShape(int functionSpaceCode) const;
 
-    virtual int getTagFromSampleNo(int, index_t) const { return 1; }
+    virtual int getTagFromSampleNo(int, DataTypes::index_t) const { return 1; }
 
-    virtual const dim_t* borrowSampleReferenceIDs(int) const { return &referenceID; }
+    virtual const DataTypes::dim_t* borrowSampleReferenceIDs(int) const { return &referenceID; }
 
     virtual int getDim() const { return 1; }
 
@@ -131,7 +131,7 @@ public:
     virtual void setToNormal(escript::Data& out) const;
     virtual void setToSize(escript::Data& out) const;
     virtual void setToGradient(escript::Data& grad, const escript::Data& arg) const;
-    virtual bool ownSample(int fs_code, index_t id) const;
+    virtual bool ownSample(int fs_code, DataTypes::index_t id) const;
     virtual escript::Data randomFill(const DataTypes::ShapeType& shape,
                                      const FunctionSpace& what, long seed,
                                      const boost::python::tuple& filter) const;
diff --git a/escriptcore/src/Pointers.h b/escriptcore/src/Pointers.h
index 85ed308..3daf6ce 100644
--- a/escriptcore/src/Pointers.h
+++ b/escriptcore/src/Pointers.h
@@ -18,21 +18,22 @@
 #define POINTERS_H_2008
 
 /** \file Pointers.h 
-\brief Typedefs and macros for reference counted storage.
+  \brief Typedefs and macros for reference counted storage.
 */
 
-// The idea is that we should be able to easily switch between shared_ptr and intrusive_ptr if required
+// The idea is that we should be able to easily switch between shared_ptr
+// and intrusive_ptr if required
 
 // Where to find the base class which supplies refcounting
 #define REFCOUNT_BASE_FILE <boost/enable_shared_from_this.hpp>
 // The name of the class to extend
 #define REFCOUNT_BASE_CLASS(x) boost::enable_shared_from_this<x>
 
-
 #define POINTER_WRAPPER_CLASS(x) boost::shared_ptr<x>
 
+#define REFCOUNTNS	boost
 
 #include REFCOUNT_BASE_FILE
 
-
 #endif
+
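For orientation, here is a minimal sketch (not part of this patch) of how the Pointers.h macros are typically consumed; MyObject and MyObject_ptr are hypothetical names used only for illustration:

#include "Pointers.h"

// A reference-counted class derives from the configured base class...
class MyObject : public REFCOUNT_BASE_CLASS(MyObject)
{
public:
    MyObject() : value(0) {}
    int value;
};

// ...and is always handled through the configured smart-pointer wrapper,
// currently boost::shared_ptr.
typedef POINTER_WRAPPER_CLASS(MyObject) MyObject_ptr;

MyObject_ptr makeObject()
{
    return MyObject_ptr(new MyObject());
}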
diff --git a/escriptcore/src/Random.cpp b/escriptcore/src/Random.cpp
new file mode 100644
index 0000000..f75112a
--- /dev/null
+++ b/escriptcore/src/Random.cpp
@@ -0,0 +1,244 @@
+/*****************************************************************************
+*
+* Copyright (c) 2013-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <escript/EsysMPI.h>
+
+#include <algorithm>
+#include <vector>
+
+#include <boost/random/mersenne_twister.hpp>
+
+using namespace std;
+
+namespace {
+    
+boost::mt19937 base;            // used to seed all the other generators  
+vector<boost::mt19937*> gens;
+vector<boost::uint32_t> seeds;
+
+void seedGens(long seed)
+{
+#ifdef _OPENMP
+    int numthreads=omp_get_max_threads();
+#else
+    int numthreads=1;
+#endif
+    if (gens.size()==0)         // we haven't instantiated the generators yet  
+    {
+        gens.resize(numthreads);        
+        seeds.resize(numthreads);
+    }  
+    if (seed!=0)
+    {
+        base.seed((boost::uint32_t)seed); // without this cast, icc gets confused
+        for (int i=0;i<numthreads;++i)
+        {
+            boost::uint32_t b=base();
+            seeds[i]=b; // initialise each generator with successive random values      
+        }
+#pragma omp parallel for
+        for (int i=0; i<numthreads; ++i)
+        {
+            gens[i]=new boost::mt19937(seeds[i]);
+        }
+    }
+}
+  
+} // anonymous namespace
+
+namespace escript
+{
+
+// Put n random values from the interval [0,1] into array.
+// Idea here is to create an array of seeds by feeding the original seed into
+// the random generator.
+// The code at the beginning of the function, which adjusts the seed (or
+// computes one if none is given), is just meant to introduce some variety
+// (and to ensure that multiple ranks don't get the same seed).
+// I make no claim about how well these initial seeds are distributed.
+// notes:
+// - uses openmp
+// - don't forget to call CHECK_FOR_EX_WRITE if using this on Data
+void randomFillArray(long seed, double* array, size_t n)
+{
+    // Remember the previous seed so that creating several objects in a row
+    // doesn't give each of them the same start seed.
+    static unsigned prevseed=0;
+    if (seed==0)
+    {
+        if (prevseed==0) 
+        {
+            time_t s=time(0);
+            seed=s;
+        }
+        else
+        {
+            seed=prevseed+419;  // these numbers are arbitrary
+            if (seed>3040101)   // I want to avoid overflow on 32bit systems
+            {
+                seed=((int)(seed)%0xABCD)+1;
+            }
+        }
+    }  
+    // now we need to consider MPI since we don't want each rank to start with
+    // the same seed. Rank in COMM_WORLD will do
+#ifdef ESYS_MPI
+    int rank;
+    int mperr=MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    if (mperr != MPI_SUCCESS) {
+        rank=0;
+    }
+    seed+=rank*5;
+#endif
+    prevseed=seed;  
+    
+    boost::mt19937::result_type RMAX=base.max();
+    seedGens(seed);
+    
+#pragma omp parallel
+    {
+        size_t i;
+#ifdef _OPENMP
+        int tnum=omp_get_thread_num();
+#else
+        int tnum=0;
+#endif
+        boost::mt19937& generator=*(gens[tnum]);
+        
+#pragma omp for schedule(static)
+        for (i=0;i<n;++i)
+        {
+            array[i]=((double)generator())/RMAX;
+        }
+    }
+}
+
+// see patternFillArray for details on parameters
+void patternFillArray2D(size_t x, size_t y, double* array, size_t spacing,
+                        size_t basex, size_t basey, size_t numpoints)
+{
+    std::fill_n(array, x*y*numpoints, 0.);
+    size_t xoff=basex%spacing;
+    size_t yoff=basey%spacing;
+    for (size_t r=0;r<y;++r)
+    {
+        size_t step=((r+yoff)%spacing)?spacing:1; 
+        for (size_t c=0;c<x;++c)
+        {
+            if ((c+xoff)%step==0)
+            {
+                for (size_t p=0;p<numpoints;++p)
+                {
+                    array[(c+r*x)*numpoints+p]=1+p;
+                }             
+            }
+        }
+    }
+}
+
+
+// fill the array (which we assume is 3D with x by y by z points in it) with
+// a pattern.
+// The base? params give the coordinates (in # of elements) of the origin of
+// _this_ rank.
+// Used to ensure patterns are generated consistently across multiple ranks.
+// This is only for internal debug so the patterns (or this function) may
+// disappear without notice
+void patternFillArray(int pattern, size_t x, size_t y, size_t z, double* array,
+                      size_t spacing, size_t basex, size_t basey, size_t basez,
+                      size_t numpoints)
+{
+    if (pattern==0)     // a cross pattern in the z=0 plane, repeated for each z layer
+    {
+        std::fill_n(array, x*y*numpoints, 0.);
+        size_t xoff=basex%spacing;
+        size_t yoff=basey%spacing;
+        for (size_t r=0;r<y;++r)
+        {
+            size_t step=((r+yoff)%spacing)?spacing:1;
+            for (size_t c=0;c<x;++c)
+            {
+                if ((c+xoff)%step==0)
+                {
+                    for (size_t p=0;p<numpoints;++p)
+                    {
+                        array[(c+r*x)*numpoints+p]=p+1;
+                    }
+                }
+            }
+        }
+        for (size_t l=1;l<z;++l)
+        {
+            std::copy(array, &array[x*y*numpoints], &array[l*x*y*numpoints]);
+        }
+    }
+    else                // pattern 1. A grid in all 3 dimensions 
+    {
+        if (z<2)
+        {
+            patternFillArray(0, x, y, z, array, spacing, basex, basey, basez, numpoints);
+            return;     // this pattern needs a minimum of 2 layers
+        }
+        size_t xoff=basex%spacing;
+        size_t yoff=basey%spacing;
+        size_t zoff=basez%spacing;
+        
+        double* buff1=new double[x*y*numpoints];        // stores the main cross pattern
+        double* buff2=new double[x*y*numpoints];        // stores the "verticals"
+        std::fill_n(buff1, x*y*numpoints, 0.);
+        std::fill_n(buff2, x*y*numpoints, 0.);
+        // fill in buff1
+        for (size_t r=0;r<y;++r)
+        {
+            size_t step=((r+yoff)%spacing)?spacing:1;
+            for (size_t c=0;c<x;++c)
+            {
+                if ((c+xoff)%step==0)
+                {
+                    for (size_t p=0;p<numpoints;++p)
+                    {
+                        buff1[(c+r*x)*numpoints+p]=p+1;
+                    }
+                }
+            }       
+        }
+        
+        for (size_t r=(spacing-yoff)%spacing;r<y;r+=spacing)
+        {
+            for (size_t c=(spacing-xoff)%spacing;c<x;c+=spacing)
+            {
+                for (size_t p=0;p<numpoints;++p)
+                {
+                    buff2[(c+r*x)*numpoints+p]=p+1;
+                }
+            }
+        }       
+        for (size_t l=0;l<z;++l)
+        {
+            if ((l+zoff)%spacing)
+            {
+                std::copy(buff2, buff2+x*y*numpoints, &array[x*y*l*numpoints]);
+            }
+            else
+            {
+                std::copy(buff1, buff1+x*y*numpoints, &array[x*y*l*numpoints]);
+            }
+        }
+        delete[] buff1;
+        delete[] buff2;
+    }
+}
+
+} // end namespace
+
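As a quick orientation for the new file, a minimal usage sketch (not part of this patch); the <escript/Random.h> include path is an assumption based on the header install rules in the SConscript below:

#include <escript/Random.h>
#include <vector>

void fillWithNoise()
{
    std::vector<double> values(1000);
    // A seed of 0 lets the implementation derive one from time() and the MPI
    // rank; any fixed non-zero seed gives reproducible per-thread sequences.
    escript::randomFillArray(0, &values[0], values.size());
}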
diff --git a/esysUtils/src/EsysRandom.h b/escriptcore/src/Random.h
similarity index 71%
rename from esysUtils/src/EsysRandom.h
rename to escriptcore/src/Random.h
index 025f7e3..cdeccc7 100644
--- a/esysUtils/src/EsysRandom.h
+++ b/escriptcore/src/Random.h
@@ -13,9 +13,10 @@
 *
 *****************************************************************************/
 
-#ifndef ESYS_RANDOM_H
-#define ESYS_RANDOM_H
-namespace esysUtils
+#ifndef __ESCRIPT_RANDOM_H__
+#define __ESCRIPT_RANDOM_H__
+
+namespace escript
 {
 /* \brief put n random doubles (from [0.0, 1.0]) in array (uses OpenMP).
    If using this on Data, then be sure to CHECK_EX_WRITE first
@@ -23,10 +24,15 @@ namespace esysUtils
 void randomFillArray(long seed, double* array, size_t n);
 
 
-void patternFillArray2D(size_t x, size_t y, double* array, size_t spacing, size_t basex, size_t basey, size_t numpoints);
+void patternFillArray2D(size_t x, size_t y, double* array, size_t spacing,
+                        size_t basex, size_t basey, size_t numpoints);
 
-/* Intended for debugging use only */
-void patternFillArray(int pattern, size_t x, size_t y, size_t z, double* array, size_t spacing, size_t basex, size_t basey, size_t basez, size_t numpoints);
+// Intended for debugging use only
+void patternFillArray(int pattern, size_t x, size_t y, size_t z, double* array,
+                      size_t spacing, size_t basex, size_t basey, size_t basez,
+                      size_t numpoints);
 
 }
-#endif
+
+#endif // __ESCRIPT_RANDOM_H__
+
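To illustrate the pattern functions declared above, a small worked example (not part of this patch) of patternFillArray2D on a single rank, following the implementation in Random.cpp:

#include <escript/Random.h>   // assumed install path, as above

void patternExample()
{
    double a[4*4];
    // x=4, y=4, spacing=2, basex=basey=0, numpoints=1
    escript::patternFillArray2D(4, 4, a, 2, 0, 0, 1);
    // Rows whose index is a multiple of the spacing are filled completely,
    // the other rows only at columns that are multiples of the spacing:
    //   row 0: 1 1 1 1
    //   row 1: 1 0 1 0
    //   row 2: 1 1 1 1
    //   row 3: 1 0 1 0
}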
diff --git a/escriptcore/src/SConscript b/escriptcore/src/SConscript
index 61f4c4b..3e0da6a 100644
--- a/escriptcore/src/SConscript
+++ b/escriptcore/src/SConscript
@@ -13,16 +13,9 @@
 #
 ##############################################################################
 
-import os
 Import('*')
 
-local_env = env.Clone()
-local_dodgy = dodgy_env.Clone()
-py_wrapper_local_env = env.Clone()
-
-# Remove the sharedlibrary prefix on all platform - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'escript'
 
 sources = """
     AbstractContinuousDomain.cpp
@@ -30,80 +23,89 @@ sources = """
     AbstractReducer.cpp
     AbstractSystemMatrix.cpp
     AbstractTransportProblem.cpp
+    ArrayOps.cpp    
+    BinaryDataReadyOps.cpp    
     Data.cpp
     DataAbstract.cpp
-    DataBlocks2D.cpp
-    DataC.cpp
     DataConstant.cpp
     DataEmpty.cpp
-    DataException.cpp
     DataExpanded.cpp
     DataFactory.cpp
     DataLazy.cpp
-    DataMaths.cpp
     DataReady.cpp
     DataTagged.cpp
     DataTypes.cpp
     DataVector.cpp
-    DomainException.cpp
+    DataVectorAlt.cpp
+    DataVectorOps.cpp
+    DataVectorTaipan.cpp    
     EscriptParams.cpp
+    EsysMPI.cpp
+    ES_optype.cpp
+    ExceptionTranslators.cpp
     FunctionSpace.cpp
-    FunctionSpaceException.cpp
     FunctionSpaceFactory.cpp
     LapackInverseHelper.cpp
-    NonReducedVariable.cpp
-    NullDomain.cpp
     MPIDataReducer.cpp
     MPIScalarReducer.cpp
+    NonReducedVariable.cpp
+    NullDomain.cpp
+    pyerr.cpp
+    Random.cpp
+    SolverOptions.cpp
     SplitWorld.cpp
-    SplitWorldException.cpp
     SubWorld.cpp
-    SystemMatrixException.cpp
     Taipan.cpp
     TestDomain.cpp
-    TransportProblemException.cpp
     Utils.cpp
     WrappedArray.cpp
-    SolverOptions.cpp
-    SolverOptionsException.cpp
 """.split()
-    # blocktimer.c
+
 headers = """
     AbstractContinuousDomain.h
     AbstractDomain.h
     AbstractReducer.h
     AbstractSystemMatrix.h
     AbstractTransportProblem.h
-    BinaryOp.h
+    ArrayOps.h    
+    Assert.h
+    BinaryDataReadyOps.h
     Data.h
     DataAbstract.h
-    DataAlgorithm.h
-    DataBlocks2D.h
-    DataC.h
     DataConstant.h
     DataEmpty.h
     DataException.h
     DataExpanded.h
     DataFactory.h
     DataLazy.h
-    DataMaths.h
     DataReady.h
     DataTagged.h
     DataTypes.h
     DataVector.h
+    DataVectorAlt.h
+    DataVectorOps.h    
+    DataVectorTaipan.h    
+    Distribution.h    
     Dodgy.h
     DomainException.h
     EscriptParams.h
+    EsysException.h
+    EsysMPI.h
+    ES_optype.h
+    ExceptionTranslators.h
+    FileWriter.h
     FunctionSpace.h
     FunctionSpaceException.h
     FunctionSpaceFactory.h
+    IndexList.h
     LapackInverseHelper.h
-    LocalOps.h
     NonReducedVariable.h
     NullDomain.h
     MPIDataReducer.h
     MPIScalarReducer.h
     Pointers.h
+    Random.h
+    SolverOptions.h
     SplitWorld.h
     SplitWorldException.h
     SubWorld.h
@@ -111,65 +113,56 @@ headers = """
     Taipan.h
     TestDomain.h
     TransportProblemException.h
-    UnaryFuncs.h
-    UnaryOp.h
-    UtilC.h
     Utils.h
     WrappedArray.h
+    index.h
+    pyerr.h
     system_dep.h
-    SolverOptions.h
-    SolverOptionsException.h
 """.split()
-    # blocktimer.h
 
 dodgy_sources = """
     Dodgy.cpp
 """.split()
 
-local_env.Prepend(LIBS = ['esysUtils'])
-local_dodgy.Prepend(LIBS = ['esysUtils'])
+local_env = env.Clone()
+
+# collect dependencies for other modules
+escriptlibs = []
+
+if env['uselapack']:
+    escriptlibs += env['lapack_libs']
+if env['mkl']:
+    escriptlibs += env['mkl_libs']
+if env['netcdf']:
+    escriptlibs += env['netcdf_libs']
+
+local_env.PrependUnique(LIBS = escriptlibs)
+env['escript_libs'] = [module_name] + escriptlibs
+
 if IS_WINDOWS:
     local_env.Append(CPPDEFINES = ['ESCRIPT_EXPORTS'])
-    local_dodgy.Append(CPPDEFINES = ['ESCRIPT_EXPORTS'])
-
-module_name = 'escript'
 
 # specify to build shared object
 if local_env['iknowwhatimdoing']:
-    nonped=[local_dodgy.SharedObject(x) for x in dodgy_sources]
+    nonped = [local_env.SharedObject(x) for x in dodgy_sources]
 else:
-    nonped=[]
+    nonped = []
 
-lib = local_env.SharedLibrary(module_name, sources+nonped)
-env.Alias('build_escript_lib', lib)
-
-include_path = Dir('escript', local_env['incinstall'])
+include_path = Dir(module_name, local_env['incinstall'])
 hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_escript_headers', hdr_inst)
 
+lib = local_env.SharedLibrary(module_name, sources+nonped)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_escript_lib', lib_inst)
 
 ### Python wrapper ###
-py_wrapper_local_env.Prepend(LIBS = ['escript', 'esysUtils'])
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'escriptcpp.cpp')
-env.Alias('build_escriptcpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], 'escriptcore')
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_escriptcpp_lib', mod_inst)
+py_env = env.Clone()
+py_env.Prepend(LIBS = [module_name])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'escriptcpp.cpp')
 
-# configure python module
-local_env.SConscript(dirs = ['#/escriptcore/py_src'], variant_dir='py', duplicate=0)
+mod_path = Dir('escriptcore', local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
 
-# configure unit tests
-local_env.SConscript(dirs = ['#/escriptcore/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
+build = env.Alias('build_escript', [hdr_inst, lib, py_lib])
+env.Alias('install_escript', [build, lib_inst, mod_inst])
 
diff --git a/escriptcore/src/SolverOptions.cpp b/escriptcore/src/SolverOptions.cpp
index dd1dd3b..6748fff 100644
--- a/escriptcore/src/SolverOptions.cpp
+++ b/escriptcore/src/SolverOptions.cpp
@@ -14,10 +14,10 @@
 *
 *****************************************************************************/
 
-#include <boost/python.hpp>
-
 #include "SolverOptions.h"
-#include "SolverOptionsException.h"
+#include "EsysException.h"
+
+#include <boost/python.hpp>
 
 namespace bp = boost::python;
 
@@ -50,12 +50,13 @@ SolverBuddy::SolverBuddy() :
     tolerance(1e-8),
     absolute_tolerance(0.),
     inner_tolerance(0.9),
-    drop_tolerance(0.01),
+    drop_tolerance(0.0005),
     drop_storage(2.),
     iter_max(100000),
     inner_iter_max(10),
     truncation(20),
     restart(0),
+    is_complex(false),
     symmetric(false),
     verbose(false),
     adapt_inner_tolerance(true),
@@ -73,6 +74,11 @@ SolverBuddy::SolverBuddy() :
     resetDiagnostics(true);
 }
 
+SolverBuddy::~SolverBuddy()
+{
+}
+
+
 std::string SolverBuddy::getSummary() const
 {
     std::stringstream out;
@@ -89,7 +95,8 @@ std::string SolverBuddy::getSummary() const
         << "Adapt innner tolerance = " << adaptInnerTolerance() << std::endl;
 
     if (getPackage() == SO_DEFAULT || getPackage() == SO_PACKAGE_PASO ||
-            getPackage() == SO_PACKAGE_CUSP) {
+            getPackage() == SO_PACKAGE_CUSP ||
+            getPackage() == SO_PACKAGE_TRILINOS) {
         out << "Solver method = " << getName(getSolverMethod()) << std::endl;
         if (getSolverMethod() == SO_METHOD_GMRES) {
             out << "Truncation  = " << getTruncation() << std::endl
@@ -187,8 +194,6 @@ const char* SolverBuddy::getName(int key) const
         case SO_PACKAGE_CUSP: return "CUSP";
         case SO_PACKAGE_MKL: return "MKL";
         case SO_PACKAGE_PASO: return "PASO";
-        case SO_PACKAGE_PASTIX: return "PASTIX";
-        case SO_PACKAGE_SUPER_LU: return "SUPER_LU";
         case SO_PACKAGE_TRILINOS: return "TRILINOS";
         case SO_PACKAGE_UMFPACK: return "UMFPACK";
 
@@ -198,6 +203,10 @@ const char* SolverBuddy::getName(int key) const
         case SO_METHOD_CHOLEVSKY: return "CHOLEVSKY";
         case SO_METHOD_CR: return "CR";
         case SO_METHOD_DIRECT: return "DIRECT";
+        case SO_METHOD_DIRECT_MUMPS: return "DIRECT_MUMPS";
+        case SO_METHOD_DIRECT_PARDISO: return "DIRECT_PARDISO";
+        case SO_METHOD_DIRECT_SUPERLU: return "DIRECT_SUPERLU";
+        case SO_METHOD_DIRECT_TRILINOS: return "DIRECT_TRILINOS";
         case SO_METHOD_GMRES: return "GMRES";
         case SO_METHOD_HRZ_LUMPING: return "HRZ_LUMPING";
         case SO_METHOD_ITERATIVE: return "ITERATIVE";
@@ -245,7 +254,7 @@ const char* SolverBuddy::getName(int key) const
         case SO_REORDERING_NESTED_DISSECTION: return "NESTED_DISSECTION";
         case SO_REORDERING_NONE: return "NO_REORDERING";
         default:
-            throw SolverOptionsException("getName() invalid option given");
+            throw ValueError("getName() invalid option given");
     }
     return "invalid option";
 }
@@ -273,9 +282,52 @@ void SolverBuddy::resetDiagnostics(bool all)
     }
 }
 
-void SolverBuddy::updateDiagnostics(std::string name, const bp::object& value)
+void SolverBuddy::updateDiagnostics(const std::string& name, bool value)
 {
+    if (name == "converged") {
+        converged = value;
+    } else if (name == "time_step_backtracking_used") {
+        time_step_backtracking_used = value;
+    } else {
+        throw ValueError(std::string("Unknown diagnostic: ") + name);
+    }
+}
+
+void SolverBuddy::updateDiagnostics(const std::string& name, int value)
+{
+    if (name == "num_iter") {
+        cum_num_iter += num_iter = value;
+    } else if (name == "num_level") {
+        num_level = value;
+    } else if (name == "num_inner_iter") {
+        cum_num_inner_iter += num_inner_iter = value;
+    } else if (name == "num_coarse_unknowns") {
+        num_coarse_unknowns = value;
+    } else {
+        throw ValueError(std::string("Unknown diagnostic: ") + name);
+    }
+}
+
+void SolverBuddy::updateDiagnostics(const std::string& name, double value)
+{
+    if (name == "time") {
+        cum_time += time = value;
+    } else if (name == "set_up_time") {
+        cum_set_up_time += set_up_time = value;
+    } else if (name == "net_time") {
+        cum_net_time += net_time = value;
+    } else if (name == "residual_norm") {
+        residual_norm = value;
+    } else if (name == "coarse_level_sparsity") {
+        coarse_level_sparsity = value;
+    } else {
+        throw ValueError(std::string("Unknown diagnostic: ") + name);
+    }
+}
 
+void SolverBuddy::updateDiagnosticsPy(const std::string& name,
+                                      const bp::object& value)
+{
     int i=0;
     double d=0; // to keep older compilers happy
     bool b=false;
@@ -285,50 +337,50 @@ void SolverBuddy::updateDiagnostics(std::string name, const bp::object& value)
 
     if (name == "num_iter") {
         if (!ib)
-            throw SolverOptionsException("setting num_iter to non-int value");
+            throw ValueError("setting num_iter to non-int value");
         cum_num_iter += num_iter = i;
     } else if (name == "num_level") {
         if (!ib)
-            throw SolverOptionsException("setting num_level to non-int value");
+            throw ValueError("setting num_level to non-int value");
         num_level = i;
     } else if (name == "num_inner_iter") {
         if (!ib)
-            throw SolverOptionsException("setting num_inner_iter to non-int value");
+            throw ValueError("setting num_inner_iter to non-int value");
         cum_num_inner_iter += num_inner_iter = i;
     } else if (name == "time") {
         if (!db)
-            throw SolverOptionsException("setting time to non-double value");
+            throw ValueError("setting time to non-double value");
         cum_time += time = d;
     } else if (name == "set_up_time") {
         if (!db)
-            throw SolverOptionsException("setting set_up_time to non-double value");
+            throw ValueError("setting set_up_time to non-double value");
         cum_set_up_time += set_up_time = d;
     } else if (name == "net_time") {
         if (!db)
-            throw SolverOptionsException("setting net_time to non-double value");
+            throw ValueError("setting net_time to non-double value");
         cum_net_time += net_time = d;
     } else if (name == "residual_norm") {
         if (!db)
-            throw SolverOptionsException("setting residual_norm to non-double value");
+            throw ValueError("setting residual_norm to non-double value");
         residual_norm = d;
     } else if (name == "converged") {
         if (!bb)
-            throw SolverOptionsException("setting converged to non-bool value");
+            throw ValueError("setting converged to non-bool value");
         converged = b;
     } else if (name == "time_step_backtracking_used") {
         if (!bb)
-            throw SolverOptionsException("setting time_step_backtracking_used to non-bool value");
+            throw ValueError("setting time_step_backtracking_used to non-bool value");
         time_step_backtracking_used = b;
     } else if (name == "coarse_level_sparsity") {
         if (!db)
-            throw SolverOptionsException("setting coarse_level_sparsity to non-double value");
+            throw ValueError("setting coarse_level_sparsity to non-double value");
         coarse_level_sparsity = d;
     } else if (name == "num_coarse_unknowns") {
         if (!ib)
-            throw SolverOptionsException("setting num_coarse_unknowns to non-int value");
+            throw ValueError("setting num_coarse_unknowns to non-int value");
         num_coarse_unknowns = i;
     } else {
-        throw SolverOptionsException(std::string("Unknown diagnostic: ") + name);
+        throw ValueError(std::string("Unknown diagnostic: ") + name);
     }
 }
 
@@ -353,8 +405,7 @@ double SolverBuddy::getDiagnostics(const std::string name) const
     else if (name == "coarse_level_sparsity") return coarse_level_sparsity;
     else if (name == "num_coarse_unknowns") return num_coarse_unknowns;
 
-    throw SolverOptionsException(std::string("unknown diagnostic item: ")
-                                 + name);
+    throw ValueError(std::string("unknown diagnostic item: ") + name);
 }
 
 bool SolverBuddy::hasConverged() const
@@ -379,7 +430,7 @@ void SolverBuddy::setCoarsening(int method)
             coarsening = meth;
             break;
         default:
-            throw SolverOptionsException("unknown coarsening method");
+            throw ValueError("unknown coarsening method");
     }
 }
 
@@ -391,7 +442,7 @@ SolverOptions SolverBuddy::getCoarsening() const
 void SolverBuddy::setMinCoarseMatrixSize(int size)
 {
     if (size < 0) {
-        throw SolverOptionsException("minimum size of the coarsest level "
+        throw ValueError("minimum size of the coarsest level "
                                      "matrix must be non-negative.");
     }
     min_coarse_matrix_size = size;
@@ -407,10 +458,12 @@ void SolverBuddy::setPreconditioner(int precon)
     SolverOptions preconditioner = static_cast<SolverOptions>(precon);
     switch(preconditioner) {
         case SO_PRECONDITIONER_AMG:
+/*
 #ifdef ESYS_MPI
-            throw SolverOptionsException("AMG preconditioner is not supported in MPI builds");
+            throw ValueError("AMG preconditioner is not supported in MPI builds");
             break;
 #endif
+*/
         case SO_PRECONDITIONER_AMLI:
         case SO_PRECONDITIONER_BOOMERAMG:
         case SO_PRECONDITIONER_GAUSS_SEIDEL:
@@ -423,7 +476,7 @@ void SolverBuddy::setPreconditioner(int precon)
             this->preconditioner = preconditioner;
             break;
         default:
-            throw SolverOptionsException("unknown preconditioner");
+            throw ValueError("unknown preconditioner");
     }
 }
 
@@ -437,7 +490,7 @@ void SolverBuddy::setSmoother(int s)
     SolverOptions smoother = static_cast<SolverOptions>(s);
     if (smoother != SO_PRECONDITIONER_JACOBI &&
             smoother != SO_PRECONDITIONER_GAUSS_SEIDEL) {
-        throw SolverOptionsException("unknown smoother");
+        throw ValueError("unknown smoother");
     }
     this->smoother = smoother;
 }
@@ -470,20 +523,24 @@ void SolverBuddy::setSolverMethod(int method)
             this->method = meth;
             break;
         case SO_METHOD_DIRECT:
-#ifdef USE_UMFPACK
-            this->method = meth;
-            break;
-#elif defined MKL
-            this->method = meth;
-            break;
-#elif defined PASTIX
+        case SO_METHOD_DIRECT_MUMPS:
+        case SO_METHOD_DIRECT_PARDISO:
+        case SO_METHOD_DIRECT_SUPERLU:
+        case SO_METHOD_DIRECT_TRILINOS:
+#if defined(ESYS_HAVE_UMFPACK) || defined(ESYS_HAVE_TRILINOS) || defined(ESYS_HAVE_MKL)
+#ifndef ESYS_HAVE_TRILINOS
+            // translate specific direct solver setting to generic one for PASO
+            this->method = SO_METHOD_DIRECT;
+#else
             this->method = meth;
+#endif
             break;
 #else
-            throw SolverOptionsException("Cannot use DIRECT solver method, the running escript was not compiled with a direct solver enabled");
+            throw ValueError("Cannot use DIRECT solver method, the running "
+                    "escript was not compiled with a direct solver enabled");
 #endif
         default:
-            throw SolverOptionsException("unknown solver method");
+            throw ValueError("unknown solver method");
     }
 }
 
@@ -501,7 +558,7 @@ void SolverBuddy::setSolverTarget(int target)
             this->target = targ;
             break;
         default:
-            throw SolverOptionsException("unknown solver target");
+            throw ValueError("unknown solver target");
     }
 }
 
@@ -518,14 +575,12 @@ void SolverBuddy::setPackage(int package)
         case SO_PACKAGE_CUSP:
         case SO_PACKAGE_MKL:
         case SO_PACKAGE_PASO:
-        case SO_PACKAGE_PASTIX:
-        case SO_PACKAGE_SUPER_LU:
         case SO_PACKAGE_TRILINOS:
         case SO_PACKAGE_UMFPACK:
             this->package = pack;
             break;
         default:
-            throw SolverOptionsException("unknown solver package");
+            throw ValueError("unknown solver package");
     }
 }
 
@@ -545,7 +600,7 @@ void SolverBuddy::setReordering(int ordering)
             reordering = ord;
             break;
         default:
-            throw SolverOptionsException("unknown reordering strategy");
+            throw ValueError("unknown reordering strategy");
     }
 }
 
@@ -557,7 +612,7 @@ SolverOptions SolverBuddy::getReordering() const
 void SolverBuddy::setRestart(int restart)
 {
     if (restart < 0)
-        throw SolverOptionsException("restart must be non-negative.");
+        throw ValueError("restart must be non-negative.");
 
     this->restart = restart;
 }
@@ -578,7 +633,7 @@ int SolverBuddy::_getRestartForC() const
 void SolverBuddy::setDiagonalDominanceThreshold(double value)
 {
     if (value < 0. || value > 1.)
-        throw SolverOptionsException("Diagonal dominance threshold must be between 0 and 1.");
+        throw ValueError("Diagonal dominance threshold must be between 0 and 1.");
     diagonal_dominance_threshold = value;
 }
 
@@ -590,7 +645,7 @@ double SolverBuddy::getDiagonalDominanceThreshold() const
 void SolverBuddy::setTruncation(int truncation)
 {
     if (truncation < 1)
-        throw SolverOptionsException("truncation must be positive.");
+        throw ValueError("truncation must be positive.");
     this->truncation = truncation;
 }
 
@@ -602,7 +657,7 @@ int SolverBuddy::getTruncation() const
 void SolverBuddy::setInnerIterMax(int iter_max)
 {
     if (iter_max < 1)
-        throw SolverOptionsException("maximum number of inner iteration must be positive.");
+        throw ValueError("maximum number of inner iteration must be positive.");
     inner_iter_max = iter_max;
 }
 
@@ -614,7 +669,7 @@ int SolverBuddy::getInnerIterMax() const
 void SolverBuddy::setIterMax(int iter_max)
 {
     if (iter_max < 1)
-        throw SolverOptionsException("maximum number of iteration steps must be positive.");
+        throw ValueError("maximum number of iteration steps must be positive.");
     this->iter_max = iter_max;
 }
 
@@ -626,7 +681,7 @@ int SolverBuddy::getIterMax() const
 void SolverBuddy::setLevelMax(int level_max)
 {
     if (level_max < 0)
-        throw SolverOptionsException("maximum number of coarsening levels must be non-negative.");
+        throw ValueError("maximum number of coarsening levels must be non-negative.");
     this->level_max = level_max;
 }
 
@@ -648,7 +703,7 @@ int SolverBuddy::getCycleType() const
 void SolverBuddy::setCoarseningThreshold(double theta)
 {
     if (theta < 0. || theta > 1.)
-        throw SolverOptionsException("threshold must be between 0 and 1.");
+        throw ValueError("threshold must be between 0 and 1.");
     coarsening_threshold = theta;
 }
 
@@ -660,7 +715,7 @@ double SolverBuddy::getCoarseningThreshold() const
 void SolverBuddy::setNumSweeps(int sweeps)
 {
     if (sweeps < 1)
-        throw SolverOptionsException("number of sweeps must be positive.");
+        throw ValueError("number of sweeps must be positive.");
     this->sweeps = sweeps;
 }
 
@@ -672,7 +727,7 @@ int SolverBuddy::getNumSweeps() const
 void SolverBuddy::setNumPreSweeps(int sweeps)
 {
     if (sweeps < 1)
-        throw SolverOptionsException("number of pre-sweeps must be positive.");
+        throw ValueError("number of pre-sweeps must be positive.");
     pre_sweeps = sweeps;
 }
 
@@ -684,7 +739,7 @@ int SolverBuddy::getNumPreSweeps() const
 void SolverBuddy::setNumPostSweeps(int sweeps)
 {
     if (sweeps < 1)
-       throw SolverOptionsException("number of post-sweeps must be positive.");
+       throw ValueError("number of post-sweeps must be positive.");
     post_sweeps = sweeps;
 }
 
@@ -696,7 +751,7 @@ int SolverBuddy::getNumPostSweeps() const
 void SolverBuddy::setTolerance(double rtol)
 {
     if (rtol < 0. || rtol > 1.)
-        throw SolverOptionsException("tolerance must be between 0 and 1.");
+        throw ValueError("tolerance must be between 0 and 1.");
     tolerance = rtol;
 }
 
@@ -708,7 +763,7 @@ double SolverBuddy::getTolerance() const
 void SolverBuddy::setAbsoluteTolerance(double atol)
 {
     if (atol < 0.)
-       throw SolverOptionsException("absolute tolerance must be non-negative.");
+       throw ValueError("absolute tolerance must be non-negative.");
     absolute_tolerance = atol;
 }
 
@@ -720,7 +775,7 @@ double SolverBuddy::getAbsoluteTolerance() const
 void SolverBuddy::setInnerTolerance(double rtol)
 {
     if (rtol <= 0. || rtol > 1.)
-        throw SolverOptionsException("tolerance must be positive and less than or equal to 1.");
+        throw ValueError("tolerance must be positive and less than or equal to 1.");
     inner_tolerance = rtol;
 }
 
@@ -732,7 +787,7 @@ double SolverBuddy::getInnerTolerance() const
 void SolverBuddy::setDropTolerance(double drop_tol)
 {
     if (drop_tol < 0. || drop_tol > 1.)
-        throw SolverOptionsException("drop tolerance must be between 0 and 1.");
+        throw ValueError("drop tolerance must be between 0 and 1.");
     drop_tolerance = drop_tol;
 }
 
@@ -744,7 +799,7 @@ double SolverBuddy::getDropTolerance() const
 void SolverBuddy::setDropStorage(double storage)
 {
     if (storage < 1.)
-        throw SolverOptionsException("allowed storage increase must be greater than or equal to 1.");
+        throw ValueError("allowed storage increase must be greater than or equal to 1.");
     drop_storage = storage;
 }
 
@@ -756,7 +811,7 @@ double SolverBuddy::getDropStorage() const
 void SolverBuddy::setRelaxationFactor(double factor)
 {
     if (factor < 0.)
-        throw SolverOptionsException("relaxation factor must be non-negative.");
+        throw ValueError("relaxation factor must be non-negative.");
     relaxation = factor;
 }
 
@@ -765,6 +820,16 @@ double SolverBuddy::getRelaxationFactor() const
     return relaxation;
 }
 
+bool SolverBuddy::isComplex() const
+{
+    return is_complex;
+}
+
+void SolverBuddy::setComplex(bool flag)
+{
+    is_complex = flag;
+}
+
 bool SolverBuddy::isSymmetric() const
 {
     return symmetric;
@@ -883,7 +948,7 @@ void SolverBuddy::setLocalPreconditioner(bool use)
 void SolverBuddy::setMinCoarseMatrixSparsity(double sparsity)
 {
     if (sparsity < 0. || sparsity > 1.)
-        throw SolverOptionsException("sparsity must be between 0 and 1.");
+        throw ValueError("sparsity must be between 0 and 1.");
     min_sparsity = sparsity;
 }
 
@@ -895,7 +960,7 @@ double SolverBuddy::getMinCoarseMatrixSparsity() const
 void SolverBuddy::setNumRefinements(int refinements)
 {
     if (refinements < 0)
-        throw SolverOptionsException("number of refinements must be non-negative.");
+        throw ValueError("number of refinements must be non-negative.");
     this->refinements = refinements;
 }
 
@@ -907,7 +972,7 @@ int SolverBuddy::getNumRefinements() const
 void SolverBuddy::setNumCoarseMatrixRefinements(int refinements)
 {
     if (refinements < 0)
-        throw SolverOptionsException("number of coarse matrix refinements must be non-negative.");
+        throw ValueError("number of coarse matrix refinements must be non-negative.");
     coarse_refinements = refinements;
 }
 
@@ -949,7 +1014,7 @@ void SolverBuddy::setAMGInterpolation(int method)
             amg_interpolation_method = meth;
             break;
         default:
-            throw SolverOptionsException("unknown AMG interpolation method");
+            throw ValueError("unknown AMG interpolation method");
     }
 }
 
@@ -968,7 +1033,7 @@ void SolverBuddy::setODESolver(int method)
             ode_solver = ode;
             break;
         default:
-            throw SolverOptionsException("unknown ODE solver method");
+            throw ValueError("unknown ODE solver method");
     }
 }
 
@@ -977,5 +1042,18 @@ SolverOptions SolverBuddy::getODESolver() const
     return ode_solver;
 }
 
+void SolverBuddy::setTrilinosParameter(const std::string& name,
+                                       const bp::object& value)
+{
+#ifdef ESYS_HAVE_TRILINOS
+    trilinosParams[name] = value;
+#endif
+}
+
+bp::dict SolverBuddy::getTrilinosParameters() const
+{
+    return trilinosParams;
+}
+
 } // namespace escript
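For reference, a minimal sketch (not part of this patch) of the new typed updateDiagnostics overloads as seen from C++; the <escript/SolverOptions.h> include path is assumed:

#include <escript/SolverOptions.h>

void recordDiagnostics(escript::SolverBuddy& sb)
{
    sb.updateDiagnostics("num_iter", 42);           // int overload
    sb.updateDiagnostics("residual_norm", 1.5e-9);  // double overload
    sb.updateDiagnostics("converged", true);        // bool overload
    // An unrecognised name now throws ValueError rather than the removed
    // SolverOptionsException.
}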
 
diff --git a/escriptcore/src/SolverOptions.h b/escriptcore/src/SolverOptions.h
index 46e049e..8e10a2a 100644
--- a/escriptcore/src/SolverOptions.h
+++ b/escriptcore/src/SolverOptions.h
@@ -17,6 +17,7 @@
 #ifndef __ESCRIPT_SOLVEROPTIONS_H__
 #define __ESCRIPT_SOLVEROPTIONS_H__
 
+#include <boost/python/dict.hpp>
 #include <boost/python/object.hpp>
 #include "system_dep.h"
 
@@ -32,8 +33,6 @@ SO_TARGET_GPU: use GPUs to solve system
 SO_PACKAGE_CUSP: CUDA sparse linear algebra package
 SO_PACKAGE_MKL: Intel's MKL solver library
 SO_PACKAGE_PASO: PASO solver package
-SO_PACKAGE_PASTIX: the Pastix direct solver_package
-SO_PACKAGE_SUPER_LU: the Super_LU solver package
 SO_PACKAGE_TRILINOS: The TRILINOS parallel solver class library from Sandia National Labs
 SO_PACKAGE_UMFPACK: The UMFPACK library
 
@@ -41,7 +40,11 @@ SO_METHOD_BICGSTAB: The stabilized Bi-Conjugate Gradient method
 SO_METHOD_CHOLEVSKY: The direct solver based on LDLT factorization (can only be applied for symmetric PDEs)
 SO_METHOD_CGS: The conjugate gradient square method
 SO_METHOD_CR: The conjugate residual method
-SO_METHOD_DIRECT: The direct solver based on LDU factorization
+SO_METHOD_DIRECT: A direct solver based on LDU factorization
+SO_METHOD_DIRECT_MUMPS: MUMPS parallel direct solver
+SO_METHOD_DIRECT_PARDISO: MKL Pardiso direct solver
+SO_METHOD_DIRECT_SUPERLU: SuperLU direct solver
+SO_METHOD_DIRECT_TRILINOS: Trilinos-based direct solver
 SO_METHOD_GMRES: The Gram-Schmidt minimum residual method
 SO_METHOD_HRZ_LUMPING: Matrix lumping using the HRZ approach
 SO_METHOD_ITERATIVE: The default iterative solver
@@ -98,8 +101,6 @@ enum SolverOptions
     SO_PACKAGE_CUSP,
     SO_PACKAGE_MKL,
     SO_PACKAGE_PASO,
-    SO_PACKAGE_PASTIX,
-    SO_PACKAGE_SUPER_LU,
     SO_PACKAGE_TRILINOS,
     SO_PACKAGE_UMFPACK,
 
@@ -110,6 +111,10 @@ enum SolverOptions
     SO_METHOD_CHOLEVSKY,
     SO_METHOD_CR,
     SO_METHOD_DIRECT,
+    SO_METHOD_DIRECT_MUMPS,
+    SO_METHOD_DIRECT_PARDISO,
+    SO_METHOD_DIRECT_SUPERLU,
+    SO_METHOD_DIRECT_TRILINOS,
     SO_METHOD_GMRES,
     SO_METHOD_HRZ_LUMPING,
     SO_METHOD_ITERATIVE,
@@ -160,10 +165,27 @@ enum SolverOptions
     SO_REORDERING_NONE
 };
 
+/// returns true if the passed solver method refers to a direct solver type
+inline bool isDirectSolver(const SolverOptions& method)
+{
+    switch (method) {
+        case SO_METHOD_DIRECT:
+        case SO_METHOD_DIRECT_MUMPS:
+        case SO_METHOD_DIRECT_PARDISO:
+        case SO_METHOD_DIRECT_SUPERLU:
+        case SO_METHOD_DIRECT_TRILINOS:
+            return true;
+        default:
+            break;
+    }
+    return false;
+}
+
 class ESCRIPT_DLL_API SolverBuddy
 {
 public:
     SolverBuddy();
+    ~SolverBuddy();
 
     /**
         Returns a string reporting the current settings
@@ -193,8 +215,12 @@ public:
                  "net_time", "residual_norm", "converged").
         \param value new value of the diagnostic information
     */
-    void updateDiagnostics(const std::string key,
-                           const boost::python::object& value);
+    void updateDiagnosticsPy(const std::string& key,
+                             const boost::python::object& value);
+
+    void updateDiagnostics(const std::string& key, bool value);
+    void updateDiagnostics(const std::string& key, int value);
+    void updateDiagnostics(const std::string& key, double value);
 
     /**
         Returns the diagnostic information for the given ``name``.
@@ -311,7 +337,9 @@ public:
         iterative rather than a direct solver should be used.
 
         \param method key of the solver method to be used, should be in
-            `SO_DEFAULT`, `SO_METHOD_DIRECT`, `SO_METHOD_CHOLEVSKY`,
+            `SO_DEFAULT`, `SO_METHOD_DIRECT`, `SO_METHOD_DIRECT_MUMPS`,
+            `SO_METHOD_DIRECT_PARDISO`, `SO_METHOD_DIRECT_SUPERLU`,
+            `SO_METHOD_DIRECT_TRILINOS`, `SO_METHOD_CHOLEVSKY`,
             `SO_METHOD_PCG`, `SO_METHOD_CR`, `SO_METHOD_CGS`,
             `SO_METHOD_BICGSTAB`, `SO_METHOD_GMRES`, `SO_METHOD_PRES20`,
             `SO_METHOD_ROWSUM_LUMPING`, `SO_METHOD_HRZ_LUMPING`,
@@ -351,8 +379,8 @@ public:
 
         \param package key of the solver package to be used, should be in
                `SO_DEFAULT`, `SO_PACKAGE_CUSP`, `SO_PACKAGE_PASO`,
-               `SO_PACKAGE_PASTIX`, `SO_PACKAGE_MKL`, `SO_PACKAGE_UMFPACK`,
-               `SO_PACKAGE_SUPER_LU`, `SO_PACKAGE_TRILINOS`
+               `SO_PACKAGE_MKL`, `SO_PACKAGE_UMFPACK`,
+               `SO_PACKAGE_TRILINOS`
 
         \note Not all packages are supported on all implementations.
               An exception may be thrown on some platforms if the selected
@@ -615,6 +643,20 @@ public:
     double getRelaxationFactor() const;
 
     /**
+        Checks if the coefficient matrix is set to be complex-valued.
+
+        \return true if a complex-valued PDE is indicated, false otherwise
+    */
+    bool isComplex() const;
+
+    /**
+        Sets the complex flag for the coefficient matrix to ``complex``.
+
+        \param complex If true, the complex flag is set, otherwise it is cleared.
+    */
+    void setComplex(bool complex);
+
+    /**
         Checks if symmetry of the coefficient matrix is indicated.
 
         \return true if a symmetric PDE is indicated, false otherwise
@@ -844,8 +886,27 @@ public:
     */
     SolverOptions getODESolver() const;
 
+    /**
+        Sets a Trilinos preconditioner/solver parameter.
+        \note Escript does not check for validity of the parameter name
+        (e.g. spelling mistakes). Parameters are passed 1:1 to escript's
+        Trilinos wrapper and from there to the relevant Trilinos package.
+        See the relevant Trilinos documentation for valid parameter strings
+        and values.
+        \note This method does nothing in a non-Trilinos build.
+    */
+    void setTrilinosParameter(const std::string& name,
+                              const boost::python::object& value);
+
+    /**
+        Returns a boost python dictionary of set Trilinos parameters.
+        \note This method returns an empty dictionary in a non-Trilinos build.
+    */
+    boost::python::dict getTrilinosParameters() const;
 
 protected:
+    boost::python::dict trilinosParams;
+
     SolverOptions target;
     SolverOptions package;
     SolverOptions method;
@@ -869,6 +930,7 @@ protected:
     int inner_iter_max;
     int truncation;
     int restart; //0 will have to be None in python, will get tricky
+    bool is_complex;
     bool symmetric;
     bool verbose;
     bool adapt_inner_tolerance;
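A short sketch (not part of this patch) combining the new direct-solver method keys, the isDirectSolver() helper and the Trilinos parameter pass-through; the parameter name is purely illustrative and, as documented above, is forwarded to Trilinos unchecked:

#include <escript/SolverOptions.h>    // assumed install path
#include <boost/python/object.hpp>

void configureDirect(escript::SolverBuddy& sb)
{
    // throws ValueError if escript was built without any direct solver
    sb.setSolverMethod(escript::SO_METHOD_DIRECT_TRILINOS);

    if (escript::isDirectSolver(sb.getSolverMethod())) {
        // only takes effect in a Trilinos-enabled build; values are
        // boost::python objects, so Python must already be initialised
        sb.setTrilinosParameter("reuse preconditioner",
                                boost::python::object(true));
    }
}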
diff --git a/escriptcore/src/SolverOptionsException.cpp b/escriptcore/src/SolverOptionsException.cpp
deleted file mode 100644
index ca54343..0000000
--- a/escriptcore/src/SolverOptionsException.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include "SolverOptionsException.h"
-
-namespace escript {
-
-const std::string SolverOptionsException::exceptionNameValue("SolverOptionsException");
-
-const std::string& SolverOptionsException::exceptionName() const {
-    return exceptionNameValue;
-}
-
-}
-
diff --git a/escriptcore/src/SolverOptionsException.h b/escriptcore/src/SolverOptionsException.h
deleted file mode 100644
index 291d09b..0000000
--- a/escriptcore/src/SolverOptionsException.h
+++ /dev/null
@@ -1,91 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#ifndef __SOLVEROPTIONS_EXCEPTION_H__
-#define __SOLVEROPTIONS_EXCEPTION_H__
-
-#include "system_dep.h"
-#include <esysUtils/EsysException.h>
-
-namespace escript {
-
-/**
-   \brief
-   SolverOptionsException exception class.
-*/
-class ESCRIPT_DLL_API SolverOptionsException : public esysUtils::EsysException
-{
-protected:
-    typedef EsysException Parent;
-
-public:
-    /**
-       \brief
-       Default constructor for the exception.
-    */
-    SolverOptionsException() : Parent() { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    SolverOptionsException(const char *cstr) : Parent(cstr) { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    SolverOptionsException(const std::string &str) : Parent(str) { updateMessage(); }
-
-    /**
-       \brief
-       Copy Constructor.
-    */
-    SolverOptionsException(const SolverOptionsException &other) : Parent(other)
-    {
-        updateMessage();
-    }
-
-    /// Destructor
-    virtual ~SolverOptionsException() THROW(NO_ARG) {}
-
-    /**
-       \brief
-       Assignment operator.
-    */
-    inline SolverOptionsException& operator=(const SolverOptionsException &other ) THROW(NO_ARG)
-    {
-        Parent::operator=(other);
-        updateMessage();
-        return *this;
-    }
-
-    /**
-       \brief
-       Returns the name of the exception.
-    */
-    virtual const std::string& exceptionName() const;
-
-private:
-    //
-    // the exception name is immutable and class-wide.
-    static const std::string exceptionNameValue;
-};
-
-}
-
-#endif
-
diff --git a/escriptcore/src/SplitWorld.cpp b/escriptcore/src/SplitWorld.cpp
index c54b8e9..153bc82 100644
--- a/escriptcore/src/SplitWorld.cpp
+++ b/escriptcore/src/SplitWorld.cpp
@@ -13,18 +13,12 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "esysUtils/Esys_MPI.h"
 #include "SplitWorld.h"
+
 #include "AbstractDomain.h"
 #include "SplitWorldException.h"
-#include "SplitWorldException.h"
-#include "esysUtils/pyerr.h"
+#include "pyerr.h"
 
-#include <iostream>
 #include <sstream>
 
 using namespace boost::python;
@@ -32,7 +26,7 @@ using namespace escript;
 namespace rs=escript::reducerstatus;
 
 
-double SplitWorld::getScalarVariable(const std::string& name)
+DataTypes::real_t SplitWorld::getScalarVariable(const std::string& name)
 {
     // do we have a variable of that name?
     return localworld->getScalarVariable(name);
@@ -45,16 +39,16 @@ boost::python::object SplitWorld::getLocalObjectVariable(const std::string& name
 }
 
 
-
 SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
-    :localworld((SubWorld*)0), swcount(numgroups>0?numgroups:1), jobcounter(1), manualimport(false)
+    : localworld((SubWorld*)0), swcount(numgroups > 0 ? numgroups : 1),
+      jobcounter(1), manualimport(false)
 {
-    globalcom=esysUtils::makeInfo(global);
+    globalcom = makeInfo(global);
     
     int grank=0;
     int wsize=1;		// each world has this many processes
-    esysUtils::JMPI subcom;	// communicator linking other processes in this subworld
-    esysUtils::JMPI corrcom;	// communicator linking corresponding processes in different subworlds
+    JMPI subcom;	// communicator linking other processes in this subworld
+    JMPI corrcom;	// communicator linking corresponding processes in different subworlds
     #ifdef ESYS_MPI
 	int gsize=globalcom->size;
 	grank=globalcom->rank;
@@ -69,7 +63,7 @@ SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
 	{
 	    throw SplitWorldException("SplitWorld error: Unable to form communicator.");
 	}
-	subcom=esysUtils::makeInfo(sub,true);
+	subcom=makeInfo(sub,true);
 	
 	
 	MPI_Comm corrsub;
@@ -78,7 +72,7 @@ SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
 	{
 	    throw SplitWorldException("SplitWorld error: Unable to form communicator.");
 	}
-	corrcom=esysUtils::makeInfo(corrsub,true);
+	corrcom=makeInfo(corrsub,true);
 	
     #else
 	if (numgroups!=1)
@@ -86,8 +80,8 @@ SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
 	    throw SplitWorldException("SplitWorld error: non-MPI builds can only create 1 subworld.");
 	  
 	}
-	subcom=esysUtils::makeInfo(0);
-	corrcom=esysUtils::makeInfo(0);
+	subcom=makeInfo(0);
+	corrcom=makeInfo(0);
     #endif
     localworld=SubWorld_ptr(new SubWorld(globalcom, subcom,corrcom, swcount, grank/wsize,manualimport));
     localid=grank/wsize;
@@ -128,7 +122,7 @@ object SplitWorld::buildDomains(tuple t, dict kwargs)
 // Executes all pending jobs on all subworlds
 void SplitWorld::runJobs()
 {
-    esysUtils::NoCOMM_WORLD ncw;	// it's destructor will unset the flag
+    NoCOMM_WORLD ncw;	// its destructor will unset the flag
     localworld->resetInterest();  
     localworld->newRunJobs();
     try 
@@ -172,7 +166,7 @@ void SplitWorld::runJobs()
 	} while (false);
         int res=mres;
         // now we find out about the other worlds
-        if (!esysUtils::checkResult(res, mres, globalcom))
+        if (!checkResult(res, mres, globalcom))
         {
 	    throw SplitWorldException("MPI appears to have failed.");
         }
@@ -197,7 +191,7 @@ void SplitWorld::runJobs()
 	    char* resultstr=0;
 	    // now we ship around the error message - This should be safe since
 	    // eveyone must have finished their Jobs to get here
-	    if (!esysUtils::shipString(err.c_str(), &resultstr, globalcom->comm))
+	    if (!shipString(err.c_str(), &resultstr, globalcom->comm))
 	    {
 		throw SplitWorldException("MPI appears to have failed.");
 	    }
@@ -261,13 +255,13 @@ void SplitWorld::addJobPerWorld(boost::python::object creator, boost::python::tu
     
     // MPI check to ensure that it worked for everybody
     int mstat=0;
-    if (!esysUtils::checkResult(errstat, mstat, globalcom))
+    if (!checkResult(errstat, mstat, globalcom))
     {
 	throw SplitWorldException("MPI appears to have failed.");
     }
     
       // Now we need to find out if anyone else had an error      
-    if (!esysUtils::checkResult(errstat, mstat, globalcom))
+    if (!checkResult(errstat, mstat, globalcom))
     {
 	throw SplitWorldException("MPI appears to have failed.");
     }
@@ -278,7 +272,7 @@ void SplitWorld::addJobPerWorld(boost::python::object creator, boost::python::tu
 	char* resultstr=0;
 	// now we ship around the error message - this should be safe since
 	// everyone must have finished their Jobs to get here
-	if (!esysUtils::shipString(errmsg.c_str(), &resultstr, globalcom->comm))
+	if (!shipString(errmsg.c_str(), &resultstr, globalcom->comm))
 	{
 	    throw SplitWorldException("MPI appears to have failed.");
 	}      
@@ -405,13 +399,13 @@ void SplitWorld::distributeJobs()
     
     // MPI check to ensure that it worked for everybody
     int mstat=0;
-    if (!esysUtils::checkResult(errstat, mstat, globalcom))
+    if (!checkResult(errstat, mstat, globalcom))
     {
 	throw SplitWorldException("MPI appears to have failed.");
     }
     
       // Now we need to find out if anyone else had an error      
-    if (!esysUtils::checkResult(errstat, mstat, globalcom))
+    if (!checkResult(errstat, mstat, globalcom))
     {
 	throw SplitWorldException("MPI appears to have failed.");
     }
@@ -422,7 +416,7 @@ void SplitWorld::distributeJobs()
 	char* resultstr=0;
 	// now we ship around the error message - this should be safe since
 	// everyone must have finished their Jobs to get here
-	if (!esysUtils::shipString(errmsg.c_str(), &resultstr, globalcom->comm))
+	if (!shipString(errmsg.c_str(), &resultstr, globalcom->comm))
 	{
 	    throw SplitWorldException("MPI appears to have failed.");
 	}      
@@ -541,6 +535,5 @@ boost::python::object raw_addVariable(boost::python::tuple t, boost::python::dic
     return object();
 }
 
-
-
 }
+
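To make the communicator construction in the constructor above concrete, a small worked example (not part of this patch); the exact corrcom grouping is inferred from the "corresponding processes" comment and from localid = grank/wsize:

// 8 MPI ranks, numgroups = 2  =>  wsize = 4 processes per subworld
//
//   global rank (grank):                 0 1 2 3   4 5 6 7
//   subworld (localid = grank/wsize):    0 0 0 0   1 1 1 1
//
//   subcom  links the ranks within one subworld:   {0,1,2,3} and {4,5,6,7}
//   corrcom links corresponding ranks across
//   subworlds (same grank % wsize):                {0,4} {1,5} {2,6} {3,7}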
diff --git a/escriptcore/src/SplitWorld.h b/escriptcore/src/SplitWorld.h
index 59b1792..d75a032 100644
--- a/escriptcore/src/SplitWorld.h
+++ b/escriptcore/src/SplitWorld.h
@@ -16,11 +16,13 @@
 
 #ifndef escript_SplitWorld_H
 #define escript_SplitWorld_H
+
+#include "AbstractReducer.h"
+#include "SubWorld.h"
+
 #include <boost/python.hpp>
 #include <boost/smart_ptr.hpp>
-#include "esysUtils/Esys_MPI.h"
-#include "SubWorld.h"
-#include "AbstractReducer.h"
+
 namespace escript
 {
 
@@ -54,7 +56,7 @@ public:
 
     void clearAllJobs();
 
-    double getScalarVariable(const std::string& name);
+    DataTypes::real_t getScalarVariable(const std::string& name);
     boost::python::object getLocalObjectVariable(const std::string& name);
     
     int getSubWorldCount();
@@ -64,8 +66,8 @@ public:
     
     
 private:    
-    esysUtils::JMPI globalcom;	// communicator linking all procs used in this splitworld
-    esysUtils::JMPI leadercom;	// communicator linking the first proc in each subworld
+    JMPI globalcom;	// communicator linking all procs used in this splitworld
+    JMPI leadercom;	// communicator linking the first proc in each subworld
     escript::SubWorld_ptr localworld;	// subworld which this process belongs to
     unsigned int swcount;		// number of subworlds
     unsigned int localid;		// position of localworld in overall world sequence
@@ -104,3 +106,4 @@ boost::python::object raw_addJobPerWorld(boost::python::tuple t, boost::python::
 boost::python::object raw_addVariable(boost::python::tuple t, boost::python::dict kwargs);
 }
 #endif
+
diff --git a/escriptcore/src/SplitWorldException.cpp b/escriptcore/src/SplitWorldException.cpp
deleted file mode 100644
index 0986f1b..0000000
--- a/escriptcore/src/SplitWorldException.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2014-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-// Adapted from FunctionSpaceException.cpp
-
-#include "SplitWorldException.h"
-
-
-using namespace escript;
-
-
-const std::string 
-SplitWorldException::exceptionNameValue("SplitWorldException");
-
-
-const std::string &
-SplitWorldException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
diff --git a/escriptcore/src/SplitWorldException.h b/escriptcore/src/SplitWorldException.h
index 7708f60..59c7183 100644
--- a/escriptcore/src/SplitWorldException.h
+++ b/escriptcore/src/SplitWorldException.h
@@ -19,87 +19,19 @@
 #ifndef  __ESCRIPT_SPLITWORLDEXCEPTION_H__
 #define  __ESCRIPT_SPLITWORLDEXCEPTION_H__
 
-#include "system_dep.h"
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
 namespace escript
 {
 
-  /**
-  \brief
-  SplitWorldException exception class.
-
-  Description:
-  SplitWorldException exception class.
-  The class provides a public function returning the exception name
-  */
-  class SplitWorldException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SplitWorldException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SplitWorldException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SplitWorldException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SplitWorldException(const SplitWorldException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESCRIPT_DLL_API
-    inline SplitWorldException &
-    operator=(const SplitWorldException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-
-    /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~SplitWorldException() THROW(NO_ARG) {}
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string& exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
+class SplitWorldException : public EsysException
+{
+public:
+    SplitWorldException(const std::string& str) : EsysException(str) {}
+    virtual ~SplitWorldException() throw() {}
+};
 
 } // end of namespace
 
-#endif //  __ESCRIPT_SPLITWORLDEXCEPTION_H__
+#endif // __ESCRIPT_SPLITWORLDEXCEPTION_H__
 
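
The rewritten header reduces SplitWorldException to a thin subclass of
EsysException carrying only a message. A minimal usage sketch follows; it
assumes EsysException ultimately derives from std::exception and so exposes
what(), which is not shown in this diff, and requireVariable/mainSketch are
hypothetical names:

    #include "SplitWorldException.h"
    #include <iostream>
    #include <string>

    // Throw the simplified exception when a lookup fails.
    void requireVariable(bool known, const std::string& name)
    {
        if (!known)
            throw escript::SplitWorldException("No variable named \"" + name + "\".");
    }

    int mainSketch()
    {
        try {
            requireVariable(false, "gravity");
        } catch (const escript::SplitWorldException& e) {
            std::cerr << "SplitWorld error: " << e.what() << std::endl;  // what() assumed via std::exception
            return 1;
        }
        return 0;
    }
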
diff --git a/escriptcore/src/SubWorld.cpp b/escriptcore/src/SubWorld.cpp
index 4af3c03..c0af186 100644
--- a/escriptcore/src/SubWorld.cpp
+++ b/escriptcore/src/SubWorld.cpp
@@ -14,17 +14,12 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "SubWorld.h"
-#include "SplitWorldException.h"
-#include "esysUtils/pyerr.h"
-
 #include "MPIDataReducer.h"
 #include "MPIScalarReducer.h"
 #include "NonReducedVariable.h"
+#include "SplitWorldException.h"
+#include "pyerr.h"
 
 #include <boost/python/import.hpp>
 #include <boost/python/dict.hpp>
@@ -32,20 +27,21 @@
 #include <iostream>
 
 using namespace escript;
-namespace bp=boost::python;
-using namespace esysUtils;
-namespace rs=escript::reducerstatus;
+namespace bp = boost::python;
+namespace rs = escript::reducerstatus;
 
 using namespace std;
 
-SubWorld::SubWorld(JMPI& global, JMPI& comm, JMPI& corr, unsigned int subworldcount, unsigned int local_id, bool manualimport)
-    :everyone(global), swmpi(comm), corrmpi(corr), domain((AbstractDomain*)0), 
+SubWorld::SubWorld(JMPI& global, JMPI& comm, JMPI& corr,
+                   unsigned int subworldcount, unsigned int local_id,
+                   bool manualimport)
+    : everyone(global), swmpi(comm), corrmpi(corr), domain((AbstractDomain*)0),
     swcount(subworldcount), localid(local_id), manualimports(manualimport)
-#ifdef ESYS_MPI    
+#ifdef ESYS_MPI
     ,globalinfoinvalid(true)
-#endif    
+#endif
 {
-	swcount=subworldcount;	// redundant to keep clang happy
+        swcount=subworldcount;  // redundant to keep clang happy
 }
 
 SubWorld::~SubWorld()
@@ -72,7 +68,7 @@ Domain_ptr SubWorld::getDomain()
     return domain;
 }
 
-void SubWorld::addJob(boost::python::object j)
+void SubWorld::addJob(bp::object j)
 {
     jobvec.push_back(j);
 }
@@ -89,71 +85,69 @@ void SubWorld::setMyVarState(const std::string& vname, char state)
 
 void SubWorld::setAllVarsState(const std::string& vname, char state)
 {
-#ifdef ESYS_MPI  
+#ifdef ESYS_MPI
       // we need to know where the variable is in the sequence
     str2char::iterator it=varstate.find(vname);
     size_t c=0;
     for (;it!=varstate.end();++it,++c)
     {
-	if (it->first==vname)
-	{
-	    break;
-	}
+        if (it->first==vname)
+        {
+            break;
+        }
     }
     if (it==varstate.end())
     {
-	return;
+        return;
     }
     it->second=state;
-    c--;		// we now have the sequence position of the variable
+    c--;                // we now have the sequence position of the variable
     for (char z=rs::NONE; z<=rs::NEW;++z)
     {
-	globalvarcounts[vname][z]=0;
+        globalvarcounts[vname][z]=0;
     }
     globalvarcounts[vname][state]=swcount;
-    if (!globalinfoinvalid)	// it will be updated in the next synch
+    if (!globalinfoinvalid)     // it will be updated in the next synch
     {
-	for (size_t p=c;p<globalvarinfo.size();p+=getNumVars())
-	{
-	    globalvarinfo[p]=state;
-	}
+        for (size_t p=c;p<globalvarinfo.size();p+=getNumVars())
+        {
+            globalvarinfo[p]=state;
+        }
     }
 #else
     varstate[vname]=state;
-#endif    
-  
-  
+#endif
 }
 
 
 void SubWorld::setVarState(const std::string& vname, char state, int swid)
 {
-#ifdef ESYS_MPI  
+#ifdef ESYS_MPI
       // we need to know where the variable is in the sequence
     str2char::iterator it;
     size_t c=0;
     for (it=varstate.begin();it!=varstate.end();++it,++c)
     {
-	if (it->first==vname)
-	{
-	    break;
-	}
+        if (it->first==vname)
+        {
+            break;
+        }
     }
     if (it==varstate.end())
     {
-	return;
+        return;
     }
-	// we now have the sequence position of the variable
-    if (!globalinfoinvalid)	// it will be updated in the next synch
+        // we now have the sequence position of the variable
+    if (!globalinfoinvalid)     // it will be updated in the next synch
     {
-	unsigned char ostate=globalvarinfo[c+getNumVars()*swid];
-	globalvarinfo[c+getNumVars()*swid]=state;
-	globalvarcounts[vname][ostate]--;
-	globalvarcounts[vname][state]++;
+        unsigned char ostate=globalvarinfo[c+getNumVars()*swid];
+        globalvarinfo[c+getNumVars()*swid]=state;
+        globalvarcounts[vname][ostate]--;
+        globalvarcounts[vname][state]++;
     }
-    if (swid==localid)	// we are updating our own state so we need to change "varstate"
+    if (swid==localid)  // we are updating our own state so we need to change "varstate"
     {
-	it->second=state;
+        it->second=state;
     }
 #else
     varstate[vname]=state;
@@ -166,51 +160,51 @@ bool SubWorld::deliverImports(std::string& errmsg)
 {
     for (size_t i=0;i<jobvec.size();++i)
     {
-	if (manualimports)
-	{
-	    bp::list wanted=bp::extract<bp::list>(jobvec[i].attr("wantedvalues"))();		  
-	    for (size_t j=0;j<len(wanted);++j)
-	    {
-		bp::extract<std::string> exs(wanted[j]);	// must have been checked by now
-		std::string n=exs();
-		  // now we need to check to see if this value is known
-		str2reduce::iterator it=reducemap.find(n);
-		if (it==reducemap.end())
-		{
-		    errmsg="Attempt to import variable \""+n+"\". SplitWorld was not told about this variable.";
-		    return false;
-		}
-		try
-		{
-		    jobvec[i].attr("setImportValue")(it->first, reducemap[it->first]->getPyObj());
-		}
-		catch (boost::python::error_already_set e)
-		{
-		    getStringFromPyException(e, errmsg);   	      
-		    return false;
-		} 
-	    }
-	}
-	else
-	{
-	      // For automatic imports, we want to import "Everything" into every job.
-	      // However, we don't want to import things with no value yet
-	    for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it)
-	    {
-		if (it->second->hasValue())
-		{		  
-		    try
-		    {
-			jobvec[i].attr("setImportValue")(it->first, it->second->getPyObj());
-		    }
-		    catch (boost::python::error_already_set e)
-		    {
-			getStringFromPyException(e, errmsg);   	      
-			return false;
-		    }
-		}
-	    }
-	}
+        if (manualimports)
+        {
+            bp::list wanted=bp::extract<bp::list>(jobvec[i].attr("wantedvalues"))();
+            for (size_t j=0;j<len(wanted);++j)
+            {
+                bp::extract<std::string> exs(wanted[j]);        // must have been checked by now
+                std::string n=exs();
+                  // now we need to check to see if this value is known
+                str2reduce::iterator it=reducemap.find(n);
+                if (it==reducemap.end())
+                {
+                    errmsg="Attempt to import variable \""+n+"\". SplitWorld was not told about this variable.";
+                    return false;
+                }
+                try
+                {
+                    jobvec[i].attr("setImportValue")(it->first, reducemap[it->first]->getPyObj());
+                }
+                catch (bp::error_already_set e)
+                {
+                    getStringFromPyException(e, errmsg);
+                    return false;
+                }
+            }
+        }
+        else
+        {
+              // For automatic imports, we want to import "Everything" into every job.
+              // However, we don't want to import things with no value yet
+            for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it)
+            {
+                if (it->second->hasValue())
+                {
+                    try
+                    {
+                        jobvec[i].attr("setImportValue")(it->first, it->second->getPyObj());
+                    }
+                    catch (bp::error_already_set e)
+                    {
+                        getStringFromPyException(e, errmsg);
+                        return false;
+                    }
+                }
+            }
+        }
     }
     return true;
 }
@@ -219,47 +213,47 @@ bool SubWorld::deliverImports(std::string& errmsg)
 bool SubWorld::localTransport(std::string& errmsg)
 {
     for (size_t i=0;i<jobvec.size();++i)
-    {  
-	bp::dict expmap=bp::extract<bp::dict>(jobvec[i].attr("exportedvalues"))();	
-	bp::list items=expmap.items();
-	size_t l=bp::len(items);
-	for (int j=0;j<l;++j)
-	{
-	    bp::object o1=items[j][0];
-	    bp::object o2=items[j][1];
-	    bp::extract<std::string> ex1(o1);
-	    if (!ex1.check())
-	    {
-		errmsg="Job attempted export using a name which was not a string.";
-		return false;
-	    }
-	    std::string name=ex1();
-	    std::map<std::string, Reducer_ptr>::iterator it=reducemap.find(name);
-	    if (it==reducemap.end())
-	    {
-		errmsg="Attempt to export variable \""+name+"\". SplitWorld was not told about this variable.";
-		return false;
-	    }
-	    // so now we know it is a known name, we check that it is not None and that it is compatible
-	    if (o2.is_none())
-	    {
-		errmsg="Attempt to export variable \""+name+"\" with value of None, this is not permitted.";
-		return false;
-	    }
-	    if (!(it->second)->valueCompatible(o2))
-	    {
-		errmsg="Attempt to export variable \""+name+"\" with an incompatible value. Using ";
-		errmsg+=(it->second)->description();
-		return false;
-	    }
-	    if (!(it->second)->reduceLocalValue(o2, errmsg))
-	    {
-		return false;	// the error string will be set by the reduceLocalValue
-	    }
-	    setMyVarState(name, rs::NEW);
-	}
-    } 
-    return true;      
+    {
+        bp::dict expmap=bp::extract<bp::dict>(jobvec[i].attr("exportedvalues"))();      
+        bp::list items=expmap.items();
+        size_t l=bp::len(items);
+        for (int j=0;j<l;++j)
+        {
+            bp::object o1=items[j][0];
+            bp::object o2=items[j][1];
+            bp::extract<std::string> ex1(o1);
+            if (!ex1.check())
+            {
+                errmsg="Job attempted export using a name which was not a string.";
+                return false;
+            }
+            std::string name=ex1();
+            std::map<std::string, Reducer_ptr>::iterator it=reducemap.find(name);
+            if (it==reducemap.end())
+            {
+                errmsg="Attempt to export variable \""+name+"\". SplitWorld was not told about this variable.";
+                return false;
+            }
+            // so now we know it is a known name, we check that it is not None and that it is compatible
+            if (o2.is_none())
+            {
+                errmsg="Attempt to export variable \""+name+"\" with value of None, this is not permitted.";
+                return false;
+            }
+            if (!(it->second)->valueCompatible(o2))
+            {
+                errmsg="Attempt to export variable \""+name+"\" with an incompatible value. Using ";
+                errmsg+=(it->second)->description();
+                return false;
+            }
+            if (!(it->second)->reduceLocalValue(o2, errmsg))
+            {
+                return false;   // the error string will be set by the reduceLocalValue
+            }
+            setMyVarState(name, rs::NEW);
+        }
+    }
+    return true;
 }
 
 void SubWorld::debug()
@@ -268,116 +262,113 @@ void SubWorld::debug()
     using namespace escript::reducerstatus;
     std::cout << "Variables:";
 #ifdef ESYS_MPI
-	if (!globalinfoinvalid)
-	{
-	    cout << "{ NONE INTR OLD OINT NEW }";
-	}
-	else
-	{
-	    cout << "(no valid global info)";
-	}
-#endif	    
+        if (!globalinfoinvalid)
+        {
+            cout << "{ NONE INTR OLD OINT NEW }";
+        }
+        else
+        {
+            cout << "(no valid global info)";
+        }
+#endif
     std::cout << std::endl;
     int i=0;
     for (str2char::iterator it=varstate.begin();it!=varstate.end();++it,++i)
     {
-	std::cout << it->first << ": ";
-	std::cout << reducemap[it->first]->description() << " ";
-	switch (it->second)
-	{
-	  case NONE: cout << "NONE "; break;
-	  case INTERESTED: cout << "INTR "; break;
-	  case OLDINTERESTED: cout << "OINT "; break;
-	  case OLD: cout << "OLD  "; break;
-	  case NEW: cout << "NEW  "; break;
-	}
+        std::cout << it->first << ": ";
+        std::cout << reducemap[it->first]->description() << " ";
+        switch (it->second)
+        {
+          case NONE: cout << "NONE "; break;
+          case INTERESTED: cout << "INTR "; break;
+          case OLDINTERESTED: cout << "OINT "; break;
+          case OLD: cout << "OLD  "; break;
+          case NEW: cout << "NEW  "; break;
+        }
 #ifdef ESYS_MPI
-	if (!globalinfoinvalid)
-	{
-	    cout << "{ ";
-	    for (unsigned char z=rs::NONE;z<=rs::NEW;++z)
-	    {
-		cout << globalvarcounts[it->first][z] << ' ';
-	    }
-	    cout << " } ";
-	    
-	}
-	else
-	{
-	    cout << "(no valid global info)";
-	}
-#endif	
-	cout << endl;
+        if (!globalinfoinvalid)
+        {
+            cout << "{ ";
+            for (unsigned char z=rs::NONE;z<=rs::NEW;++z)
+            {
+                cout << globalvarcounts[it->first][z] << ' ';
+            }
+            cout << " } ";
+        }
+        else
+        {
+            cout << "(no valid global info)";
+        }
+#endif  
+        cout << endl;
     }
-    
-#ifdef ESYS_MPI
 
+#ifdef ESYS_MPI
     if (!globalinfoinvalid)
     {
-	cout << "[";
-	for (size_t i=0;i<globalvarinfo.size();++i)
-	{
-	    if (i%getNumVars()==0)
-	    {
-		cout << " ";
-	    }
-	    cout << (short)globalvarinfo[i];
-	}
-	cout << " ] ";
-	
+        cout << "[";
+        for (size_t i=0;i<globalvarinfo.size();++i)
+        {
+            if (i%getNumVars()==0)
+            {
+                cout << " ";
+            }
+            cout << (short)globalvarinfo[i];
+        }
+        cout << " ] ";
+        
     }
 
 
 #endif
     std::cout << "Debug end\n";
     std::cout.flush();
-  
 }
 
 
 // not to be called while running jobs
 // The tricky bit is that this could be called between job runs
 // this means that the values of variables may not have been synched yet
-double SubWorld::getScalarVariable(const std::string& name)
+DataTypes::real_t SubWorld::getScalarVariable(const std::string& name)
 {
     str2reduce::iterator it=reducemap.find(name);
     if (it==reducemap.end())
     {
-	throw SplitWorldException("No variable of that name.");
+        throw SplitWorldException("No variable of that name.");
     }
-	// need to indicate we are interested in the variable
+        // need to indicate we are interested in the variable
     if (varstate[name]==rs::NONE)
     {
-	setMyVarState(name, rs::INTERESTED);
+        setMyVarState(name, rs::INTERESTED);
     }
     else if (varstate[name]==rs::OLD)
     {
-      	setMyVarState(name, rs::OLDINTERESTED);
+        setMyVarState(name, rs::OLDINTERESTED);
     }
-	// anything else, indicates interest anyway
+        // anything else, indicates interest anyway
 #ifdef ESYS_MPI
     std::string errmsg;
     if (!synchVariableInfo(errmsg))
     {
-	throw SplitWorldException(std::string("(Getting scalar --- Variable information) ")+errmsg);
+        throw SplitWorldException(std::string("(Getting scalar --- Variable information) ")+errmsg);
     }
     if (!synchVariableValues(errmsg))
     {
-	throw SplitWorldException(std::string("(Getting scalar --- Variable value) ")+errmsg);
+        throw SplitWorldException(std::string("(Getting scalar --- Variable value) ")+errmsg);
     }
 #endif
     if (dynamic_cast<MPIScalarReducer*>(it->second.get()))
     {
-	return dynamic_cast<MPIScalarReducer*>(it->second.get())->getDouble();
+        return dynamic_cast<MPIScalarReducer*>(it->second.get())->getDouble();
     }
     if (dynamic_cast<NonReducedVariable*>(it->second.get()))
     {
-	boost::python::extract<double> ex(it->second->getPyObj());
-	if (!ex.check())
-	{
-	    throw SplitWorldException("Variable is not scalar.");
-	}
-	return ex();
+        bp::extract<DataTypes::real_t> ex(it->second->getPyObj());
+        if (!ex.check())
+        {
+            throw SplitWorldException("Variable is not scalar.");
+        }
+        return ex();
     }
     throw SplitWorldException("Variable is not scalar.");
 }
@@ -386,37 +377,37 @@ double SubWorld::getScalarVariable(const std::string& name)
 // not to be called while running jobs
 // The tricky bit is that this could be called between job runs
 // this means that the values of variables may not have been synched yet
-boost::python::object SubWorld::getLocalObjectVariable(const std::string& name)
+bp::object SubWorld::getLocalObjectVariable(const std::string& name)
 {
     str2reduce::iterator it=reducemap.find(name);
     if (it==reducemap.end())
     {
-	throw SplitWorldException("No variable of that name.");
+        throw SplitWorldException("No variable of that name.");
     }
-	// need to indicate we are interested in the variable
+        // need to indicate we are interested in the variable
     if (varstate[name]==rs::NONE)
     {
-	setMyVarState(name, rs::INTERESTED);
+        setMyVarState(name, rs::INTERESTED);
     }
     else if (varstate[name]==rs::OLD)
     {
-      	setMyVarState(name, rs::OLDINTERESTED);
+        setMyVarState(name, rs::OLDINTERESTED);
     }
-	// anything else, indicates interest anyway
+        // anything else, indicates interest anyway
 #ifdef ESYS_MPI
     std::string errmsg;
     if (!synchVariableInfo(errmsg))
     {
-	throw SplitWorldException(std::string("(Getting local object --- Variable information) ")+errmsg);
+        throw SplitWorldException(std::string("(Getting local object --- Variable information) ")+errmsg);
     }
     if (!synchVariableValues(errmsg))
     {
-	throw SplitWorldException(std::string("(Getting local object --- Variable value) ")+errmsg);
+        throw SplitWorldException(std::string("(Getting local object --- Variable value) ")+errmsg);
     }
 #endif
     if (dynamic_cast<NonReducedVariable*>(it->second.get()))
     {
-	return dynamic_cast<NonReducedVariable*>(it->second.get())->getPyObj();
+        return dynamic_cast<NonReducedVariable*>(it->second.get())->getPyObj();
     }
     throw SplitWorldException("Variable is not a local object.");
 }
@@ -427,26 +418,25 @@ bool SubWorld::checkRemoteCompatibility(std::string& errmsg)
 {
     for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it)
     {
-	if (! it->second->checkRemoteCompatibility(corrmpi, errmsg))
-	{
-	    return false;
-	}
+        if (! it->second->checkRemoteCompatibility(corrmpi, errmsg))
+        {
+            return false;
+        }
     }
     return true;
 }
 
-#ifdef ESYS_MPI  
-  
+#ifdef ESYS_MPI
 bool SubWorld::makeComm(MPI_Comm& sourcecom, JMPI& ncom,std::vector<int>& members)
 {
       MPI_Comm subcom;
       MPI_Group sourceg, g;
       if (MPI_Comm_group(sourcecom, &sourceg)!=MPI_SUCCESS) {return false;}
-      if (MPI_Group_incl(sourceg, members.size(), &members[0], &g)!=MPI_SUCCESS) {return false;}    
+      if (MPI_Group_incl(sourceg, members.size(), &members[0], &g)!=MPI_SUCCESS) {return false;}
       // then create a communicator with that group
-      if (MPI_Comm_create(sourcecom, g, &subcom)!=MPI_SUCCESS) 
+      if (MPI_Comm_create(sourcecom, g, &subcom)!=MPI_SUCCESS)
       {
-	  return false;	
+          return false; 
       }
       ncom=makeInfo(subcom, true);
       return true;
@@ -464,28 +454,28 @@ bool SubWorld::makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, J
             || (mystate==rs::INTERESTED)
             || (mystate==rs::OLDINTERESTED))
     {
-	// first create a group with all the updates in it
-	std::vector<int> redmembers;
-	std::vector<int> copmembers;
+        // first create a group with all the updates in it
+        std::vector<int> redmembers;
+        std::vector<int> copmembers;
         for (int i=0+vnum;i<globalvarinfo.size();i+=getNumVars())
         {
-	    bool havesrc=false;
-	    int world=i/getNumVars();
+            bool havesrc=false;
+            int world=i/getNumVars();
             // make a vector of the involved procs with New at the front
             switch (globalvarinfo[i])
             {
                 case rs::NEW:
-		    if (!havesrc)
-		    {
-		        copmembers.insert(copmembers.begin(), world);
-			havesrc=true;
-			if (world==localid)
-			{
-			    incopy=true;			
-			}
-		    }
-		    redmembers.push_back(world);
-		    break;
+                    if (!havesrc)
+                    {
+                        copmembers.insert(copmembers.begin(), world);
+                        havesrc=true;
+                        if (world==localid)
+                        {
+                            incopy=true;                        
+                        }
+                    }
+                    redmembers.push_back(world);
+                    break;
                 case rs::INTERESTED:
                 case rs::OLDINTERESTED:
                           copmembers.push_back(world);
@@ -496,14 +486,14 @@ bool SubWorld::makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, J
                           break;
             }
         }
-	if (!makeComm(srccom, red, redmembers))
-	{
-	    return false;
-	}
-	if (!makeComm(srccom, cop, copmembers))
-	{
-	    return false;
-	}
+        if (!makeComm(srccom, red, redmembers))
+        {
+            return false;
+        }
+        if (!makeComm(srccom, cop, copmembers))
+        {
+            return false;
+        }
         return true;
 
     }
@@ -511,17 +501,17 @@ bool SubWorld::makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, J
     {     // This would be a nice time to use MPI_Comm_create_group
           // but it does not exist in MPI2.1
         MPI_Comm temp;
-	if (MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp)!=MPI_SUCCESS)
-	{
-	    return false;
-	}
-	red=makeInfo(temp, true);
         if (MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp)!=MPI_SUCCESS)
-	{
-	    return false;
-	}
-	cop=makeInfo(temp, true);
-	return true;
+        {
+            return false;
+        }
+        red=makeInfo(temp, true);
+        if (MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp)!=MPI_SUCCESS)
+        {
+            return false;
+        }
+        cop=makeInfo(temp, true);
+        return true;
     }
 
 }
@@ -532,278 +522,277 @@ bool SubWorld::makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, J
 bool SubWorld::makeGroupComm1(MPI_Comm& srccom, int vnum, char mystate, JMPI& com)
 {
       if ((mystate==rs::NEW)
-	    || (mystate==rs::INTERESTED)
-	    || (mystate==rs::OLDINTERESTED))
+            || (mystate==rs::INTERESTED)
+            || (mystate==rs::OLDINTERESTED))
       {
       // first create a group with [updates, interested and oldinterested in it]
-	  std::vector<int> members;
-	  for (int i=0+vnum;i<globalvarinfo.size();i+=getNumVars())
-	  {
-	      // make a vector of the involved procs with New at the front
-	      switch (globalvarinfo[i])
-	      {
-		case rs::NEW:   members.insert(members.begin(), i/getNumVars()); break;
-		case rs::INTERESTED:     
-		case rs::OLDINTERESTED:
-			  members.push_back(i/getNumVars());
-			  break;
-	      }
-	  }
-	  return makeComm(srccom, com, members);
+          std::vector<int> members;
+          for (int i=0+vnum;i<globalvarinfo.size();i+=getNumVars())
+          {
+              // make a vector of the involved procs with New at the front
+              switch (globalvarinfo[i])
+              {
+                case rs::NEW:   members.insert(members.begin(), i/getNumVars()); break;
+                case rs::INTERESTED:
+                case rs::OLDINTERESTED:
+                          members.push_back(i/getNumVars());
+                          break;
+              }
+          }
+          return makeComm(srccom, com, members);
       }
-      else	// for people not in involved in the value shipping
-      {		// This would be a nice time to use MPI_Comm_create_group
-		// but it does not exist in MPI2.1
+      else      // for people not involved in the value shipping
+      {         // This would be a nice time to use MPI_Comm_create_group
+                // but it does not exist in MPI2.1
           MPI_Comm temp;
-	  MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp);
-	  com=makeInfo(temp, true);
-	  return true;
+          MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp);
+          com=makeInfo(temp, true);
+          return true;
       }
 }
 
-// A group with a single OLD or OLDINT at the front and all the INT worlds 
+// A group with a single OLD or OLDINT at the front and all the INT worlds
 // following it
 bool SubWorld::makeGroupComm2(MPI_Comm& srccom, int vnum, char mystate, JMPI& com, bool& ingroup)
 {
       ingroup=false;
       if ((mystate==rs::OLD)
-	    || (mystate==rs::INTERESTED)
-	    || (mystate==rs::OLDINTERESTED))
+            || (mystate==rs::INTERESTED)
+            || (mystate==rs::OLDINTERESTED))
       {
-	  // first create a group with [old, interested and oldinterested in it]
-	  std::vector<int> members;
-	  bool havesrc=false;
-	  for (int i=0+vnum;i<globalvarinfo.size();i+=getNumVars())
-	  {
-	      int world=i/getNumVars(); 
-	      // make a vector of the involved procs with OLD/OLDINTERESTED at the front
-	      switch (globalvarinfo[i])
-	      {
-		case rs::NEW:   return false;  break;
-		case rs::INTERESTED: members.push_back(world);
-			  if (world==localid)
-			  {
-			      ingroup=true;
-			  }
-			  break;     
-		case rs::OLD: 
-		case rs::OLDINTERESTED:
-			  if (!havesrc)
-			  {
-			      members.insert(members.begin(), world);
-			      havesrc=true;
-			      if (world==localid)
-			      {
-				ingroup=true;
-			      }
-			  }
-			  break;
-	      }
-	  }		
-	  return makeComm(srccom, com, members);
+          // first create a group with [old, interested and oldinterested in it]
+          std::vector<int> members;
+          bool havesrc=false;
+          for (int i=0+vnum;i<globalvarinfo.size();i+=getNumVars())
+          {
+              int world=i/getNumVars();
+              // make a vector of the involved procs with OLD/OLDINTERESTED at the front
+              switch (globalvarinfo[i])
+              {
+                case rs::NEW:   return false;  break;
+                case rs::INTERESTED: members.push_back(world);
+                          if (world==localid)
+                          {
+                              ingroup=true;
+                          }
+                          break;
+                case rs::OLD:
+                case rs::OLDINTERESTED:
+                          if (!havesrc)
+                          {
+                              members.insert(members.begin(), world);
+                              havesrc=true;
+                              if (world==localid)
+                              {
+                                ingroup=true;
+                              }
+                          }
+                          break;
+              }
+          }             
+          return makeComm(srccom, com, members);
       }
-      else	// for people not in involved in the value shipping
-      {		// This would be a nice time to use MPI_Comm_create_group
-		// but it does not exist in MPI2.1	
+      else      // for people not involved in the value shipping
+      {         // This would be a nice time to use MPI_Comm_create_group
+                // but it does not exist in MPI2.1      
           MPI_Comm temp;
-	  MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp);
-	  com=makeInfo(temp, true);
-	  return true;
+          MPI_Comm_create(srccom, MPI_GROUP_EMPTY, &temp);
+          com=makeInfo(temp, true);
+          return true;
       }
 }
-
-#endif  
+#endif
 
 
 bool SubWorld::synchVariableValues(std::string& err)
 {
-#ifdef ESYS_MPI    
+#ifdef ESYS_MPI
     // There are three possibilities here but since all worlds have the same knowledge
     // we can be sure that they will all make the same choice
     // 1) No updates are required
     // 2) There is a single world with a new value so it can broadcast it
     // 3) There are multiple worlds with updates
-    
+
     // need to keep track of which vars have updates
     std::vector<std::string> varswithupdates;
 
-    int vnum=0;    
+    int vnum=0;
     for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it, ++vnum)
     {
          // check to see if anyone needs it
-	int needcount=0; // who wants a new value
-	int newcount=0;	// who has a new version
-	int oldcount=0;	// who has an old version
-	int oldintcount=0;
-	newcount=globalvarcounts[it->first][rs::NEW];
-	oldcount=globalvarcounts[it->first][rs::OLD];
-	oldintcount=globalvarcounts[it->first][rs::OLDINTERESTED];
-	needcount=globalvarcounts[it->first][rs::INTERESTED]+oldintcount;
-	if (newcount>0)
-	{
-	    varswithupdates.push_back(it->first);
-	}
-	if (needcount+newcount+oldcount==0)
-	{
-	    continue;		// noone cares about this variable
-	}
-	if (needcount>0 && (oldcount+oldintcount+newcount)==0)
-	{
-	    err="Import attempted for a variable \""+(it->first)+"\" with no value.";
-	    return false;
-	}
-	    // worlds have the variable but noone is interested in it
-	    // note that if there are multiple new values, we still want to merge them
-	if ((needcount==0) && (newcount<=1))
-	{
-	    continue;	
-	}
-	if (swcount==1)
-	{		// nobody else to communicate with
-	    continue;
-	}
-	    // to reach this point, there must be >=1 source and >=1 sink and multiple worlds
-	    // first deal updates as source(s)
-	if (newcount==1)	// only one update so send from that
-	{
-	    JMPI com;
-	    if (!makeGroupComm1(corrmpi->comm, vnum, varstate[it->first],com))
-	    {
-		err="Error creating group for sharing values,";
-		return false;
-	    }
-	    if (varstate[it->first]!=rs::NONE && varstate[it->first]!=rs::OLD)
-	    {
-		it->second->groupSend(com->comm, (varstate[it->first]==rs::NEW));
-		  // Now record the fact that we have the variable now
-		if (varstate[it->first]==rs::INTERESTED)
-		{
-		    setMyVarState(it->first, rs::OLDINTERESTED); 
-		}
-	    }
-	    continue;
-	}
-	if (newcount==swcount)		// everybody is in on this
-	{
-	    if (!it->second->reduceRemoteValues(corrmpi->comm))
-	    {
-		it->second->reset();
-		setAllVarsState(it->first, rs::NONE);
-		//setMyVarState(it->first, rs::NONE);
-		err=it->first+"Either MPI failed, or there were multiple simultaneous updates to a variable with the SET operation.";
-		return false;
-	    }
-	        // Now record the fact that we have the variable now
-	    if (varstate[it->first]==rs::INTERESTED)
-	    {
-		setMyVarState(it->first, rs::OLDINTERESTED); 
-	    }	    
-	    continue;
-	}
-	if (newcount>1)
-	{
-	    // make groups to reduce and then copy
-	    JMPI red;
-	    JMPI cop;
-	    bool incopy;
-	    if (!makeGroupReduceGroups(corrmpi->comm, vnum, varstate[it->first], red, cop, incopy))
-	    {
-		err="Error creating groups for sharing values,";
-		return false;
-	    }
-	    char reduceresult=0;
-		// only new values get reduced
-	    if (varstate[it->first]==rs::NEW)
-	    {
-	        if (!it->second->reduceRemoteValues(red->comm))
-		{
-		    char s=1;
-		    MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);
-		    reduceresult=1;
-
-		}
-		else
-		{
-		    if (it->second->canClash())
-		    {
-			char s=0;
-			MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);		    
-		    }
-		}
-	    }
-	    else
-	    {
-		if (it->second->canClash())
-		{
-		    char s=0;
-		    MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);		    
-		}
-	    }
-		// if there was a clash somewhere
-	    if (reduceresult!=0)
-	    {
-		it->second->reset();
-		setAllVarsState(it->first, rs::NONE);
-		err="Either MPI failed, or there were multiple simultaneous updates to a variable with the SET operation.";
-		return false;	      
-	    }
-	      
-		// if we are involved in copying the new value around
-	    if (incopy)
-	    {
+        int needcount=0; // who wants a new value
+        int newcount=0; // who has a new version
+        int oldcount=0; // who has an old version
+        int oldintcount=0;
+        newcount=globalvarcounts[it->first][rs::NEW];
+        oldcount=globalvarcounts[it->first][rs::OLD];
+        oldintcount=globalvarcounts[it->first][rs::OLDINTERESTED];
+        needcount=globalvarcounts[it->first][rs::INTERESTED]+oldintcount;
+        if (newcount>0)
+        {
+            varswithupdates.push_back(it->first);
+        }
+        if (needcount+newcount+oldcount==0)
+        {
+            continue;           // no one cares about this variable
+        }
+        if (needcount>0 && (oldcount+oldintcount+newcount)==0)
+        {
+            err="Import attempted for a variable \""+(it->first)+"\" with no value.";
+            return false;
+        }
+            // worlds have the variable but no one is interested in it
+            // note that if there are multiple new values, we still want to merge them
+        if ((needcount==0) && (newcount<=1))
+        {
+            continue;   
+        }
+        if (swcount==1)
+        {               // nobody else to communicate with
+            continue;
+        }
+            // to reach this point, there must be >=1 source and >=1 sink and multiple worlds
+            // first deal updates as source(s)
+        if (newcount==1)        // only one update so send from that
+        {
+            JMPI com;
+            if (!makeGroupComm1(corrmpi->comm, vnum, varstate[it->first],com))
+            {
+                err="Error creating group for sharing values,";
+                return false;
+            }
+            if (varstate[it->first]!=rs::NONE && varstate[it->first]!=rs::OLD)
+            {
+                it->second->groupSend(com->comm, (varstate[it->first]==rs::NEW));
+                  // Now record the fact that we have the variable now
+                if (varstate[it->first]==rs::INTERESTED)
+                {
+                    setMyVarState(it->first, rs::OLDINTERESTED);
+                }
+            }
+            continue;
+        }
+        if (newcount==swcount)          // everybody is in on this
+        {
+            if (!it->second->reduceRemoteValues(corrmpi->comm))
+            {
+                it->second->reset();
+                setAllVarsState(it->first, rs::NONE);
+                //setMyVarState(it->first, rs::NONE);
+                err=it->first+": Either MPI failed, or there were multiple simultaneous updates to a variable with the SET operation.";
+                return false;
+            }
+                // Now record the fact that we have the variable now
+            if (varstate[it->first]==rs::INTERESTED)
+            {
+                setMyVarState(it->first, rs::OLDINTERESTED);
+            }
+            continue;
+        }
+        if (newcount>1)
+        {
+            // make groups to reduce and then copy
+            JMPI red;
+            JMPI cop;
+            bool incopy;
+            if (!makeGroupReduceGroups(corrmpi->comm, vnum, varstate[it->first], red, cop, incopy))
+            {
+                err="Error creating groups for sharing values,";
+                return false;
+            }
+            char reduceresult=0;
+                // only new values get reduced
+            if (varstate[it->first]==rs::NEW)
+            {
+                if (!it->second->reduceRemoteValues(red->comm))
+                {
+                    char s=1;
+                    MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);
+                    reduceresult=1;
+
+                }
+                else
+                {
+                    if (it->second->canClash())
+                    {
+                        char s=0;
+                        MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);
+                    }
+                }
+            }
+            else
+            {
+                if (it->second->canClash())
+                {
+                    char s=0;
+                    MPI_Allreduce(&s, &reduceresult, 1, MPI_CHAR, MPI_MAX, corrmpi->comm);
+                }
+            }
+                // if there was a clash somewhere
+            if (reduceresult!=0)
+            {
+                it->second->reset();
+                setAllVarsState(it->first, rs::NONE);
+                err="Either MPI failed, or there were multiple simultaneous updates to a variable with the SET operation.";
+                return false;
+            }
+
+                // if we are involved in copying the new value around
+            if (incopy)
+            {
                 it->second->groupSend(cop->comm, (varstate[it->first]==rs::NEW));
-	        if (varstate[it->first]==rs::INTERESTED) 
-	        {
-		    setMyVarState(it->first, rs::OLDINTERESTED); 
-		}
-	    }
+                if (varstate[it->first]==rs::INTERESTED)
+                {
+                    setMyVarState(it->first, rs::OLDINTERESTED);
+                }
+            }
             if (varstate[it->first]==rs::NEW)
             {
-		setMyVarState(it->first, rs::OLDINTERESTED);
+                setMyVarState(it->first, rs::OLDINTERESTED);
             }
-	    continue;
-	}
-	    // at this point, we need to ship info around but there are no updates
-	    // that is, we are shipping an old copy
-	    // picking a source arbitarily (the first one in the array)
-	    
-	    // but first, eliminate the special case where the only interested ones
-	    // already have a copy
-	if (oldintcount==needcount)
-	{
-	    continue;
-	}
-	JMPI com;
-	bool ingroup=false;
-	if (!makeGroupComm2(corrmpi->comm, vnum, varstate[it->first],com, ingroup))
-	{
-	    err="Error creating group for sharing values";
-	    return false;
-	}
-	// form group to send to [latestsource and interested]
-	
-	if (ingroup)		// since only one holder needs to send
-	{
-	    bool imsending=(varstate[it->first]==rs::NEW);
-	    it->second->groupSend(com->comm, imsending);
-	}
+            continue;
+        }
+            // at this point, we need to ship info around but there are no updates
+            // that is, we are shipping an old copy
+            // picking a source arbitrarily (the first one in the array)
+
+            // but first, eliminate the special case where the only interested ones
+            // already have a copy
+        if (oldintcount==needcount)
+        {
+            continue;
+        }
+        JMPI com;
+        bool ingroup=false;
+        if (!makeGroupComm2(corrmpi->comm, vnum, varstate[it->first],com, ingroup))
+        {
+            err="Error creating group for sharing values";
+            return false;
+        }
+        // form group to send to [latestsource and interested]
+        
+        if (ingroup)            // since only one holder needs to send
+        {
+            bool imsending=(varstate[it->first]==rs::NEW);
+            it->second->groupSend(com->comm, imsending);
+        }
     }
-	// now we need to age any out of date copies of vars
+        // now we need to age any out of date copies of vars
     for (size_t i=0;i<varswithupdates.size();++i)
-    {      
+    {
         std::string vname=varswithupdates[i];
-        	if (varstate[vname]==rs::NEW)
-	{
-	    setMyVarState(vname, rs::OLD);
-	}
-	else if (varstate[vname]==rs::OLD)
-	{
-	    setMyVarState(vname, rs::NONE);
-	    reducemap[vname]->clear();
-	}
+                if (varstate[vname]==rs::NEW)
+        {
+            setMyVarState(vname, rs::OLD);
+        }
+        else if (varstate[vname]==rs::OLD)
+        {
+            setMyVarState(vname, rs::NONE);
+            reducemap[vname]->clear();
+        }
     }
 #endif
-    return true;    
+    return true;
 }
 
 bool SubWorld::amLeader()
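
The synchronisation above is driven by the per-world variable states NONE,
INTERESTED, OLD, OLDINTERESTED and NEW, which this file takes from
AbstractReducer.h (escript::reducerstatus). The sketch below restates the
aging rule applied at the end of synchVariableValues() to every variable that
received an update; ageAfterUpdateSketch is a hypothetical helper, not part of
the source, and it assumes the state constants are small integral values, as
their use in this file suggests:

    #include "AbstractReducer.h"   // declares escript::reducerstatus::*

    char ageAfterUpdateSketch(char state)
    {
        namespace rs = escript::reducerstatus;
        if (state == rs::NEW) return rs::OLD;   // a fresh value becomes last run's value
        if (state == rs::OLD) return rs::NONE;  // a stale copy is dropped and its reducer cleared
        return state;   // INTERESTED/OLDINTERESTED are handled by resetInterest()
    }
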
@@ -817,42 +806,42 @@ bool SubWorld::synchVariableInfo(std::string& err)
 {
     if (getNumVars()==0)
     {
-	return true;
+        return true;
     }
-    if (manualimports)		// manual control over imports
+    if (manualimports)          // manual control over imports
     {
-	for (size_t i=0;i<jobvec.size();++i)
-	{
-	    bp::list wanted=bp::extract<bp::list>(jobvec[i].attr("wantedvalues"))();		  
-	    for (size_t j=0;j<len(wanted);++j)
-	    {
-		bp::extract<std::string> exs(wanted[j]);
-		if (!exs.check()) 
-		{
-		    err="names in wantedvalues must be strings";
-		    return false;
-		}
-		std::string n=exs();
-		  // now we need to check to see if this value is known
-		str2char::iterator it=varstate.find(n);
-		if (it==varstate.end())
-		{
-		    err="Attempt to import variable \""+n+"\". SplitWorld was not told about this variable.";
-		    return false;
-		}
-		// So at least one job wants this variable
-		switch (it->second)
-		{
-		  case rs::NONE: it->second=rs::INTERESTED; break;
-		  case rs::INTERESTED: break;
-		  case rs::OLD: it->second=rs::OLDINTERESTED; break;
-		  case rs::NEW: break;
-		  default:
-		    err="Unknown variable state";
-		    return false;
-		}
-	    }
-	}
+        for (size_t i=0;i<jobvec.size();++i)
+        {
+            bp::list wanted=bp::extract<bp::list>(jobvec[i].attr("wantedvalues"))();
+            for (size_t j=0;j<len(wanted);++j)
+            {
+                bp::extract<std::string> exs(wanted[j]);
+                if (!exs.check())
+                {
+                    err="names in wantedvalues must be strings";
+                    return false;
+                }
+                std::string n=exs();
+                  // now we need to check to see if this value is known
+                str2char::iterator it=varstate.find(n);
+                if (it==varstate.end())
+                {
+                    err="Attempt to import variable \""+n+"\". SplitWorld was not told about this variable.";
+                    return false;
+                }
+                // So at least one job wants this variable
+                switch (it->second)
+                {
+                  case rs::NONE: it->second=rs::INTERESTED; break;
+                  case rs::INTERESTED: break;
+                  case rs::OLD: it->second=rs::OLDINTERESTED; break;
+                  case rs::NEW: break;
+                  default:
+                    err="Unknown variable state";
+                    return false;
+                }
+            }
+        }
     }
         // Make a vector to hold the info from the map (so we can send it around)
     std::vector<char> lb(getNumVars(), rs::NONE);
@@ -866,92 +855,91 @@ bool SubWorld::synchVariableInfo(std::string& err)
 #ifdef ESYS_MPI
         // Vector to hold the result
     globalvarinfo.resize(getNumVars()*swcount, rs::NONE);
-    if (amLeader())	// we only need on representative from each world to send
+    if (amLeader())     // we only need one representative from each world to send
     {
-	// The leaders of each world, send their variable information to the proc "0" in
-	// the global world (which will be the leader of subworld "0").
-	//    There is an issue here if this operation fails
-	if (MPI_Gather(&lb[0], getNumVars(), MPI_CHAR, &globalvarinfo[0], getNumVars(), 
-		   MPI_CHAR, 0, getCorrMPI()->comm)!=MPI_SUCCESS) 
-	{
-	    for (size_t i=0;i<globalvarinfo.size();++i)
-	    {
-		globalvarinfo[i]=rs::ERROR;
-	    }
-	}      
+        // The leaders of each world send their variable information to proc "0" in
+        // the global world (which will be the leader of subworld "0").
+        //    There is an issue here if this operation fails
+        if (MPI_Gather(&lb[0], getNumVars(), MPI_CHAR, &globalvarinfo[0], getNumVars(),
+                   MPI_CHAR, 0, getCorrMPI()->comm)!=MPI_SUCCESS)
+        {
+            for (size_t i=0;i<globalvarinfo.size();++i)
+            {
+                globalvarinfo[i]=rs::ERROR;
+            }
+        }
     }
     // now share the combined info with all processes
     if ((MPI_Bcast(&globalvarinfo[0], globalvarinfo.size(), MPI_CHAR, 0, everyone->comm)!=MPI_SUCCESS)
-	  || (globalvarinfo[0]==rs::ERROR))
+          || (globalvarinfo[0]==rs::ERROR))
     {
-	err="Error while gathering variable use information.";
-	return false;	
+        err="Error while gathering variable use information.";
+        return false;   
     }
       // now we convert that info into a form which is easier to read
-    int p=0;  
+    int p=0;
     for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it,++p)
     {
-	globalvarcounts[it->first][rs::NONE]=0;
-	globalvarcounts[it->first][rs::INTERESTED]=0;
-	globalvarcounts[it->first][rs::OLD]=0;
-	globalvarcounts[it->first][rs::OLDINTERESTED]=0;
-	globalvarcounts[it->first][rs::NEW]=0;
-	for (int j=p;j<globalvarinfo.size();j+=getNumVars())
-	{
-	    if (globalvarinfo[j]<=rs::NEW)
-	    {
-		globalvarcounts[it->first][globalvarinfo[j]]++;
-	    }
-	}
+        globalvarcounts[it->first][rs::NONE]=0;
+        globalvarcounts[it->first][rs::INTERESTED]=0;
+        globalvarcounts[it->first][rs::OLD]=0;
+        globalvarcounts[it->first][rs::OLDINTERESTED]=0;
+        globalvarcounts[it->first][rs::NEW]=0;
+        for (int j=p;j<globalvarinfo.size();j+=getNumVars())
+        {
+            if (globalvarinfo[j]<=rs::NEW)
+            {
+                globalvarcounts[it->first][globalvarinfo[j]]++;
+            }
+        }
     }
-    
-#endif    
-    if (!manualimports)	
+
+#endif
+    if (!manualimports) 
     {
-	    // import all known variables _BUT_ don't import something if noone has a value
-	    // for it  
-	int vnum=0;
-	for (str2char::iterator it=varstate.begin();it!=varstate.end();++it, ++vnum)
-	{
+            // import all known variables _BUT_ don't import something if no one has a value
+            // for it
+        int vnum=0;
+        for (str2char::iterator it=varstate.begin();it!=varstate.end();++it, ++vnum)
+        {
 #ifdef ESYS_MPI
-	      // if at least one world has a value for a variable
-	    if (globalvarcounts[it->first][rs::OLDINTERESTED] 
-	         + globalvarcounts[it->first][rs::OLD] 
-	         + globalvarcounts[it->first][rs::NEW] > 0 )
-	    {
+              // if at least one world has a value for a variable
+            if (globalvarcounts[it->first][rs::OLDINTERESTED]
+                 + globalvarcounts[it->first][rs::OLD]
+                 + globalvarcounts[it->first][rs::NEW] > 0 )
+            {
 #endif
-	      
-		if (it->second==rs::NONE)
-		{
-		    it->second=rs::INTERESTED;
-		}
-		else if (it->second==rs::OLD)
-		{
-		    it->second=rs::OLDINTERESTED;
-		}
+
+                if (it->second==rs::NONE)
+                {
+                    it->second=rs::INTERESTED;
+                }
+                else if (it->second==rs::OLD)
+                {
+                    it->second=rs::OLDINTERESTED;
+                }
 #ifdef ESYS_MPI
-		  // now we need to update the globalvarinfo to record all the extra interest
-		for (int j=vnum;j<globalvarinfo.size();j+=getNumVars())
-		{
-		    if (globalvarinfo[j]==rs::NONE)
-		    {
-			globalvarinfo[j]=rs::INTERESTED;
-			globalvarcounts[it->first][rs::NONE]--;
-			globalvarcounts[it->first][rs::INTERESTED]++;			
-		    }
-		    else if (globalvarinfo[j]==rs::OLD)
-		    {
-			globalvarinfo[j]=rs::OLDINTERESTED;
-			globalvarcounts[it->first][rs::OLD]--;
-			globalvarcounts[it->first][rs::OLDINTERESTED]++;			
-		    }
-		  
-		}
-	    }
-#endif	
-	}
+                  // now we need to update the globalvarinfo to record all the extra interest
+                for (int j=vnum;j<globalvarinfo.size();j+=getNumVars())
+                {
+                    if (globalvarinfo[j]==rs::NONE)
+                    {
+                        globalvarinfo[j]=rs::INTERESTED;
+                        globalvarcounts[it->first][rs::NONE]--;
+                        globalvarcounts[it->first][rs::INTERESTED]++;                   
+                    }
+                    else if (globalvarinfo[j]==rs::OLD)
+                    {
+                        globalvarinfo[j]=rs::OLDINTERESTED;
+                        globalvarcounts[it->first][rs::OLD]--;
+                        globalvarcounts[it->first][rs::OLDINTERESTED]++;                        
+                    }
+                }
+            }
+#endif  
+        }
     }
-#ifdef ESYS_MPI    
+#ifdef ESYS_MPI
     globalinfoinvalid=false;
 #endif
 
@@ -959,7 +947,7 @@ bool SubWorld::synchVariableInfo(std::string& err)
 }
 
 // if 4, a Job performed an invalid export
-// if 3, a Job threw an exception 
+// if 3, a Job threw an exception
 // if 2, a Job did not return a bool
 // if 1, at least one Job returned False
 // if 0, all jobs in this world returned True
@@ -969,27 +957,27 @@ char SubWorld::runJobs(std::string& errormsg)
     int ret=0;
     try
     {
-	for (size_t i=0;i<jobvec.size();++i)
-	{
-	    boost::python::object result=jobvec[i].attr("work")();
-	    boost::python::extract<bool> ex(result);
-	    if (!ex.check() || (result.is_none()))
-	    {
-		return 2;	
-	    }
-	    // check to see if we need to keep running
-	    if (!ex())
-	    {
-		ret=1;
-	    }
-
-	}
-    } 
-    catch (boost::python::error_already_set e)
+        for (size_t i=0;i<jobvec.size();++i)
+        {
+            bp::object result=jobvec[i].attr("work")();
+            bp::extract<bool> ex(result);
+            if (!ex.check() || (result.is_none()))
+            {
+                return 2;       
+            }
+            // check to see if we need to keep running
+            if (!ex())
+            {
+                ret=1;
+            }
+
+        }
+    }
+    catch (bp::error_already_set e)
     {
-	getStringFromPyException(e, errormsg);      
-	return 3;
-    }  
+        getStringFromPyException(e, errormsg);
+        return 3;
+    }
     return ret;
 }
 
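
runJobs() reports its outcome through the small return-code protocol noted in
the comments above (0 through 4). The following sketch shows how a caller
might branch on those codes; the real driver lives in SplitWorld, which is not
part of this hunk, and runBatchSketch is a hypothetical name:

    #include "SplitWorldException.h"
    #include "SubWorld.h"
    #include <string>

    void runBatchSketch(escript::SubWorld& world)
    {
        std::string errormsg;
        switch (world.runJobs(errormsg))
        {
            case 0:  break;     // all jobs returned True
            case 1:  break;     // at least one job returned False; stop cleanly
            case 2:  throw escript::SplitWorldException("Job did not return a bool.");
            case 3:  throw escript::SplitWorldException("Job raised an exception: " + errormsg);
            case 4:  throw escript::SplitWorldException("Job performed an invalid export.");
            default: throw escript::SplitWorldException("Unknown runJobs() status.");
        }
    }
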
@@ -1003,25 +991,25 @@ void SubWorld::addVariable(std::string& name, Reducer_ptr& rp)
 {
     if (reducemap.find(name)!=reducemap.end())
     {
-	std::ostringstream oss;
-	throw SplitWorldException(oss.str());    
+        std::ostringstream oss;
+        oss << "A variable named \"" << name << "\" already exists.";
+        throw SplitWorldException(oss.str());
     }
     if (domain.get()==0)
     {
-	throw SplitWorldException("No domain has been set yet.");
+        throw SplitWorldException("No domain has been set yet.");
     }
     rp->setDomain(domain);
     reducemap[name]=rp;
     varstate[name]=reducerstatus::NONE;
     if (!manualimports)
     {
-	for (size_t i=0;i<jobvec.size();++i)
-	{
-	    jobvec[i].attr("declareImport")(name);
-	}
+        for (size_t i=0;i<jobvec.size();++i)
+        {
+            jobvec[i].attr("declareImport")(name);
+        }
     }
 #ifdef ESYS_MPI
-    globalinfoinvalid=true;	// since we need to regenerate globalvarinfo
+    globalinfoinvalid=true;     // since we need to regenerate globalvarinfo
 #endif
 }
 
@@ -1029,11 +1017,11 @@ void SubWorld::removeVariable(std::string& s)
 {
     reducemap.erase(s);
     varstate.erase(s);
-#ifdef ESYS_MPI    
+#ifdef ESYS_MPI
     globalinfoinvalid=true;
     globalvarinfo.resize(0);
     globalvarcounts.erase(s);
-#endif    
+#endif
 }
 
 void SubWorld::clearVariable(std::string& name)
@@ -1041,7 +1029,7 @@ void SubWorld::clearVariable(std::string& name)
     str2reduce::iterator it=reducemap.find(name);
     if (it==reducemap.end())
     {
-	return;
+        return;
     }
     it->second->reset();
       // if we got here, we must have a valid name so we can change state directly
@@ -1052,14 +1040,14 @@ void SubWorld::resetInterest()
 {
     for (str2char::iterator it=varstate.begin();it!=varstate.end();++it)
     {
-	if (it->second==rs::INTERESTED)
-	{
-	    it->second=rs::NONE;
-	}
-	else if (it->second==rs::OLDINTERESTED)
-	{
-	    it->second=rs::OLD;
-	}
+        if (it->second==rs::INTERESTED)
+        {
+            it->second=rs::NONE;
+        }
+        else if (it->second==rs::OLDINTERESTED)
+        {
+            it->second=rs::OLD;
+        }
     }
 }
 
@@ -1067,8 +1055,8 @@ void SubWorld::newRunJobs()
 {
     for (str2reduce::iterator it=reducemap.begin();it!=reducemap.end();++it)
     {
-	it->second->newRunJobs();
-    }    
+        it->second->newRunJobs();
+    }
 }
 
 std::list<std::pair<std::string, bool> > SubWorld::getVarList()
@@ -1087,10 +1075,10 @@ std::list<std::pair<std::string, std::string> > SubWorld::getVarInfo()
     for (std::map<std::string, Reducer_ptr>::iterator it=reducemap.begin();it!=reducemap.end();++it)
     {
         std::string desc=it->second->description();
-	if (!it->second->hasValue())
-	{
-	    desc+=" (No value)";
-	}
+        if (!it->second->hasValue())
+        {
+            desc+=" (No value)";
+        }
         res.push_back(std::pair<std::string, std::string>(it->first, desc));
     }
     return res;
@@ -1099,20 +1087,16 @@ std::list<std::pair<std::string, std::string> > SubWorld::getVarInfo()
 
 void SubWorld::copyVariable(const std::string& src, const std::string& dest)
 {
-	if (reducemap.find(src)==reducemap.end())
-	{
-	    throw SplitWorldException("Source variable name is not known");
-	}
-	if (reducemap.find(dest)==reducemap.end())
-	{
-	    throw SplitWorldException("Destination variable name is not known");
-	}
+        if (reducemap.find(src)==reducemap.end())
+        {
+            throw SplitWorldException("Source variable name is not known");
+        }
+        if (reducemap.find(dest)==reducemap.end())
+        {
+            throw SplitWorldException("Destination variable name is not known");
+        }
         Reducer_ptr sptr=reducemap[src];
-	Reducer_ptr dptr=reducemap[dest];
-	dptr->copyValueFrom(sptr);
+        Reducer_ptr dptr=reducemap[dest];
+        dptr->copyValueFrom(sptr);
 }
 
-
-
-
-
diff --git a/escriptcore/src/SubWorld.h b/escriptcore/src/SubWorld.h
index 231a516..29a6052 100644
--- a/escriptcore/src/SubWorld.h
+++ b/escriptcore/src/SubWorld.h
@@ -17,9 +17,9 @@
 #ifndef escript_SubWorld_H
 #define escript_SubWorld_H
 
-#include "esysUtils/Esys_MPI.h"
 #include "AbstractDomain.h"
 #include "AbstractReducer.h"
+#include "EsysMPI.h"
 
 namespace escript
 {
@@ -44,18 +44,22 @@ namespace escript
  *   world 2:  v+=1   --- local v=1+9
  * What is the value of v? 20, not 11
 */
-class SubWorld : public boost::enable_shared_from_this<SubWorld>
+class SubWorld : public REFCOUNT_BASE_CLASS(SubWorld)
 {
 public:
-    SubWorld(esysUtils::JMPI& globalcom, esysUtils::JMPI& comm, esysUtils::JMPI& corr, unsigned int subworldcount, unsigned int local_id, bool manualimport);
+    SubWorld(JMPI& globalcom, JMPI& comm, JMPI& corr,
+             unsigned int subworldcount, unsigned int local_id,
+             bool manualimport);
+
     ~SubWorld();
+
     void setDomain(Domain_ptr d);
     Domain_ptr getDomain();
-    esysUtils::JMPI& getMPI();
-    esysUtils::JMPI& getCorrMPI();
-    void addJob(boost::python::object j);		// add a Job to the current batch
-    char runJobs(std::string& errmsg);		// run all jobs in the current batch
-    void clearJobs();				// remove all jobs in the current batch
+    JMPI& getMPI();
+    JMPI& getCorrMPI();
+    void addJob(boost::python::object j);       // add a Job to the current batch
+    char runJobs(std::string& errmsg);          // run all jobs in the current batch
+    void clearJobs();                           // remove all jobs in the current batch
 
     void addVariable(std::string&, Reducer_ptr& red);
     void removeVariable(std::string& name);  
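
The comment block earlier in this header (the "20, not 11" example) describes
the merge semantics for exports: each subworld reduces its jobs' contributions
locally, and the per-world results are then combined, rather than being applied
one after another. A toy illustration of that arithmetic, not escript's reducer
code, with mergeContributionsSketch as a hypothetical name:

    #include <numeric>
    #include <vector>

    double mergeContributionsSketch(const std::vector<double>& perWorldValues)
    {
        // e.g. two worlds each computing 1+9 locally: {10.0, 10.0} -> 20.0,
        // matching the "20, not 11" answer in the comment above.
        return std::accumulate(perWorldValues.begin(), perWorldValues.end(), 0.0);
    }
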
@@ -64,17 +68,17 @@ public:
     std::list<std::pair<std::string, std::string> > getVarInfo();
     size_t getNumVars();
     
-    bool localTransport(std::string& errmsg);	// gather exported values from jobs
-    bool checkRemoteCompatibility(std::string& errmsg);	// check to ensure values
-						// in all worlds are compatible
+    bool localTransport(std::string& errmsg);   // gather exported values from jobs
+    bool checkRemoteCompatibility(std::string& errmsg); // check to ensure values
+                                                // in all worlds are compatible
     
-    bool deliverImports(std::string& errmsg);	// load imports into Job objects
-    bool amLeader();	// true if this proc is the leader for its world
+    bool deliverImports(std::string& errmsg);   // load imports into Job objects
+    bool amLeader();    // true if this proc is the leader for its world
     
-    double getScalarVariable(const std::string& name);
+    DataTypes::real_t getScalarVariable(const std::string& name);
     boost::python::object getLocalObjectVariable(const std::string& name);    
     
-    void debug();	// print out current state information
+    void debug();       // print out current state information
     
     
     
@@ -87,49 +91,49 @@ public:
     void newRunJobs();
     
 private:
-    esysUtils::JMPI everyone;	// communicator linking all procs in all subworlds
-    esysUtils::JMPI swmpi;	// communicator linking all procs in this subworld
-    esysUtils::JMPI corrmpi;	// communicator linking corresponding procs in all subworlds
-				// eg: If this proc is the first in its domain, then corrmpi
-				//     links to the other "first in its domain" processes.
-				//      (So one in each SubWorld).
+    JMPI everyone;   // communicator linking all procs in all subworlds
+    JMPI swmpi;      // communicator linking all procs in this subworld
+    JMPI corrmpi;    // communicator linking corresponding procs in all subworlds
+                                // eg: If this proc is the first in its domain, then corrmpi
+                                //     links to the other "first in its domain" processes.
+                                //      (So one in each SubWorld).
     escript::Domain_ptr domain;
-    std::vector<boost::python::object> jobvec;	// jobs in the current batch
+    std::vector<boost::python::object> jobvec;  // jobs in the current batch
     
     
-    unsigned int swcount;		// number of subwords
-    unsigned int localid;    	// position of this subworld in that sequence
+    unsigned int swcount;       // number of subworlds
+    unsigned int localid;       // position of this subworld in that sequence
     
-typedef std::map<std::string, Reducer_ptr> str2reduce;  
-typedef std::map<std::string, unsigned char> str2char;
-    str2reduce reducemap;		// map: name ->reducer for that variable
-    str2char varstate;		// using the state values from AbstractReducer.h
+    typedef std::map<std::string, Reducer_ptr> str2reduce;  
+    typedef std::map<std::string, unsigned char> str2char;
+    str2reduce reducemap;       // map: name -> reducer for that variable
+    str2char varstate;          // using the state values from AbstractReducer.h
 
     bool manualimports;
     
 #ifdef ESYS_MPI    
-    std::vector<unsigned char> globalvarinfo;	// info about which worlds want which vars
-				  // [vars on process0][vars on process 1][vars on ...]
-typedef std::map<unsigned char, int> countmap;
-typedef std::map<std::string, countmap> str2countmap;
+    std::vector<unsigned char> globalvarinfo;   // info about which worlds want which vars
+                                  // [vars on process0][vars on process 1][vars on ...]
+    typedef std::map<unsigned char, int> countmap;
+    typedef std::map<std::string, countmap> str2countmap;
     str2countmap globalvarcounts;
     bool globalinfoinvalid;
     
     
-    bool makeComm(MPI_Comm& sourcecom, esysUtils::JMPI& sub,std::vector<int>& members);
+    bool makeComm(MPI_Comm& sourcecom, JMPI& sub,std::vector<int>& members);
 
 
     // a group with NEW nodes at the front and INT and OLDINT at the back
     // NONE worlds get an empty communicator
-    bool makeGroupComm1(MPI_Comm& srccom, int vnum, char mystate, esysUtils::JMPI& com);
+    bool makeGroupComm1(MPI_Comm& srccom, int vnum, char mystate, JMPI& com);
 
     // reduce on the first group and copy from cop[0] to others in cop
-    bool makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, esysUtils::JMPI& red, esysUtils::JMPI& cop, bool& incopy);
+    bool makeGroupReduceGroups(MPI_Comm& srccom, int vnum, char mystate, JMPI& red, JMPI& cop, bool& incopy);
 
 
     // A group with a single OLD or OLDINT at the front and all the INT worlds 
     // following it
-    bool makeGroupComm2(MPI_Comm& srccom, int vnum, char mystate, esysUtils::JMPI& com, bool& ingroup);    
+    bool makeGroupComm2(MPI_Comm& srccom, int vnum, char mystate, JMPI& com, bool& ingroup);    
     
 #endif
     
@@ -141,7 +145,7 @@ typedef std::map<std::string, countmap> str2countmap;
 
 typedef boost::shared_ptr<SubWorld> SubWorld_ptr;
 
+} // namespace escript
 
-
-}
 #endif
+
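
Editor's note on the header change above: SubWorld now inherits from REFCOUNT_BASE_CLASS(SubWorld) instead of boost::enable_shared_from_this<SubWorld> directly. The macro's real definition lives elsewhere in the escript sources; the sketch below is only a plausible expansion of that pattern, with all names hypothetical.

    #include <boost/enable_shared_from_this.hpp>
    #include <boost/shared_ptr.hpp>

    // Hypothetical expansion of a macro like REFCOUNT_BASE_CLASS; the real
    // escript definition may differ (it could equally select
    // std::enable_shared_from_this in a C++11 build).
    #define REFCOUNT_BASE_CLASS(x) boost::enable_shared_from_this<x>

    class Widget : public REFCOUNT_BASE_CLASS(Widget)
    {
    public:
        // hand out a shared_ptr that shares ownership with the existing holders
        boost::shared_ptr<Widget> self() { return shared_from_this(); }
    };

    int main()
    {
        boost::shared_ptr<Widget> w(new Widget);
        boost::shared_ptr<Widget> w2 = w->self();   // shares w's reference count
        return (w2 == w) ? 0 : 1;
    }
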
diff --git a/escriptcore/src/SystemMatrixException.cpp b/escriptcore/src/SystemMatrixException.cpp
deleted file mode 100644
index 1ed2c51..0000000
--- a/escriptcore/src/SystemMatrixException.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "SystemMatrixException.h"
-
-using namespace escript;
-
-const std::string 
-SystemMatrixException::exceptionNameValue("SystemMatrixException");
-
-
-const std::string &
-SystemMatrixException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/escriptcore/src/SystemMatrixException.h b/escriptcore/src/SystemMatrixException.h
index 6050864..07d21f1 100644
--- a/escriptcore/src/SystemMatrixException.h
+++ b/escriptcore/src/SystemMatrixException.h
@@ -19,11 +19,10 @@
 #define escript_SystemMatrixException_20040608_H
 
 #include "system_dep.h"
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
 namespace escript
 {
-
   /**
   \brief
   SystemMatrixException exception class.
@@ -32,70 +31,16 @@ namespace escript
   SystemMatrixException exception class.
   The class provides a public function returning the exception name
   */
-  class SystemMatrixException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SystemMatrixException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SystemMatrixException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SystemMatrixException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    SystemMatrixException(const SystemMatrixException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESCRIPT_DLL_API
-    inline SystemMatrixException &
-    operator=(const SystemMatrixException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
+class SystemMatrixException : public EsysException
+{
+public:
+    SystemMatrixException(const std::string& str) : EsysException(str) {}
 
     /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~SystemMatrixException() THROW(NO_ARG) {}
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
+    virtual ~SystemMatrixException() throw() {}
+};
 
 } // end of namespace
+
 #endif
+
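
The same simplification is applied to several exception headers in this commit: the boilerplate (copy constructor, assignment operator, exceptionName machinery) collapses into a one-line subclass of EsysException that forwards a message string. A self-contained sketch of the pattern, using std::runtime_error as a stand-in for escript::EsysException:

    #include <stdexcept>
    #include <string>

    // Stand-in for escript::EsysException, assumed to behave like a
    // std::exception carrying a message.
    class EsysExceptionDemo : public std::runtime_error
    {
    public:
        explicit EsysExceptionDemo(const std::string& msg) : std::runtime_error(msg) {}
    };

    // The whole of the new-style subclass: one forwarding constructor.
    class MySolverException : public EsysExceptionDemo
    {
    public:
        explicit MySolverException(const std::string& msg) : EsysExceptionDemo(msg) {}
    };

    int main()
    {
        try {
            throw MySolverException("matrix is singular");
        } catch (const std::exception& e) {
            return 0;   // e.what() carries the message passed to the constructor
        }
        return 1;       // not reached
    }
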
diff --git a/escriptcore/src/TestDomain.cpp b/escriptcore/src/TestDomain.cpp
index c3cc4cb..9eba7e7 100644
--- a/escriptcore/src/TestDomain.cpp
+++ b/escriptcore/src/TestDomain.cpp
@@ -14,15 +14,13 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <sstream>
 
-
-#include "DomainException.h"
 #include "TestDomain.h"
 #include "Data.h"
+#include "DomainException.h"
+#include "Random.h"
 #include "Utils.h" // for MPI functions
-#include <esysUtils/EsysRandom.h>
 
 namespace escript {
 
@@ -33,18 +31,32 @@ const int TestDomainFS=1;     // Null domains only support 1 functionspace type.
 }
 
 TestDomain::TestDomain(int pointspersample, int numsamples, int dpsize)
-        : m_samples(numsamples), m_dpps(pointspersample), m_dpsize(dpsize)
+        : m_totalsamples(numsamples), m_samples(numsamples), m_dpps(pointspersample), m_dpsize(dpsize),
+        myworld(makeInfo(MPI_COMM_WORLD))
 {
     int world=getMPISizeWorld();
     int rank=getMPIRankWorld();
     m_samples/=world;
+    m_originsample=rank*m_samples;	// how many samples have gone to earlier ranks before we start    
     if (world > 1 && rank < numsamples%world) {
         m_samples++;
     }
-    m_samplerefids=new dim_t[numsamples];
-    for (dim_t i=0; i<numsamples; ++i) {
+    if ((world > 1) && (numsamples%world))
+    {
+	m_originsample+=(rank<numsamples%world?rank:numsamples%world);      
+    }
+    m_endsample=m_originsample+numsamples/world;
+    if ((world > 1) && (rank < numsamples%world))
+    {
+	m_endsample++;      
+    }
+    m_endsample--;		// so that the end_sample is inclusive
+    m_samplerefids=new DataTypes::dim_t[numsamples];
+    for (DataTypes::dim_t i=0; i<numsamples; ++i) {
         m_samplerefids[i]=i+10; // the +10 is arbitrary.
     }                           // so these ids look different from others
+    mytags.push_back(0);
+    resetTagAssignments();
 }
 
 TestDomain::~TestDomain()
@@ -72,10 +84,19 @@ bool TestDomain::onMasterProcessor() const
     return getMPIRank() == 0;
 }
 
+/*
 MPI_Comm TestDomain::getMPIComm() const
 {
     return MPI_COMM_WORLD;
 }
+*/
+
+escript::JMPI TestDomain::getMPI() const
+{
+    return myworld;
+  
+}
+
 
 bool TestDomain::isValidFunctionSpaceType(int functionSpaceType) const
 {
@@ -176,17 +197,23 @@ int TestDomain::getDiracDeltaFunctionsCode() const
     return TestDomainFS;
 }
 
-std::pair<int,dim_t> TestDomain::getDataShape(int functionSpaceCode) const
+std::pair<int,DataTypes::dim_t> TestDomain::getDataShape(int functionSpaceCode) const
 {
-    return std::pair<int,dim_t>(m_dpps,m_samples);
+    return std::pair<int,DataTypes::dim_t>(m_dpps,m_samples);
 }
 
-int TestDomain::getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const
+int TestDomain::getTagFromSampleNo(int functionSpaceType, DataTypes::index_t sampleNo) const
 {
-    return 0;
+    if (sampleNo>=tag_assignment.size())
+    {
+	std::ostringstream oss;
+	oss << "invalid sample number " << sampleNo << " of " << tag_assignment.size();
+	throw DataException(oss.str());
+    }
+    return tag_assignment[sampleNo];
 }
 
-const dim_t* TestDomain::borrowSampleReferenceIDs(int functionSpaceType) const
+const DataTypes::dim_t* TestDomain::borrowSampleReferenceIDs(int functionSpaceType) const
 {
     return m_samplerefids;
 }
@@ -214,24 +241,17 @@ bool TestDomain::canTag(int functionSpaceCode) const
 
 int TestDomain::getNumberOfTagsInUse(int functionSpaceCode) const
 {
-    // this is not arbitrary.
-    // It allows us to report that the default tag is in use
-    return 1;
-}
-
-const int* TestDomain::borrowListOfTagsInUse(int functionSpaceCode) const
-{
-    return defaultList;
+    return mytags.size();
 }
 
 escript::Data TestDomain::getX() const
 {
     if (m_dpsize<2) {
         Data res(0,DataTypes::scalarShape,FunctionSpace( getPtr(), getDefaultCode()),true);
-        DataTypes::ValueType& vec=res.getReady()->getVectorRW();
-        for (dim_t i=0; i<m_samples; ++i) {
+        DataTypes::RealVectorType& vec=res.getReady()->getVectorRW();
+        for (DataTypes::dim_t i=0; i<m_samples; ++i) {
             for (int j=0; j<m_dpps; ++j) {
-                vec[i*m_dpps+j]=i+(1.0*j)/m_dpps;
+                vec[i*m_dpps+j]=m_originsample+i+(1.0*j)/m_dpps;
             }
         }
         return res;
@@ -239,13 +259,13 @@ escript::Data TestDomain::getX() const
     DataTypes::ShapeType p;
     p.push_back(m_dpsize);
     Data res(0,p,FunctionSpace( getPtr(), getDefaultCode()),true);
-    DataTypes::ValueType& vec=res.getReady()->getVectorRW();
+    DataTypes::RealVectorType& vec=res.getReady()->getVectorRW();
     double majorstep=double(1)/m_dpps;
     double minorstep=majorstep*0.9/m_dpsize;
-    for (dim_t i=0; i<m_samples; ++i) {
+    for (DataTypes::dim_t i=0; i<m_samples; ++i) {
         for (int j=0; j<m_dpps; ++j) {
             for (int k=0; k<m_dpsize; ++k) {
-                vec[i*m_dpsize*m_dpps+j*m_dpsize+k]=i+j*majorstep+k*minorstep;
+                vec[i*m_dpsize*m_dpps+j*m_dpsize+k]=(i+m_originsample)+j*majorstep+k*minorstep;
             }
         }
     }
@@ -258,18 +278,69 @@ escript::Data TestDomain::randomFill(const DataTypes::ShapeType& shape,
     escript::Data towipe(0, shape, what, true);
     // since we just made this object, no sharing is possible and we don't
     // need to check for exclusive write
-    escript::DataTypes::ValueType& dv=towipe.getExpandedVectorReference();
+    escript::DataTypes::RealVectorType& dv=towipe.getExpandedVectorReference();
     const size_t dvsize=dv.size();
-    esysUtils::randomFillArray(seed, &(dv[0]), dvsize);
+    escript::randomFillArray(seed, &(dv[0]), dvsize);
     return towipe;
 }
 
-FunctionSpace getTestDomainFunctionSpace(int dpps, dim_t samples, int dpsize)
+void TestDomain::addUsedTag(int t)
+{
+    for (auto i=mytags.begin();i!=mytags.end();++i)
+    {
+	if (*i==t)
+	{
+	    return;
+	}
+    }
+    mytags.push_back(t);
+}
+
+void TestDomain::clearUsedTags()
+{
+    mytags.clear();
+    mytags.push_back(0);
+}
+
+const int* TestDomain::borrowListOfTagsInUse(int functionSpaceCode) const
+{
+    return &mytags[0];
+}
+
+void TestDomain::assignTags(std::vector<int> t)
+{
+    if (t.size()!=m_totalsamples)
+    {
+	throw DataException("Programming error - Tag vector must be the same size as the number of samples.");
+    }
+    tag_assignment=std::vector<int>(m_samples);
+    for (int i=m_originsample;i<=m_endsample;++i)
+    {
+	tag_assignment[i-m_originsample]=t[i];
+    }
+}
+
+
+void TestDomain::resetTagAssignments()
+{
+    tag_assignment=std::vector<int>(m_samples);
+    for (size_t i=0;i<m_samples;++i)
+    {
+	tag_assignment[i]=0;
+    }
+}
+
+
+FunctionSpace getTestDomainFunctionSpace(int dpps, DataTypes::dim_t samples, int dpsize)
 {
     TestDomain* td=new TestDomain(dpps, samples, dpsize);
     Domain_ptr p=Domain_ptr(td);
     return FunctionSpace(p, td->getDefaultCode());
 }
 
+
+
+
+
 }  // end of namespace
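
The constructor above distributes numsamples across the MPI ranks so that the first numsamples % world ranks receive one extra sample, with m_originsample/m_endsample recording an inclusive local range. A standalone sketch of that arithmetic (outside MPI, for illustration only):

    #include <cassert>
    #include <utility>

    // Inclusive [origin, end] sample range for one rank: the first
    // numsamples % world ranks receive one extra sample.
    std::pair<long, long> sampleRange(long numsamples, int world, int rank)
    {
        long base   = numsamples / world;
        long extra  = numsamples % world;
        long origin = rank * base + (rank < extra ? rank : extra);
        long count  = base + (rank < extra ? 1 : 0);
        return std::make_pair(origin, origin + count - 1);
    }

    int main()
    {
        // 10 samples over 4 ranks -> local sizes 3,3,2,2
        assert(sampleRange(10, 4, 0) == std::make_pair(0L, 2L));
        assert(sampleRange(10, 4, 1) == std::make_pair(3L, 5L));
        assert(sampleRange(10, 4, 2) == std::make_pair(6L, 7L));
        assert(sampleRange(10, 4, 3) == std::make_pair(8L, 9L));
        return 0;
    }
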
 
diff --git a/escriptcore/src/TestDomain.h b/escriptcore/src/TestDomain.h
index 80cbdd3..09463da 100644
--- a/escriptcore/src/TestDomain.h
+++ b/escriptcore/src/TestDomain.h
@@ -18,8 +18,8 @@
 #define __ESCRIPT_TESTDOMAIN_H__
 
 #include "system_dep.h"
-
 #include "NullDomain.h"
+#include <vector>
 
 namespace escript {
 
@@ -49,7 +49,8 @@ public:
     virtual int getMPIRank() const;
     virtual void MPIBarrier() const;
     virtual bool onMasterProcessor() const;
-    virtual MPI_Comm getMPIComm() const;
+    
+    virtual escript::JMPI getMPI() const;
 
     virtual bool isValidFunctionSpaceType(int functionSpaceType) const;
 
@@ -79,11 +80,11 @@ public:
     virtual int getReducedSolutionCode() const;
     virtual int getDiracDeltaFunctionsCode() const;
 
-    virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
+    virtual std::pair<int,DataTypes::dim_t> getDataShape(int functionSpaceCode) const;
 
-    virtual int getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const;
+    virtual int getTagFromSampleNo(int functionSpaceType, DataTypes::index_t sampleNo) const;
 
-    virtual const dim_t* borrowSampleReferenceIDs(int functionSpaceType) const;
+    virtual const DataTypes::dim_t* borrowSampleReferenceIDs(int functionSpaceType) const;
 
     virtual int getDim() const;
 
@@ -100,16 +101,32 @@ public:
     virtual escript::Data randomFill(const DataTypes::ShapeType& shape,
                                      const FunctionSpace& what, long seed,
                                      const boost::python::tuple& filter) const;
+				     
+    void addUsedTag(int t);
+    void clearUsedTags();
+    void assignTags(std::vector<int> t);
+    void resetTagAssignments();
 
 private:
-    dim_t m_samples;       // number of samples
+    DataTypes::dim_t m_totalsamples;	// samples in all worlds  
+    DataTypes::dim_t m_samples;       // number of samples
+    DataTypes::dim_t m_originsample;
+    DataTypes::dim_t m_endsample;
+
+    
     int m_dpps;            // data points per sample
     int m_dpsize;          // how big are the datapoints?
-    dim_t* m_samplerefids; // sample reference ids
+    DataTypes::dim_t* m_samplerefids; // sample reference ids
+    
+    std::vector<int> mytags;
+    std::vector<int> tag_assignment; 	// which tag is assigned to each sample
+				// to make testing easier, the tags in use list is 
+				// controlled separately
+    escript::JMPI myworld;
 };
 
 ESCRIPT_DLL_API
-FunctionSpace getTestDomainFunctionSpace(int dpps, dim_t samples, int dpsize);
+FunctionSpace getTestDomainFunctionSpace(int dpps, DataTypes::dim_t samples, int dpsize);
 
 } // end of namespace
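
TestDomain::assignTags in the matching .cpp takes a tag vector covering every sample in every world and keeps only the slice between m_originsample and m_endsample. The slicing itself reduces to this (a sketch detached from the class):

    #include <cassert>
    #include <vector>

    // Keep only the tags for the samples this rank owns, origin..end inclusive.
    std::vector<int> localTags(const std::vector<int>& global, long origin, long end)
    {
        std::vector<int> local(end - origin + 1);
        for (long i = origin; i <= end; ++i)
            local[i - origin] = global[i];
        return local;
    }

    int main()
    {
        std::vector<int> global = {7, 7, 8, 8, 9, 9};
        std::vector<int> mine = localTags(global, 2, 3);   // this rank owns samples 2..3
        assert(mine == (std::vector<int>{8, 8}));
        return 0;
    }
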
 
diff --git a/escriptcore/src/TransportProblemException.cpp b/escriptcore/src/TransportProblemException.cpp
deleted file mode 100644
index 3911cd1..0000000
--- a/escriptcore/src/TransportProblemException.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "TransportProblemException.h"
-
-
-using namespace escript;
-
-
-const std::string 
-TransportProblemException::exceptionNameValue("TransportProblemException");
-
-
-const std::string &
-TransportProblemException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/escriptcore/src/TransportProblemException.h b/escriptcore/src/TransportProblemException.h
index 4123860..0f2b2a4 100644
--- a/escriptcore/src/TransportProblemException.h
+++ b/escriptcore/src/TransportProblemException.h
@@ -19,84 +19,16 @@
 #define escript_TransportProblemException_20040608_H
 
 #include "system_dep.h"
-#include "esysUtils/EsysException.h"
+#include "EsysException.h"
 
 namespace escript
 {
 
-  /**
-  \brief
-  TransportProblemException exception class.
-
-  Description:
-  TransportProblemException exception class.
-  The class provides a public function returning the exception name
-  */
-  class TransportProblemException : public esysUtils::EsysException {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    TransportProblemException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    TransportProblemException(const char *cstr) :
-    Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    TransportProblemException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESCRIPT_DLL_API
-    TransportProblemException(const TransportProblemException &other) :
-    Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESCRIPT_DLL_API
-    inline TransportProblemException &
-    operator=(const TransportProblemException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-    /// Destructor
-    ESCRIPT_DLL_API
-    virtual ~TransportProblemException() THROW(NO_ARG) {}
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESCRIPT_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
+class TransportProblemException : public EsysException
+{
+public:
+    TransportProblemException(const std::string& str) : EsysException(str) {}
+};
 
 } // end of namespace
 
diff --git a/escriptcore/src/UnaryFuncs.h b/escriptcore/src/UnaryFuncs.h
deleted file mode 100644
index 002b824..0000000
--- a/escriptcore/src/UnaryFuncs.h
+++ /dev/null
@@ -1,167 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_UnaryFuncs_20041124_H
-#define escript_UnaryFuncs_20041124_H
-#include "system_dep.h"
-
-namespace escript {
-
-#ifndef FP_NAN
-#define FP_NAN IEEE_NaN()
-#endif
-
-#ifndef INFINITY
-#define INFINITY IEEE_Infy()
-#endif
-
-//======================================================================
-
-inline
-double log1p (const double x)
-{
-  volatile double y;
-  y = 1 + x;
-  return log(y) - ((y-1)-x)/y ;
-}
-
-//======================================================================
-
-inline
-float IEEE_NaN()
-{
-   static unsigned char nan[4]={ 0, 0, 0xc0, 0x7f };
-   return *( float *)nan;
-}
-
-//======================================================================
-
-inline
-double IEEE_Infy()
-{
-   static unsigned char infy[8]={ 0, 0, 0, 0, 0, 0, 0xf0, 0x7f };
-   return *( double *)infy;
-}
-
-
-//======================================================================
-
-#if defined (_WIN32) && !defined(__INTEL_COMPILER)
-inline
-double
-acosh_substitute (const double x)
-{
-  if (x > 1.0 / SQRT_DBL_EPSILON)
-    {
-      return log (x) + M_LN2;
-    }
-  else if (x > 2)
-    {
-      return log (2 * x - 1 / (sqrt (x * x - 1) + x));
-    }
-  else if (x > 1)
-    {
-      double t = x - 1;
-      return log1p (t + sqrt (2 * t + t * t));
-    }
-  else if (x == 1)
-    {
-      return 0;
-    }
-  else
-    {
-      return FP_NAN;
-    }
-}
-
-
-//======================================================================
-
-inline
-double
-asinh_substitute (const double x)
-{
-  double a = fabs (x);
-  double s = (x < 0) ? -1 : 1;
-
-  if (a > 1 / SQRT_DBL_EPSILON)
-    {
-      return s * (log (a) + M_LN2);
-    }
-  else if (a > 2)
-    {
-      return s * log (2 * a + 1 / (a + sqrt (a * a + 1)));
-    }
-  else if (a > SQRT_DBL_EPSILON)
-    {
-      double a2 = a * a;
-      return s * log1p (a + a2 / (1 + sqrt (1 + a2)));
-    }
-  else
-    {
-      return x;
-    }
-}
-
-
-//======================================================================
-
-inline
-double
-atanh_substitute (const double x)
-{
-  double a = fabs (x);
-  double s = (x < 0) ? -1 : 1;
-
-  if (a > 1)
-    {
-      return FP_NAN;
-    }
-  else if (a == 1)
-    {
-      return (x < 0) ? -INFINITY : INFINITY;
-    }
-  else if (a >= 0.5)
-    {
-      return s * 0.5 * log1p (2 * a / (1 - a));
-    }
-  else if (a > DBL_EPSILON)
-    {
-      return s * 0.5 * log1p (2 * a + 2 * a * a / (1 - a));
-    }
-  else
-    {
-      return x;
-    }
-}
-#endif  // windows substitutes for stupid microsoft compiler.
-
-
-inline
-double
-fsign(double x)
-{
-  if (x == 0) {
-    return 0;
-  } else {
-    return x/fabs(x);
-  }
-}
-
-}
-
-#endif
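
UnaryFuncs.h is dropped outright; the helpers it carried (log1p plus acosh/asinh/atanh substitutes for old MSVC) duplicate functions that standard <cmath> has provided since C99/C++11, which is presumably why they are no longer needed. A quick sanity check of those identities:

    #include <cassert>
    #include <cmath>

    int main()
    {
        double x = 0.5;
        assert(std::fabs(std::log1p(std::expm1(x)) - x) < 1e-12);
        assert(std::fabs(std::asinh(std::sinh(x)) - x) < 1e-12);
        assert(std::fabs(std::acosh(std::cosh(x)) - x) < 1e-12);
        assert(std::fabs(std::atanh(std::tanh(x)) - x) < 1e-12);
        return 0;
    }
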
diff --git a/escriptcore/src/UnaryOp.h b/escriptcore/src/UnaryOp.h
deleted file mode 100644
index 825f2bf..0000000
--- a/escriptcore/src/UnaryOp.h
+++ /dev/null
@@ -1,87 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_UnaryOp_20040315_H
-#define escript_UnaryOp_20040315_H
-#include "system_dep.h"
-
-#include "DataConstant.h"
-#include "DataTagged.h"
-#include "DataExpanded.h"
-#include "DataTypes.h"
-
-namespace escript {
-
-/**
-   \brief
-   Perform the given unary operation on each data point of the given Data object.
-   Called by Data::unaryOp.
-   Calls DataArrayView::unaryOp.
-   For DataExpanded objects, operation is done in parallel.
-   \param data Input/Output - The data.
-   \param operation Input - The operation to perform.
-*/
-
-template <class UnaryFunction>
-inline
-void
-unaryOp(DataExpanded& data,
-        UnaryFunction operation)
-{
-  int i,j;
-  DataTypes::ValueType::size_type numDPPSample=data.getNumDPPSample();
-  DataTypes::ValueType::size_type numSamples=data.getNumSamples();
-  DataTypes::ValueType& left=data.getVectorRW();
-  const DataTypes::ShapeType& shape=data.getShape();
-  #pragma omp parallel for private(i,j) schedule(static)
-  for (i=0;i<numSamples;i++) {
-    for (j=0;j<numDPPSample;j++) {
-      DataMaths::unaryOp(left,shape,data.getPointOffset(i,j),operation);
-    }
-  }
-}
-
-template <class UnaryFunction>
-inline
-void
-unaryOp(DataTagged& data,
-        UnaryFunction operation)
-{
-  // perform the operation on each tagged value
-  const DataTagged::DataMapType& lookup=data.getTagLookup();
-  DataTagged::DataMapType::const_iterator i;
-  DataTagged::DataMapType::const_iterator lookupEnd=lookup.end();
-  DataTypes::ValueType& left=data.getVectorRW();
-  const DataTypes::ShapeType& shape=data.getShape();
-  for (i=lookup.begin();i!=lookupEnd;i++) {
-    DataMaths::unaryOp(left,shape,i->second,operation);
-  }
-  // perform the operation on the default value
-  DataMaths::unaryOp(left,shape,data.getDefaultOffset(),operation);
-}
-
-template <class UnaryFunction>
-inline
-void
-unaryOp(DataConstant& data,
-        UnaryFunction operation)
-{
-  DataMaths::unaryOp(data.getVectorRW(),data.getShape(),0,operation);
-}
-
-} // end of namespace
-#endif
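
UnaryOp.h, also removed here, applied a functor to every data point of constant, tagged or expanded data, with the expanded case parallelised over samples via OpenMP. The general shape of that helper, reduced to a flat container (a sketch, not the escript API):

    #include <cmath>
    #include <vector>

    // Apply a functor to every value; the removed helpers dispatched on the
    // data representation and called DataMaths::unaryOp per data point.
    template <class UnaryFunction>
    void applyUnary(std::vector<double>& values, UnaryFunction op)
    {
    #pragma omp parallel for
        for (long i = 0; i < static_cast<long>(values.size()); ++i) {
            values[i] = op(values[i]);
        }
    }

    int main()
    {
        std::vector<double> v(100, 2.0);
        applyUnary(v, [](double x) { return std::sqrt(x); });
        return v[0] > 1.41 ? 0 : 1;
    }
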
diff --git a/escriptcore/src/UtilC.h b/escriptcore/src/UtilC.h
deleted file mode 100644
index 5e1b707..0000000
--- a/escriptcore/src/UtilC.h
+++ /dev/null
@@ -1,25 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  escript_UtilC_20040611_H
-#define escript_UtilC_20040611_H
-#include "system_dep.h"
-#include "SolverOptions.h"
-
-#define ESCRIPT_MAX_DATA_RANK 4
-
-#endif
diff --git a/escriptcore/src/Utils.cpp b/escriptcore/src/Utils.cpp
index 58e0f40..5b9e77a 100644
--- a/escriptcore/src/Utils.cpp
+++ b/escriptcore/src/Utils.cpp
@@ -14,24 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#ifndef OVERLORDPATH
-#define OVERLORDPATH ""
-#endif
-
 #include "Data.h"
 #include "DataVector.h"
+#include "FileWriter.h"
 #include "Utils.h"
 
-#include <esysUtils/Esys_MPI.h>
-#include <esysUtils/esysFileWriter.h>
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
 #include <cstring>
 #include <fstream>
 #include <unistd.h>
@@ -40,7 +27,10 @@
 #include <boost/scoped_array.hpp>
 
 namespace bp = boost::python;
-using esysUtils::FileWriter;
+
+#ifndef OVERLORDPATH
+#define OVERLORDPATH ""
+#endif
 
 namespace escript {
 
@@ -194,6 +184,7 @@ int prepareSocket(unsigned short *port, int *key)
     addr.sin_family = AF_INET;
     addr.sin_port = htons(0);
     addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    memset(&addr.sin_zero, 0, sizeof(addr.sin_zero));
 
     if (bind(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
         perror("bind failure");
@@ -314,47 +305,29 @@ int waitForCompletion(int sfd, int key)
 int runMPIProgram(bp::list args)
 {
 #ifdef ESYS_MPI
-    MPI_Comm intercomm;
-    MPI_Info info;
-    int errors;
-    int nargs = bp::extract<int>(args.attr("__len__")());
-    std::string cmd = bp::extract<std::string>(args[0]);
-#ifdef _WIN32
-    char** c_args = new char*[nargs];
-    char* c_cmd = const_cast<char*>(cmd.c_str());;
-    // skip command name in argument list
-    for (int i=1; i<nargs; i++) {
-        cpp_args[i-1]=bp::extract<std::string>(args[i]);
-        c_args[i-1]=const_cast<char*>(cpp_args[i-1].c_str());
-    }
-    MPI_Info_create(&info);
-    MPI_Comm_spawn(c_cmd, c_args, 1, info, 0, MPI_COMM_WORLD, &intercomm, &errors);
-    MPI_Info_free(&info);
-    delete[] c_args;
-
-    return errors;
-#else // not _WIN32
-    char** c_args = new char*[2+nargs];
-    std::vector<std::string> cpp_args(2+nargs);//allow for wrapper, port, and key
-    char c_cmd[] = OVERLORDPATH"escript-overlord";
-    // skip command name in argument list
-    for (int i=1; i<nargs; i++) {
-        cpp_args[i+1] = bp::extract<std::string>(args[i]);
-        c_args[i+1] = const_cast<char*>(cpp_args[i+1].c_str());
-    }
     unsigned short port = 0;
     int key = 0;
     int sock = prepareSocket(&port, &key);
     if (getMPIWorldSum(sock) < 0)
         return -1;
-    c_args[nargs+1] = NULL;
     char portstr[20] = {'\0'}, keystr[20] = {'\0'};
     sprintf(portstr, "%d", port);
     sprintf(keystr, "%d", key);
+
+    int nargs = bp::extract<int>(args.attr("__len__")());
+    // make room for port, key and terminator
+    char** c_args = new char*[nargs + 3];
     c_args[0] = portstr;
     c_args[1] = keystr;
-    c_args[2] = const_cast<char*>(cmd.c_str());
+    std::vector<std::string> cpp_args(nargs);
+    // copy all arguments (including the command name) after the port and key
+    for (int i = 0; i < nargs; i++) {
+        cpp_args[i] = bp::extract<std::string>(args[i]);
+        c_args[i+2] = const_cast<char*>(cpp_args[i].c_str());
+    }
+    c_args[nargs+2] = NULL;
 
+    MPI_Info info;
     MPI_Info_create(&info);
     // force the gmsh process to run on this host as well for network comm
     char hostname[MPI_MAX_PROCESSOR_NAME];
@@ -362,19 +335,21 @@ int runMPIProgram(bp::list args)
     MPI_Get_processor_name(hostname, &temp);
     char hoststr[] = "host"; //for warnings
     MPI_Info_set(info, hoststr, hostname);
+    MPI_Comm intercomm;
+    int errors;
+    char c_cmd[] = OVERLORDPATH"escript-overlord";
     MPI_Comm_spawn(c_cmd, c_args, 1, info, 0, MPI_COMM_WORLD, &intercomm, &errors);
     MPI_Info_free(&info);
     delete[] c_args;
     if (errors != MPI_SUCCESS)
         return errors;
     return getMPIWorldMax(waitForCompletion(sock, key));
-#endif //#ifdef _WIN32/else
 #else //#ifdef ESYS_MPI
     std::string cmd;
     int nargs = bp::extract<int>(args.attr("__len__")());
-    for (int i=0; i<nargs; i++) {
-        cmd+=bp::extract<std::string>(args[i]);
-        cmd+=" ";
+    for (int i = 0; i < nargs; i++) {
+        cmd += bp::extract<std::string>(args[i]);
+        cmd += " ";
     }
     return system(cmd.c_str());
 #endif //#ifdef ESYS_MPI/else
@@ -384,21 +359,21 @@ int runMPIProgram(bp::list args)
 
 double getMachinePrecision()
 {
-    return DBL_EPSILON;
+    return std::numeric_limits<double>::epsilon();
 }
 
 double getMaxFloat()
 {
-    return DBL_MAX;
+    return std::numeric_limits<double>::max();
 }
 
 void MPIBarrierWorld()
 {
 #ifdef ESYS_MPI
-    if (!esysUtils::NoCOMM_WORLD::active()) {
+    if (!NoCOMM_WORLD::active()) {
         MPI_Barrier(MPI_COMM_WORLD );
     } else {
-        throw esysUtils::EsysException("Attempt to use MPI_COMM_WORLD while it is blocked.");
+        throw EsysException("Attempt to use MPI_COMM_WORLD while it is blocked.");
     }
 #endif
 }
@@ -562,7 +537,7 @@ void saveDataCSV(const std::string& filename, bp::dict arg,
     int error = 0;
     try {
         std::vector<int> offset(numdata);
-        std::vector<const DataAbstract::ValueType::value_type*> samples(numdata);
+        std::vector<const DataTypes::real_t*> samples(numdata);
 
         for (int i=0; i<numsamples; ++i) {
             if (!best.ownSample(i)) {
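
The reworked runMPIProgram above drops the separate _WIN32 branch; in MPI builds it spawns the escript-overlord wrapper and passes the port, the key and then the original command line, NULL-terminated, as the argv for MPI_Comm_spawn. A small sketch of just that argv construction (fake port/key values and argument names, no MPI required):

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main()
    {
        // stand-in for the Python argument list handed to runMPIProgram
        std::vector<std::string> args = {"gmsh", "-2", "mesh.geo"};

        char portstr[20] = {'\0'}, keystr[20] = {'\0'};
        std::snprintf(portstr, sizeof(portstr), "%d", 4242);   // example port
        std::snprintf(keystr, sizeof(keystr), "%d", 1234);     // example key

        // slots: [0]=port, [1]=key, [2..]=original arguments, last=NULL terminator
        std::vector<char*> argv(args.size() + 3);
        argv[0] = portstr;
        argv[1] = keystr;
        for (std::size_t i = 0; i < args.size(); ++i)
            argv[i + 2] = const_cast<char*>(args[i].c_str());
        argv[args.size() + 2] = NULL;

        for (char** p = argv.data(); *p != NULL; ++p)
            std::printf("%s\n", *p);
        return 0;
    }
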
diff --git a/escriptcore/src/WrappedArray.cpp b/escriptcore/src/WrappedArray.cpp
index 7acb8ce..edc5d0b 100644
--- a/escriptcore/src/WrappedArray.cpp
+++ b/escriptcore/src/WrappedArray.cpp
@@ -14,18 +14,23 @@
 *
 *****************************************************************************/
 
-#include <boost/python/tuple.hpp>
+#include "Data.h"
 #include "WrappedArray.h"
 #include "DataException.h"
-#if HAVE_NUMPY_H
+
+#if ESYS_HAVE_NUMPY_H
 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #include <numpy/ndarrayobject.h>
 #endif
 
 #include <iostream>
 
+#include <boost/python/tuple.hpp>
+
 using namespace escript;
 using namespace boost::python;
+using DataTypes::cplx_t;
+using DataTypes::real_t;
 
 namespace
 {
@@ -54,6 +59,69 @@ void checkFeatures(const boost::python::object& obj)
 	}
 }
 
+
+// This should not be called on anything which does
+// not have a __len__
+bool checkForComplex(const boost::python::object& obj)
+{
+    try
+    {
+	int len=extract<int>(obj.attr("__len__")());
+	for (int i=0;i<len;++i)
+	{
+	    const boost::python::object t=obj[i];
+	    bool haslen=false;
+	    try
+            {
+	        extract<int>(t.attr("__len__")());
+	        haslen=true;
+	    }
+	    catch(...)
+	    {
+		PyErr_Clear();
+	    }
+	    	// If it has a length, we dig down
+		// if not, we test for complex
+	    if (haslen)
+	    {
+                if (checkForComplex(t))
+		{
+		    return true;
+		}
+	    }
+	    else
+	    {
+		extract<DataTypes::real_t> er(t);
+		if (!er.check())
+		{
+		    // unfortunately, if this was a numpy object, that check may fail
+		    // even if it should succeed (e.g. numpy.int64 on python3)
+		    // instead, we will try to call __float__ and see what happens
+		    try
+		    {
+			t.attr("__float__")();
+			return false;			// if this check succeeds it isn't complex
+		    }
+		    catch (...)
+		    {
+			PyErr_Clear();
+			// at this point, we have no apparent way to get a real value out, so
+			// we assume it must be complex
+			return true;
+		    }
+		}
+	    }
+	}
+	return false;
+    }
+    catch(...)
+    {
+        PyErr_Clear();
+	return false;
+    }
+    return false;
+}
+
 void getObjShape(const boost::python::object& obj, DataTypes::ShapeType& s)
 {
 	int len=0;
@@ -82,14 +150,25 @@ void getObjShape(const boost::python::object& obj, DataTypes::ShapeType& s)
 }
 
 WrappedArray::WrappedArray(const boost::python::object& obj_in)
-:obj(obj_in)
+:obj(obj_in),converted(false),iscomplex(false),scalar_r(nan("")),scalar_c(nan(""))
 {
-	dat=0;
+	dat_r=0;
+	dat_c=0;
 	// First we check for scalars
 	try
 	{
-	   double v=extract<double>(obj_in);
-	   m_scalar=v;
+	   extract<DataTypes::cplx_t> ec(obj_in);
+	   extract<real_t> er(obj_in);
+	   if (er.check())		// check for real_t first because complex will fail this
+	   {
+	      scalar_r=er();
+	   }
+	   else
+	   {
+	      scalar_c=ec();
+	      iscomplex=true;
+	     
+	   }
 	   rank=0;
 	   return;
 	} 
@@ -99,8 +178,19 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 	}
 	try
 	{
-	   double v=extract<double>(obj_in[make_tuple()]);
-	   m_scalar=v;
+	   const boost::python::object obj_in_t=obj_in[make_tuple()];
+	   extract<DataTypes::cplx_t> ec(obj_in_t);
+	   extract<real_t> er(obj_in_t);
+	   if (er.check())
+	   {	     
+	      scalar_r=er();
+	     
+	   }
+	   else
+	   {
+	      scalar_c=ec();
+	      iscomplex=true;
+	   }	   
 	   rank=0;
 	   return;
 	} 
@@ -110,12 +200,14 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 	}
 
 
-	m_scalar=0;
+	scalar_c=0;
+	scalar_r=0;
 	checkFeatures(obj_in);
 	getObjShape(obj,shape);
 	rank=shape.size();
+	iscomplex=checkForComplex(obj_in);
 
-#if HAVE_NUMPY_H
+#if ESYS_HAVE_NUMPY_H
 	// if obj is a numpy array it is much faster to copy the array through the
 	// __array_struct__ interface instead of extracting single values from the
 	// components via getElt(). For this to work we check below that
@@ -125,10 +217,19 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 	try
 	{
 		object o = (extract<object>(obj.attr("__array_struct__")));
+#ifdef ESPYTHON3
+		if (PyCapsule_CheckExact(o.ptr()))
+#else
 		if (PyCObject_Check(o.ptr()))
+#endif
 		{
 			PyObject* cobj=(PyObject*)o.ptr();
+#ifdef ESPYTHON3
+            const char* name = PyCapsule_GetName(cobj);
+			PyArrayInterface* arr=(PyArrayInterface*)PyCapsule_GetPointer(cobj, name);
+#else
 			PyArrayInterface* arr=(PyArrayInterface*)PyCObject_AsVoidPtr(cobj);
+#endif
 #ifndef NPY_1_7_API_VERSION
   #define NPY_ARRAY_IN_ARRAY NPY_IN_ARRAY
   #define NPY_ARRAY_NOTSWAPPED NPY_NOTSWAPPED
@@ -144,9 +245,9 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 
 				if (arr->typekind == 'f')
 				{
-					if (arr->itemsize==sizeof(double))
+					if (arr->itemsize==sizeof(real_t))
 					{
-						convertNumpyArray<double>((const double*)arr->data, strides);
+						convertNumpyArray<real_t>((const real_t*)arr->data, strides);
 					}
 					else if (arr->itemsize==sizeof(float))
 			   		{
@@ -175,6 +276,15 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 						convertNumpyArray<unsigned long>((const unsigned long*)arr->data, strides);
 					}
 				}
+				else if (arr->typekind == 'c')
+				{
+					if (arr->itemsize==sizeof(cplx_t))
+				   	{
+						convertNumpyArrayC<DataTypes::cplx_t>((const cplx_t*)arr->data, strides);
+						iscomplex=true;
+					}
+					// not accommodating other types of complex values
+				}				
 			}
 		}
 	} catch (...)
@@ -184,20 +294,80 @@ WrappedArray::WrappedArray(const boost::python::object& obj_in)
 #endif
 }
 
+
+template<typename T>
+void WrappedArray::convertNumpyArrayC(const T* array, const std::vector<int>& strides) const
+{
+	// this method is only called by the constructor above which does the
+	// necessary checks and initialisations
+	int size=DataTypes::noValues(shape);
+	dat_c=new cplx_t[size];
+	switch (rank)
+	{
+		case 1:
+#pragma omp parallel for
+			for (int i=0;i<shape[0];i++)
+			{
+				dat_c[i]=array[i*strides[0]];
+			}
+		break;
+		case 2:
+#pragma omp parallel for
+			for (int i=0;i<shape[0];i++)
+			{
+				for (int j=0;j<shape[1];j++)
+				{
+					dat_c[DataTypes::getRelIndex(shape,i,j)]=array[i*strides[0]+j*strides[1]];
+				}
+			}
+		break;
+		case 3:
+#pragma omp parallel for
+			for (int i=0;i<shape[0];i++)
+			{
+				for (int j=0;j<shape[1];j++)
+				{
+					for (int k=0;k<shape[2];k++)
+					{
+						dat_c[DataTypes::getRelIndex(shape,i,j,k)]=array[i*strides[0]+j*strides[1]+k*strides[2]];
+					}
+				}
+			}
+		break;
+		case 4:
+#pragma omp parallel for
+			for (int i=0;i<shape[0];i++)
+			{
+				for (int j=0;j<shape[1];j++)
+				{
+					for (int k=0;k<shape[2];k++)
+					{
+						for (int m=0;m<shape[3];m++)
+						{
+							dat_c[DataTypes::getRelIndex(shape,i,j,k,m)]=array[i*strides[0]+j*strides[1]+k*strides[2]+m*strides[3]];
+						}
+					}
+				}
+			}
+		break;
+	}
+}
+
+
 template<typename T>
 void WrappedArray::convertNumpyArray(const T* array, const std::vector<int>& strides) const
 {
 	// this method is only called by the constructor above which does the
 	// necessary checks and initialisations
 	int size=DataTypes::noValues(shape);
-	dat=new double[size];
+	dat_r=new real_t[size];
 	switch (rank)
 	{
 		case 1:
 #pragma omp parallel for
 			for (int i=0;i<shape[0];i++)
 			{
-				dat[i]=array[i*strides[0]];
+				dat_r[i]=array[i*strides[0]];
 			}
 		break;
 		case 2:
@@ -206,7 +376,7 @@ void WrappedArray::convertNumpyArray(const T* array, const std::vector<int>& str
 			{
 				for (int j=0;j<shape[1];j++)
 				{
-					dat[DataTypes::getRelIndex(shape,i,j)]=array[i*strides[0]+j*strides[1]];
+					dat_r[DataTypes::getRelIndex(shape,i,j)]=array[i*strides[0]+j*strides[1]];
 				}
 			}
 		break;
@@ -218,7 +388,7 @@ void WrappedArray::convertNumpyArray(const T* array, const std::vector<int>& str
 				{
 					for (int k=0;k<shape[2];k++)
 					{
-						dat[DataTypes::getRelIndex(shape,i,j,k)]=array[i*strides[0]+j*strides[1]+k*strides[2]];
+						dat_r[DataTypes::getRelIndex(shape,i,j,k)]=array[i*strides[0]+j*strides[1]+k*strides[2]];
 					}
 				}
 			}
@@ -233,7 +403,7 @@ void WrappedArray::convertNumpyArray(const T* array, const std::vector<int>& str
 					{
 						for (int m=0;m<shape[3];m++)
 						{
-							dat[DataTypes::getRelIndex(shape,i,j,k,m)]=array[i*strides[0]+j*strides[1]+k*strides[2]+m*strides[3]];
+							dat_r[DataTypes::getRelIndex(shape,i,j,k,m)]=array[i*strides[0]+j*strides[1]+k*strides[2]+m*strides[3]];
 						}
 					}
 				}
@@ -242,14 +412,15 @@ void WrappedArray::convertNumpyArray(const T* array, const std::vector<int>& str
 	}
 }
 
-void WrappedArray::convertArray() const
+
+void WrappedArray::convertArrayR() const
 {
-	if ((dat!=0) || (rank<=0) || (rank>4))	// checking illegal rank here to avoid memory issues later
+	if ((converted) || (rank<=0) || (rank>4))	// checking illegal rank here to avoid memory issues later
 	{					// yes the failure is silent here but not doing the copy 
 	    return;				// will just cause an error to be raised later
 	}
 	int size=DataTypes::noValues(shape);
-	double* tdat=new double[size];
+	real_t* tdat=new real_t[size];
 	switch (rank)
 	{
 	case 1: for (int i=0;i<shape[0];i++)
@@ -294,15 +465,90 @@ void WrappedArray::convertArray() const
 		;  // do nothing
 		// can't happen. We've already checked the bounds above
 	}
-	dat=tdat;
-}
+	dat_r=tdat;    
+	converted=true;
+}  
 
-WrappedArray::~WrappedArray()
+
+void WrappedArray::convertArrayC() const
 {
-	if (dat!=0)
+	if ((converted) || (rank<=0) || (rank>4))	// checking illegal rank here to avoid memory issues later
+	{					// yes the failure is silent here but not doing the copy 
+	    return;				// will just cause an error to be raised later
+	}
+	int size=DataTypes::noValues(shape);
+	cplx_t* tdat=new cplx_t[size];
+	switch (rank)
 	{
-	    delete[] dat;
+	case 1: for (int i=0;i<shape[0];i++)
+		{
+			tdat[i]=getElt(i);
+		}
+		break;
+	case 2: for (int i=0;i<shape[0];i++)
+		{
+		    for (int j=0;j<shape[1];j++)
+		    {
+			tdat[DataTypes::getRelIndex(shape,i,j)]=getElt(i,j);
+		    }
+		}
+		break;
+	case 3: for (int i=0;i<shape[0];i++)
+		{
+		    for (int j=0;j<shape[1];j++)
+		    {
+			for (int k=0;k<shape[2];k++)
+			{
+			    tdat[DataTypes::getRelIndex(shape,i,j,k)]=getElt(i,j,k);
+			}
+		    }
+		}
+		break;
+	case 4: for (int i=0;i<shape[0];i++)
+		{
+		    for (int j=0;j<shape[1];j++)
+		    {
+			for (int k=0;k<shape[2];k++)
+			{
+			    for (int m=0;m<shape[3];m++)
+			    {
+			    	tdat[DataTypes::getRelIndex(shape,i,j,k,m)]=getElt(i,j,k,m);
+			    }
+			}
+		    }
+		}
+		break;
+	default:
+		;  // do nothing
+		// can't happen. We've already checked the bounds above
 	}
+	dat_c=tdat;    
+	converted=true;
+}  
+
+
+void WrappedArray::convertArray() const
+{
+    if (iscomplex)
+    {
+	convertArrayC();
+    }
+    else
+    {
+	convertArrayR();
+    }
+}
+
+WrappedArray::~WrappedArray()
+{
+    if (dat_r!=0)
+    {
+	delete[] dat_r;
+    }
+    if (dat_c!=0)
+    {
+	delete[] dat_c;
+    }
 }
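
convertNumpyArray and the new convertNumpyArrayC both walk the source with per-dimension strides and write into a dense row-major buffer indexed via DataTypes::getRelIndex. A reduced rank-2 version of that copy, with strides counted in elements (an assumption; the real code derives them from the numpy array interface):

    #include <cassert>
    #include <complex>
    #include <vector>

    // Copy a strided rank-2 source into a dense row-major buffer, widening to
    // complex (the real code picks real_t or cplx_t depending on the source).
    template <typename T>
    std::vector<std::complex<double> > copyStrided(const T* src, int rows, int cols,
                                                   int rowStride, int colStride)
    {
        std::vector<std::complex<double> > out(rows * cols);
        for (int i = 0; i < rows; ++i)
            for (int j = 0; j < cols; ++j)
                out[i * cols + j] = src[i * rowStride + j * colStride];
        return out;
    }

    int main()
    {
        // a 2x3 matrix stored column-major: stride 1 down a column, 2 along a row
        double colmajor[] = {1, 4, 2, 5, 3, 6};
        std::vector<std::complex<double> > dense = copyStrided(colmajor, 2, 3, 1, 2);
        assert(dense[0] == std::complex<double>(1.0));
        assert(dense[5] == std::complex<double>(6.0));
        return 0;
    }
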
 
 
diff --git a/escriptcore/src/WrappedArray.h b/escriptcore/src/WrappedArray.h
index 3ddd591..736c622 100644
--- a/escriptcore/src/WrappedArray.h
+++ b/escriptcore/src/WrappedArray.h
@@ -22,6 +22,8 @@
 #include "system_dep.h"
 #include "DataTypes.h"
 #include "boost/python/extract.hpp"
+#include "boost/python/object.hpp"
+#include <complex>
 
 namespace escript
 {
@@ -33,21 +35,43 @@ public:
 	~WrappedArray();
 	unsigned int getRank() const;
 	const DataTypes::ShapeType& getShape() const;
-	double getElt() const;
-	double getElt(unsigned int i) const;
-	double getElt(unsigned int i, unsigned int j) const;
-	double getElt(unsigned int i, unsigned int j, unsigned int k) const;
-	double getElt(unsigned int i, unsigned int j, unsigned int k, unsigned int m) const;
+	bool isComplex() const;
+	DataTypes::real_t getElt() const;
+	DataTypes::real_t getElt(unsigned int i) const;
+	DataTypes::real_t getElt(unsigned int i, unsigned int j) const;
+	DataTypes::real_t getElt(unsigned int i, unsigned int j, unsigned int k) const;
+	DataTypes::real_t getElt(unsigned int i, unsigned int j, unsigned int k, unsigned int m) const;
+	
+	DataTypes::cplx_t getEltC() const;
+	DataTypes::cplx_t getEltC(unsigned int i) const;
+	DataTypes::cplx_t getEltC(unsigned int i, unsigned int j) const;
+	DataTypes::cplx_t getEltC(unsigned int i, unsigned int j, unsigned int k) const;
+	DataTypes::cplx_t getEltC(unsigned int i, unsigned int j, unsigned int k, unsigned int m) const;
+	
+	
 	void convertArray() const;
 private:
+	void convertArrayR() const;
+	void convertArrayC() const;
 	template<typename T> void convertNumpyArray(const T* array, const std::vector<int>& strides) const;
+	template<typename T> void convertNumpyArrayC(const T* array, const std::vector<int>& strides) const;
 	const boost::python::object& obj;
-	int rank;
+	int rank;	
+	mutable bool converted;		// has the array been converted to a C array
+	bool iscomplex;		// is the wrapped array storing complex values?	
 	escript::DataTypes::ShapeType shape;
-	double m_scalar;
-	mutable double* dat;
+	DataTypes::real_t scalar_r;
+	DataTypes::cplx_t scalar_c;
+	mutable DataTypes::real_t* dat_r;			// real data
+	mutable DataTypes::cplx_t* dat_c;	// complex data   - only one of these members should be used
 };
 
+
+inline bool WrappedArray::isComplex() const
+{
+    return iscomplex;
+}
+
 inline unsigned int 
 WrappedArray::getRank() const
 {
@@ -60,40 +84,122 @@ WrappedArray::getShape() const
 	return shape;
 }
 
-inline double
+inline DataTypes::real_t
 WrappedArray::getElt() const
 {
-	return m_scalar;
+    if (iscomplex)
+    {
+      return nan("");
+    }  
+    return scalar_r;
 }
 
 
-inline double
+inline DataTypes::real_t
 WrappedArray::getElt(unsigned int i) const
 {  // __float__ added to deal with numpy. If this causes problems we may have to register a custom converter
-        return (dat!=0)?dat[i]:(boost::python::extract<double>(obj[i].attr("__float__")()));	
+    if (iscomplex)
+    {
+      return nan("");
+    }
+    return (dat_r!=0)?dat_r[i]:(boost::python::extract<DataTypes::real_t>(obj[i].attr("__float__")()));	
 }
 
 inline
-double 
+DataTypes::real_t 
 WrappedArray::getElt(unsigned int i, unsigned int j) const
 {
-	return (dat!=0)?dat[DataTypes::getRelIndex(shape,i,j)]:(boost::python::extract<double>(obj[i][j].attr("__float__")()));
+    if (iscomplex)
+    {
+      return nan("");
+    }  
+    return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j)]:(boost::python::extract<DataTypes::real_t>(obj[i][j].attr("__float__")()));
 }
 
 inline
-double 
+DataTypes::real_t 
 WrappedArray::getElt(unsigned int i, unsigned int j, unsigned int k) const
 {
-	return (dat!=0)?dat[DataTypes::getRelIndex(shape,i,j,k)]:(boost::python::extract<double>(obj[i][j][k].attr("__float__")()));
+    if (iscomplex)
+    {
+      return nan("");
+    }    
+    return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j,k)]:(boost::python::extract<DataTypes::real_t>(obj[i][j][k].attr("__float__")()));
 }
 
 inline
-double 
+DataTypes::real_t 
 WrappedArray::getElt(unsigned int i, unsigned int j, unsigned int k, unsigned int m) const
 {
-	return (dat!=0)?dat[DataTypes::getRelIndex(shape,i,j,k,m)]:(boost::python::extract<double>(obj[i][j][k][m].attr("__float__")()));
+    if (iscomplex)
+    {
+      return nan("");
+    }  
+    return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j,k,m)]:(boost::python::extract<DataTypes::real_t>(obj[i][j][k][m].attr("__float__")()));
+}
+
+
+
+
+
+inline DataTypes::cplx_t
+WrappedArray::getEltC() const
+{
+    if (!iscomplex)
+    {
+      return scalar_r;
+    }  
+    return scalar_c;
+}
+
+
+inline DataTypes::cplx_t
+WrappedArray::getEltC(unsigned int i) const
+{
+    if (!iscomplex)	// let's try to get a real value out instead
+    {
+      return (dat_r!=0)?dat_r[i]:(boost::python::extract<DataTypes::real_t>(obj[i]));
+    }
+    return (dat_c!=0)?dat_c[i]:(boost::python::extract<DataTypes::cplx_t>(obj[i]));	// don't know if this will work with numpy	
 }
 
+inline
+DataTypes::cplx_t 
+WrappedArray::getEltC(unsigned int i, unsigned int j) const
+{
+    if (!iscomplex)
+    {
+       return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j)]:(boost::python::extract<DataTypes::real_t>(obj[i][j]));
+    }  
+    return (dat_c!=0)?dat_c[DataTypes::getRelIndex(shape,i,j)]:(boost::python::extract<DataTypes::cplx_t>(obj[i][j]));
+}
+
+inline
+DataTypes::cplx_t 
+WrappedArray::getEltC(unsigned int i, unsigned int j, unsigned int k) const
+{
+    if (!iscomplex)
+    {
+      return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j,k)]:(boost::python::extract<DataTypes::real_t>(obj[i][j][k]));
+    }    
+    return (dat_c!=0)?dat_c[DataTypes::getRelIndex(shape,i,j,k)]:(boost::python::extract<DataTypes::cplx_t>(obj[i][j][k]));
+}
+
+inline
+DataTypes::cplx_t 
+WrappedArray::getEltC(unsigned int i, unsigned int j, unsigned int k, unsigned int m) const
+{
+    if (!iscomplex)
+    {
+      return (dat_r!=0)?dat_r[DataTypes::getRelIndex(shape,i,j,k,m)]:(boost::python::extract<DataTypes::real_t>(obj[i][j][k][m]));
+    }  
+    return (dat_c!=0)?dat_c[DataTypes::getRelIndex(shape,i,j,k,m)]:(boost::python::extract<DataTypes::cplx_t>(obj[i][j][k][m]));
+}
+
+
+
+
+
 }
 
 #endif
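
The header's accessor split is worth spelling out: the real getElt* family returns NaN when the wrapped object holds complex values, while the getEltC* family widens real content to complex so callers can always go through the complex path. A toy class showing only that dispatch (not the escript API):

    #include <cassert>
    #include <cmath>
    #include <complex>

    // Toy analogue of WrappedArray's scalar storage: the real getter refuses
    // complex content, the complex getter widens real content.
    class DualScalar
    {
    public:
        explicit DualScalar(double r) : iscomplex(false), re(r), cx(0.0) {}
        explicit DualScalar(std::complex<double> c) : iscomplex(true), re(0.0), cx(c) {}

        double getElt() const                 // cf. WrappedArray::getElt()
        {
            return iscomplex ? std::nan("") : re;
        }
        std::complex<double> getEltC() const  // cf. WrappedArray::getEltC()
        {
            return iscomplex ? cx : std::complex<double>(re);
        }

    private:
        bool iscomplex;
        double re;
        std::complex<double> cx;
    };

    int main()
    {
        DualScalar r(3.0), c(std::complex<double>(1.0, 2.0));
        assert(r.getEltC() == std::complex<double>(3.0));
        assert(std::isnan(c.getElt()));
        return 0;
    }
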
diff --git a/escriptcore/src/escriptcpp.cpp b/escriptcore/src/escriptcpp.cpp
index d29561c..be4c98f 100644
--- a/escriptcore/src/escriptcpp.cpp
+++ b/escriptcore/src/escriptcpp.cpp
@@ -14,45 +14,34 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Data.h"
-#include "FunctionSpace.h"
-#include "FunctionSpaceFactory.h"
-#include "DataFactory.h"
 #include "AbstractContinuousDomain.h"
-#include "AbstractDomain.h"
-#include "Utils.h"
+#include "AbstractReducer.h"
 #include "AbstractSystemMatrix.h"
 #include "AbstractTransportProblem.h"
+#include "Data.h"
+#include "DataFactory.h"
 #include "DataVector.h"
-#include "esysUtils/Esys_MPI.h"
 #include "EscriptParams.h"
-#include "TestDomain.h"
-#include "SubWorld.h"
-#include "SplitWorld.h"
-#include "AbstractReducer.h"
+#include "ExceptionTranslators.h"
+#include "FunctionSpace.h"
+#include "FunctionSpaceFactory.h"
 #include "MPIDataReducer.h"
 #include "MPIScalarReducer.h"
 #include "NonReducedVariable.h"
 #include "SolverOptions.h"
-#include "SolverOptionsException.h"
-
-#include "esysUtils/blocktimer.h"
-
-#include "esysUtils/esysExceptionTranslator.h"
+#include "SplitWorld.h"
+#include "SubWorld.h"
+#include "TestDomain.h"
+#include "Utils.h"
 
-#include <boost/version.hpp>
 #include <boost/python.hpp>
-#include <boost/python/module.hpp>
 #include <boost/python/def.hpp>
+#include <boost/python/errors.hpp>
+#include <boost/python/module.hpp>
 #include <boost/python/object.hpp>
 #include <boost/python/tuple.hpp>
 #include <boost/smart_ptr.hpp>
 #include <boost/version.hpp>
-#include <boost/python/errors.hpp>
 
 using namespace boost::python;
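
The include reshuffle above is cosmetic, but the module body that follows uses the usual boost::python idioms: docstring_options, a module __doc__, free functions via def() and classes via class_<>. A minimal module in the same style (demo names only, none of them part of escript):

    #include <string>
    #include <boost/python.hpp>

    namespace {

    struct Greeter
    {
        std::string greet() const { return "hello"; }
    };

    int addOne(int x) { return x + 1; }

    } // anonymous namespace

    BOOST_PYTHON_MODULE(demo)
    {
        using namespace boost::python;

        // user-defined docstrings and python signatures, no C++ signatures
        docstring_options docopt(true, true, false);
        scope().attr("__doc__") = "tiny demonstration module";

        def("addOne", addOne, args("x"), "Return x+1.");

        class_<Greeter>("Greeter", "Says hello.", init<>())
            .def("greet", &Greeter::greet, "Return a greeting.");
    }
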
 
@@ -117,18 +106,19 @@ bool block_cmp_domains(const escript::AbstractDomain&, boost::python::object o)
     boost::python::throw_error_already_set();   
     return false;
 }
-  
 }
 
 BOOST_PYTHON_MODULE(escriptcpp)
 {
-
-  #if BOOST_VERSION >= 103500
+#if BOOST_VERSION >= 103500
 // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
-  docstring_options docopt(true,true,false);
-  #endif
+    docstring_options docopt(true,true,false);
+#endif
+
+    scope().attr("__doc__") = "To use this module, please import esys.escript";
 
-  scope().attr("__doc__") = "To use this module, please import esys.escript";      
+    // register escript's default translators
+    REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
 
 /* begin SubWorld things */
 
@@ -165,12 +155,7 @@ BOOST_PYTHON_MODULE(escriptcpp)
   def("setNumberOfThreads",escript::setNumberOfThreads,"Use of this method is strongly discouraged.");
   def("getNumberOfThreads",escript::getNumberOfThreads,"Return the maximum number of threads"
         " available to OpenMP.");
-  def("releaseUnusedMemory",escript::releaseUnusedMemory);
-  def("blocktimer_initialize",blocktimer_initialize);
-  def("blocktimer_reportSortByName",blocktimer_reportSortByName);
-  def("blocktimer_reportSortByTime",blocktimer_reportSortByTime);
-  def("blocktimer_increment",blocktimer_increment);
-  def("blocktimer_time",blocktimer_time);
+  def("releaseUnusedMemory",escript::DataTypes::releaseUnusedMemory);
   def("getVersion",escript::getSvnVersion,"This method will only report accurate version numbers for clean checkouts.");
   def("printParallelThreadCounts",escript::printParallelThreadCnt);
   def("getMPISizeWorld",escript::getMPISizeWorld,"Return number of MPI processes in the job.");
@@ -456,12 +441,9 @@ args("arg"), "assigns new location to the domain\n\n"
   //
   // Interface for Data
   //
-  class_<escript::Data>("Data"/*,shared_ptr<Data>*/, "Represents a collection of datapoints. It is used to store the values of a function. For more details please consult the c++ class documentation.",init<>() )
+  class_<escript::Data>("Data"/*,shared_ptr<Data>*/, "Represents a collection of datapoints. It is used to store the values of a function. For more details please consult the c++ class documentation.",init<>())
     // various constructors for Data objects
-    .def(init<const object&, optional<const escript::FunctionSpace&, bool> >(args("value","what","expand")))
-    .def(init<const double, const tuple&, optional<const escript::FunctionSpace&, bool> >(args("value","shape","what","expand")))
-    .def(init<const escript::Data&, const escript::FunctionSpace&>(args("value","what")))
-    .def(init<const escript::Data&>())
+    .def(init<object, optional<object, object, object>>(args("value", "p2", "p3", "p4")))    
     // Note for Lutz, Need to specify the call policy in order to return a
     // reference. In this case return_internal_reference.
     .def("__str__",&escript::Data::toString)
@@ -517,9 +499,11 @@ args("arg"), "assigns new location to the domain\n\n"
         ":return: True if this ``Data`` is lazy.")
     .def("isReady",&escript::Data::isReady,":rtype: ``bool``\n"
         ":return: True if this ``Data`` is not lazy.")
+    .def("isComplex", &escript::Data::isComplex,":rtype: ``bool``\n"
+	":return: True if this ``Data`` stores complex values.")
     .def("expand",&escript::Data::expand,"Convert the data to expanded representation if it is not expanded already.")
     .def("hasNaN",&escript::Data::hasNaN,"Returns True if data contains NaN.")
-    .def("replaceNaN",&escript::Data::replaceNaN,args("value"),"Replaces NaN values with value")
+    .def("replaceNaN",&escript::Data::replaceNaNPython,args("value"),"Replaces NaN values with value")
     .def("tag",&escript::Data::tag,"Convert data to tagged representation if it is not already tagged or expanded")
     .def("resolve",&escript::Data::resolve,"Convert the data to non-lazy representation.")
     .def("copy",&escript::Data::copy,args("other"),"Make this object a copy of ``other``\n"
@@ -612,13 +596,17 @@ args("arg"), "assigns new location to the domain\n\n"
         ":param check_boundaries: If True, an exception will be thrown if the data object contains values"
         "outside the range given by ``in``.\n"
     )	 
-    .def("minGlobalDataPoint",&escript::Data::minGlobalDataPoint,"Please consider using getInfLocator() from pdetools instead.")
-    .def("maxGlobalDataPoint",&escript::Data::maxGlobalDataPoint, "Please consider using getSupLocator() from pdetools instead.")
+    .def("internal_minGlobalDataPoint",&escript::Data::minGlobalDataPoint,"Please consider using getInfLocator() from pdetools instead.")
+    .def("internal_maxGlobalDataPoint",&escript::Data::maxGlobalDataPoint, "Please consider using getSupLocator() from pdetools instead.")
     .def("getTagNumber",&escript::Data::getTagNumber,args("dpno"),"Return tag number for the specified datapoint\n\n"
         ":rtype: int\n"
         ":param dpno: datapoint number\n"
         ":type dpno: int")
     // Unary functions for Data
+    .def("conjugate", &escript::Data::conjugate)
+    .def("real", &escript::Data::real)
+    .def("imag", &escript::Data::imag)
+    .def("promote", &escript::Data::complicate)
     .def("_interpolate",&escript::Data::interpolate)
     .def("_grad",&escript::Data::gradOn)
     .def("_grad",&escript::Data::grad)
@@ -653,7 +641,9 @@ args("arg"), "assigns new location to the domain\n\n"
     .def("_log",&escript::Data::log)
     .def("_sign",&escript::Data::sign)
     .def("_symmetric",&escript::Data::symmetric)
-    .def("_nonsymmetric",&escript::Data::nonsymmetric)
+    .def("_antisymmetric",&escript::Data::antisymmetric)
+    .def("_hermitian",&escript::Data::hermitian)
+    .def("_antihermitian",&escript::Data::antihermitian)    
     .def("_trace",&escript::Data::trace)
     .def("_swap_axes",&escript::Data::swapaxes)
     .def("_eigenvalues",&escript::Data::eigenvalues)
@@ -948,8 +938,6 @@ args("source", "q", "r","factor"),
     .value("CUSP", escript::SO_PACKAGE_CUSP)
     .value("MKL", escript::SO_PACKAGE_MKL)
     .value("PASO", escript::SO_PACKAGE_PASO)
-    .value("PASTIX", escript::SO_PACKAGE_PASTIX)
-    .value("SUPER_LU", escript::SO_PACKAGE_SUPER_LU)
     .value("TRILINOS", escript::SO_PACKAGE_TRILINOS)
     .value("UMFPACK", escript::SO_PACKAGE_UMFPACK)
 
@@ -959,6 +947,10 @@ args("source", "q", "r","factor"),
     .value("CHOLEVSKY", escript::SO_METHOD_CHOLEVSKY)
     .value("CR", escript::SO_METHOD_CR)
     .value("DIRECT", escript::SO_METHOD_DIRECT)
+    .value("DIRECT_MUMPS", escript::SO_METHOD_DIRECT_MUMPS)
+    .value("DIRECT_PARDISO", escript::SO_METHOD_DIRECT_PARDISO)
+    .value("DIRECT_SUPERLU", escript::SO_METHOD_DIRECT_SUPERLU)
+    .value("DIRECT_TRILINOS", escript::SO_METHOD_DIRECT_TRILINOS)
     .value("GMRES", escript::SO_METHOD_GMRES)
     .value("HRZ_LUMPING", escript::SO_METHOD_HRZ_LUMPING)
     .value("ITERATIVE", escript::SO_METHOD_ITERATIVE)
@@ -1007,6 +999,17 @@ args("source", "q", "r","factor"),
 
 
   class_<escript::SolverBuddy, escript::SB_ptr >("SolverBuddy","",init<>())
+    .def("setTrilinosParameter", &escript::SolverBuddy::setTrilinosParameter,
+            "Sets a Trilinos preconditioner/solver parameter.\n\n"
+        ":note Escript does not check for validity of the parameter name\n"
+        "(e.g. spelling mistakes). Parameters are passed 1:1 to escript's\n"
+        "Trilinos wrapper and from there to the relevant Trilinos package.\n"
+        "See the relevant Trilinos documentation for valid parameter strings\n"
+        "and values."
+        ":note This method does nothing in a non-Trilinos build.")
+    .def("getTrilinosParameters", &escript::SolverBuddy::getTrilinosParameters,
+            "Returns a dictionary of set Trilinos parameters.\n\n"
+        ":note This method returns an empty dictionary in a non-Trilinos build.")
     .def("getSummary", &escript::SolverBuddy::getSummary,"Returns a string reporting the current settings")
     .def("__str__", &escript::SolverBuddy::getSummary)
     .def("getName", &escript::SolverBuddy::getName, args("key"),"Returns the name of a given key\n\n"
@@ -1014,7 +1017,7 @@ args("source", "q", "r","factor"),
     .def("resetDiagnostics", &escript::SolverBuddy::resetDiagnostics, args("all")=false,"Resets the diagnostics\n\n"
         ":param all: if ``all`` is ``True`` all diagnostics including accumulative counters are reset.\n"
         ":type all: ``bool``")
-    .def("_updateDiagnostics", &escript::SolverBuddy::updateDiagnostics, args("key", "value"),"Updates diagnostic information\n\n"
+    .def("_updateDiagnostics", &escript::SolverBuddy::updateDiagnosticsPy, args("key", "value"),"Updates diagnostic information\n\n"
         ":param name: name of  diagnostic information\n"
         ":type name: ``str`` in the list 'num_iter', 'num_level',\n"
         "'num_inner_iter', 'time', 'set_up_time', 'net_time',\n"
@@ -1076,10 +1079,10 @@ args("source", "q", "r","factor"),
         ":rtype: in the list `DEFAULT`, `DIRECT`, `CHOLEVSKY`, `PCG`, `CR`, `CGS`, `BICGSTAB`, `GMRES`, `PRES20`, `ROWSUM_LUMPING`, `HRZ_LUMPING`, `MINRES`, `ITERATIVE`, `NONLINEAR_GMRES`, `TFQMR`")
     .def("setPackage", &escript::SolverBuddy::setPackage, args("package"),"Sets the solver package to be used as a solver.\n\n"
         ":param package: key of the solver package to be used.\n"
-        ":type package: in `DEFAULT`, `PASO`, `CUSP`, `SUPER_LU`, `PASTIX`, `MKL`, `UMFPACK`, `TRILINOS`\n"
+        ":type package: in `DEFAULT`, `PASO`, `CUSP`, `MKL`, `UMFPACK`, `TRILINOS`\n"
         ":note: Not all packages are support on all implementation. An exception may be thrown on some platforms if a particular package is requested.")
     .def("getPackage", &escript::SolverBuddy::getPackage,"Returns the solver package key\n\n"
-        ":rtype: in the list `DEFAULT`, `PASO`, `CUSP`, `SUPER_LU`, `PASTIX`, `MKL`, `UMFPACK`, `TRILINOS`")
+        ":rtype: in the list `DEFAULT`, `PASO`, `CUSP`, `MKL`, `UMFPACK`, `TRILINOS`")
     .def("setSolverTarget", &escript::SolverBuddy::setSolverTarget, args("target"),"Sets the solver target to be used.\n\n"
         ":param target: key of the solver target to be used.\n"
         ":type target: in `TARGET_CPU`, `TARGET_GPU`\n")
@@ -1181,6 +1184,12 @@ args("source", "q", "r","factor"),
         ":note: RILU with a relaxation factor 0 is identical to ILU0")
     .def("getRelaxationFactor", &escript::SolverBuddy::getRelaxationFactor,"Returns the relaxation factor used to add dropped elements in RILU to the main diagonal.\n\n"
         ":rtype: ``float``")
+    .def("isComplex", &escript::SolverBuddy::isComplex,"Checks if the coefficient matrix is set to be complex-valued.\n\n"
+        ":return: True if a complex-valued PDE is indicated, False otherwise\n"
+        ":rtype: ``bool``")
+    .def("setComplex", &escript::SolverBuddy::setComplex, args("complex"),"Sets the complex flag for the coefficient matrix to ``flag``.\n\n"
+        ":param flag: If True, the complex flag is set otherwise reset.\n"
+        ":type flag: ``bool``")
     .def("isSymmetric", &escript::SolverBuddy::isSymmetric,"Checks if symmetry of the coefficient matrix is indicated.\n\n"
         ":return: True if a symmetric PDE is indicated, False otherwise\n"
         ":rtype: ``bool``")
@@ -1260,37 +1269,40 @@ args("source", "q", "r","factor"),
         ":type method: in `CRANK_NICOLSON`, `BACKWARD_EULER`, `LINEAR_CRANK_NICOLSON`");
 
 
-  // Functions to modify global parameters
-  def("setEscriptParamInt",escript::setEscriptParamInt,
+  // Functions to get/modify global parameters/features
+  def("setEscriptParamInt", escript::setEscriptParamInt,
       (arg("name"), arg("value")=0), "Modify the value of an escript tuning parameter\n\n"
         ":param name:\n"
         ":type name: ``string``\n"
         ":param value:\n"
         ":type value: ``int``");
-  def("getEscriptParamInt",escript::getEscriptParamInt,
+
+  def("getEscriptParamInt", escript::getEscriptParamInt,
       (arg("name"),arg("sentinel")=0), "Read the value of an escript tuning parameter\n\n"
         ":param name: parameter to lookup\n"
         ":type name: ``string``\n"
         ":param sentinel: Value to be returned if ``name`` is not a known parameter\n"
         ":type sentinel: ``int``");
-  def("listEscriptParams",escript::listEscriptParams,":return: A list of pairs (p,d) where p is the name of a parameter for escript and d is a description.");
 
+  def("listEscriptParams", escript::listEscriptParams,
+        ":return: A list of tuples (p,v,d) where p is the name of a parameter "
+        "for escript, v is its current value, and d is a description.");
+
+  def("hasFeature", escript::hasFeature,
+      (arg("name")), "Check if escript was compiled with a certain feature\n\n"
+        ":param name: feature to lookup\n"
+        ":type name: ``string``");
+  def("listFeatures", escript::listFeatures,
+        ":return: A list of strings representing the features escript supports.");
 
   def("resolveGroup", escript::resolveGroup);
 
 #ifdef IKNOWWHATIMDOING
-
-  def("applyBinaryCFunction", escript::applyBinaryCFunction, (arg("function"), arg("outshape"),
-arg("in1"), 
-arg("in2"))
-);
+  def("applyBinaryCFunction", escript::applyBinaryCFunction,
+          (arg("function"), arg("outshape"), arg("in1"), arg("in2"))
+  );
 #endif
 
   def("_condEval", escript::condEval, (arg("mask"), arg("trueval"), arg("falseval")));
-
-  //
-  // Register esysExceptionTranslator
-  //
-  register_exception_translator<esysUtils::EsysException>(&esysUtils::RuntimeErrorTranslator);
-  register_exception_translator<escript::SolverOptionsException>(&esysUtils::ValueErrorTranslator);
 }
+
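
Taken together, the additions to this module give Python scripts direct access to the new solver and build introspection hooks. A hedged sketch of what that looks like in use; it assumes the escriptcpp names are re-exported by esys.escript as usual, and the parameter and feature names shown are purely illustrative:

    # Illustrative only: names are assumed to be re-exported by esys.escript.
    from esys.escript import SolverBuddy, hasFeature, listFeatures, listEscriptParams

    sb = SolverBuddy()
    # Hypothetical parameter; passed to Trilinos unchecked, a no-op without Trilinos.
    sb.setTrilinosParameter("verbosity", "high")
    print(sb.getTrilinosParameters())    # dict of parameters set so far
    sb.setComplex(True)                  # declare the coefficient matrix complex-valued
    print(sb.isComplex())                # True
    print(sb.getSummary())               # textual summary of the current settings

    print(listFeatures())                # features this escript build supports
    for p, v, d in listEscriptParams():  # (name, current value, description) tuples
        print(p, v, d)
    if hasFeature("trilinos"):           # hypothetical feature name
        print("Trilinos support present")
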
diff --git a/dudley/src/CPPAdapter/system_dep.h b/escriptcore/src/index.h
similarity index 50%
rename from dudley/src/CPPAdapter/system_dep.h
rename to escriptcore/src/index.h
index 85ab30c..c43d706 100644
--- a/dudley/src/CPPAdapter/system_dep.h
+++ b/escriptcore/src/index.h
@@ -14,31 +14,20 @@
 *
 *****************************************************************************/
 
+#ifndef __ESYS_INDEX_H__
+#define __ESYS_INDEX_H__
 
-/**
-\file dudley/src/CPPAdapter/system_dep.h
-\ingroup Other
- */
-/*
-   @(#) system_dep.h
-*/
+// Macros for array indexing
 
-#ifndef dudley_system_dep_h
-#define dudley_system_dep_h
+#define INDEX2(_X1_,_X2_,_N1_) ((_X1_)+(_N1_)*(_X2_))
 
-#define DUDLEY_DLL_API
+#define INDEX3(_X1_,_X2_,_X3_,_N1_,_N2_) ((_X1_)+(_N1_)*INDEX2(_X2_,_X3_,_N2_))
 
-#ifdef _WIN32
+#define INDEX4(_X1_,_X2_,_X3_,_X4_,_N1_,_N2_,_N3_) ((_X1_)+(_N1_)*INDEX3(_X2_,_X3_,_X4_,_N2_,_N3_))
 
-#   ifndef DUDLEY_STATIC_LIB
-#      undef DUDLEY_DLL_API
-#      ifdef DUDLEY_EXPORTS
-#         define DUDLEY_DLL_API __declspec(dllexport)
-#      else
-#         define DUDLEY_DLL_API __declspec(dllimport)
-#      endif
-#   endif
-#endif
+#define INDEX5(_X1_,_X2_,_X3_,_X4_,_X5_,_N1_,_N2_,_N3_,_N4_) ((_X1_)+(_N1_)*INDEX4(_X2_,_X3_,_X4_,_X5_,_N2_,_N3_,_N4_))
 
-#endif
+#define INDEX6(_X1_,_X2_,_X3_,_X4_,_X5_,_X6_,_N1_,_N2_,_N3_,_N4_,_N5_) ((_X1_)+(_N1_)*INDEX5(_X2_,_X3_,_X4_,_X5_,_X6_,_N2_,_N3_,_N4_,_N5_))
+
+#endif // __ESYS_INDEX_H__
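
The INDEXn macros flatten a multi-dimensional subscript into a single offset with the first index varying fastest, each macro delegating to the one below it. The same arithmetic written out in Python, purely for illustration:

    # Python transcription of the INDEX2/INDEX3 arithmetic (illustration only).
    def index2(x1, x2, n1):
        return x1 + n1 * x2

    def index3(x1, x2, x3, n1, n2):
        return x1 + n1 * index2(x2, x3, n2)

    # In a 2 x 3 block, element (x1=1, x2=2) sits at offset 1 + 2*2 = 5.
    assert index2(1, 2, 2) == 5
    # INDEX3 nests the same rule: (1, 0, 2) in a 2 x 3 x 4 block -> 1 + 2*(0 + 3*2) = 13.
    assert index3(1, 0, 2, 2, 3) == 13
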
 
diff --git a/escriptcore/src/pyerr.cpp b/escriptcore/src/pyerr.cpp
new file mode 100644
index 0000000..ab2e62a
--- /dev/null
+++ b/escriptcore/src/pyerr.cpp
@@ -0,0 +1,75 @@
+/*****************************************************************************
+*
+* Copyright (c)2015-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "pyerr.h"
+
+#include <boost/python/object.hpp>
+#include <boost/python/import.hpp>
+#include <boost/python/list.hpp>
+#include <boost/python/extract.hpp>
+
+namespace escript {
+
+// Function factored out of SubWorld code
+void getStringFromPyException(boost::python::error_already_set e, std::string& errormsg)
+{
+    using namespace boost::python;
+
+    PyObject* ptype=0;
+    PyObject* pvalue=0;
+    PyObject* ptraceback=0;
+    PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+    PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
+    object tb = import("traceback"); 
+    object trace(handle<>(borrowed(ptraceback)));
+    object li=tb.attr("extract_tb")(trace);
+    object li2=tb.attr("format_list")(li);
+    list l=extract<list>(li2)();
+
+#ifdef ESPYTHON3    
+    std::string ss;
+    for (int i=0;i<len(l);++i) {
+        object o=l[i];
+        PyObject* rr=PyUnicode_AsASCIIString(o.ptr());
+        ss+=PyBytes_AsString(rr);
+        Py_XDECREF(rr);
+    }
+    
+    PyObject* errobj=PyObject_Str(pvalue);  
+    PyObject* rr=PyUnicode_AsASCIIString(errobj);
+    errormsg=PyBytes_AsString(rr);
+    errormsg+="\n";
+    Py_XDECREF(rr);
+    errormsg+=ss;
+#else
+    std::string ss;
+    for (int i=0;i<len(l);++i) {
+        ss+=extract<std::string>(l[i])();
+    }
+    
+    PyObject* errobj=PyObject_Str(pvalue);  
+
+    errormsg=PyString_AsString(errobj);
+    errormsg+="\n";
+    errormsg+=ss;
+#endif
+    Py_XDECREF(errobj);
+    Py_XDECREF(ptype);
+    Py_XDECREF(pvalue);
+    Py_XDECREF(ptraceback);
+}
+
+} // namespace escript
+
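
getStringFromPyException fetches and normalises the pending Python exception, formats the traceback through the traceback module (extract_tb followed by format_list), and returns str(value) followed by the formatted frames. For reference, the equivalent sequence in pure Python:

    # Pure-Python illustration of the message assembled by getStringFromPyException.
    import sys
    import traceback

    def exception_to_string():
        etype, value, tb = sys.exc_info()
        frames = traceback.format_list(traceback.extract_tb(tb))
        return str(value) + "\n" + "".join(frames)

    try:
        raise ValueError("boom")
    except ValueError:
        print(exception_to_string())
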
diff --git a/esysUtils/src/pyerr.h b/escriptcore/src/pyerr.h
similarity index 72%
rename from esysUtils/src/pyerr.h
rename to escriptcore/src/pyerr.h
index d159ac1..4d372a6 100644
--- a/esysUtils/src/pyerr.h
+++ b/escriptcore/src/pyerr.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 *
-* Copyright (c) 2015-2016 by The University of Queensland
+* Copyright (c)2015-2016 by The University of Queensland
 * http://www.uq.edu.au
 *
 * Primary Business: Queensland, Australia
@@ -13,17 +13,18 @@
 *
 *****************************************************************************/
 
-// Function factored out of SubWorld code
-#ifndef ESPYERR_H
-#define ESPYERR_H
+#ifndef __ESCRIPT_PYERR_H__
+#define __ESCRIPT_PYERR_H__
+
+#include <boost/python/errors.hpp>
 
 #include <string>
-#include "system_dep.h"
-#include "types.h"
-#include "boost/python/errors.hpp"
 
-ESYSUTILS_DLL_API
+namespace escript {
+
 void getStringFromPyException(boost::python::error_already_set e, std::string& errormsg);
 
-#endif
+} // namespace escript
+
+#endif // __ESCRIPT_PYERR_H__
 
diff --git a/escriptcore/src/system_dep.h b/escriptcore/src/system_dep.h
index 463d2b6..63b7586 100644
--- a/escriptcore/src/system_dep.h
+++ b/escriptcore/src/system_dep.h
@@ -23,52 +23,19 @@
  @(#) system_dep.h
 */
 
-
 #ifndef escript_system_dep_h
 #define escript_system_dep_h
 
-
-#ifdef NO_FLOAT_H
-#   define DBL_EPSILON 2.2204460492503131E-16
-#   define DBL_MAX 1.7976931348623157E+308
-#   define DBL_MIN 2.2250738585072014E-308
-#else /* for the rest of the world */
-#   include <float.h>
-#endif
-#include <limits.h>
-
-#  include <cmath>
-
-#ifndef M_PI
-#   define M_PI 3.14159265358979323846
-#endif
-
-#ifndef SQRT_DBL_EPSILON
-#   define SQRT_DBL_EPSILON   1.4901161193847656e-08
-#endif
-
-#ifndef M_LN2
-#   define M_LN2  0.69314718055994530942  /* log_e 2 */
-#endif
-
 #define ESCRIPT_DLL_API
 
 #ifdef _WIN32
-#   ifndef ESCRIPT_STATIC_LIB
-#      undef ESCRIPT_DLL_API
-#      ifdef ESCRIPT_EXPORTS
-#         define ESCRIPT_DLL_API __declspec(dllexport)
-#      else
-#         define ESCRIPT_DLL_API __declspec(dllimport)
-#      endif
-#   endif
+# undef ESCRIPT_DLL_API
+# ifdef ESCRIPT_EXPORTS
+#   define ESCRIPT_DLL_API __declspec(dllexport)
+# else
+#   define ESCRIPT_DLL_API __declspec(dllimport)
+# endif
 #endif
 
-#ifndef ESCRIPT_MAX_DATA_RANK
-#define ESCRIPT_MAX_DATA_RANK 4
-#endif
-
-#include <esysUtils/types.h>
-
 #endif
 
diff --git a/escriptcore/test/DataAlgorithmAdapterTestCase.cpp b/escriptcore/test/DataAlgorithmAdapterTestCase.cpp
deleted file mode 100644
index 2341144..0000000
--- a/escriptcore/test/DataAlgorithmAdapterTestCase.cpp
+++ /dev/null
@@ -1,273 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include <cmath>
-
-#include "DataAlgorithmAdapterTestCase.h"
-#include "escript/DataExpanded.h"
-#include "escript/DataAlgorithm.h"
-#include "escript/DataTypes.h"
-
-#include <cppunit/TestCaller.h>
-#include <iostream>
-#include <algorithm>
-#include <limits>
-
-using namespace CppUnit;
-using namespace std;
-using namespace escript;
-using namespace escript::DataTypes;
-
-namespace
-{
-
-ValueType::const_reference
-getSRefRO(DataReady& data,int sample, int point)
-{
-   return data.getVectorRO()[data.getPointOffset(sample,point)];
-}
-
-
-}
-
-void DataAlgorithmAdapterTestCase::testAll() {
-
-  cout << endl;
-  cout << "\tTesting FMax." << endl;
-
-  FMax fmax;
-  CPPUNIT_ASSERT(std::abs(fmax(5,6)-6)<=REL_TOL*6);
-  CPPUNIT_ASSERT(std::abs(fmax(5,-6)-5)<=REL_TOL*5);
-  CPPUNIT_ASSERT(std::abs(fmax(0,0)-0)<=REL_TOL*0);
-  CPPUNIT_ASSERT(std::abs(fmax(15,-96)-15)<=REL_TOL*15);
-
-  DataAlgorithmAdapter<FMax> sup(numeric_limits<double>::max()*-1);
-  sup.resetResult();
-  sup(-1);
-  sup(-2);
-  sup(-14);
-  sup(3);
-  CPPUNIT_ASSERT(std::abs(sup.getResult()-3)<=REL_TOL*3);
-
-  cout << "\tTesting AbsMax." << endl;
-
-  AbsMax absmax;
-  CPPUNIT_ASSERT(std::abs(absmax(5,6)-6)<=REL_TOL*6);
-  CPPUNIT_ASSERT(std::abs(absmax(5,-6)-6)<=REL_TOL*6);
-  CPPUNIT_ASSERT(std::abs(absmax(0,0)-0)<=REL_TOL*6);
-  CPPUNIT_ASSERT(std::abs(absmax(15,-96)-96)<=REL_TOL*6);
-
-  DataAlgorithmAdapter<AbsMax> Lsup(0);
-  Lsup.resetResult();
-  Lsup(-2);
-  Lsup(2);
-  Lsup(5);
-  Lsup(-10);
-  CPPUNIT_ASSERT(std::abs(Lsup.getResult()-10)<=REL_TOL*10);
-
-  cout << "\tTesting FMin." << endl;
-
-  FMin fmin;
-  CPPUNIT_ASSERT(std::abs(fmin(5,6)-5)<=REL_TOL*5);
-  CPPUNIT_ASSERT(std::abs(fmin(5,-6)-(-6))<=REL_TOL*6);
-  CPPUNIT_ASSERT(std::abs(fmin(0,0)-0)<=REL_TOL*0);
-  CPPUNIT_ASSERT(std::abs(fmin(15,-96)-(-96))<=REL_TOL*96);
-
-  DataAlgorithmAdapter<FMin> inf(numeric_limits<double>::max());
-  inf.resetResult();
-  inf(1);
-  inf(12);
-  inf(2);
-  inf(99);
-  CPPUNIT_ASSERT(std::abs(inf.getResult()-1)<=REL_TOL*1);
-
-  cout << "\tTesting Length." << endl;
-
-  Length lngth;
-  CPPUNIT_ASSERT(std::abs(lngth(5,6)-std::sqrt(61.0))<=REL_TOL*std::sqrt(61.0));
-  CPPUNIT_ASSERT(std::abs(lngth(5,-6)-std::sqrt(61.0))<=REL_TOL*std::sqrt(61.0));
-  CPPUNIT_ASSERT(std::abs(lngth(0,0)-std::sqrt(0.0))<=REL_TOL*std::sqrt(61.0));
-  CPPUNIT_ASSERT(std::abs(lngth(15,-96)-std::sqrt(9441.0))<=REL_TOL*std::sqrt(61.0));
-
-  DataAlgorithmAdapter<Length> length(0);
-  length.resetResult();
-  length(2);
-  length(4);
-  length(6);
-  length(8);
-  CPPUNIT_ASSERT(std::abs(length.getResult()-std::sqrt(120.0))<=REL_TOL*std::sqrt(120.0));
-  length.resetResult();
-  length(1.5);
-  length(2.5);
-  length(3.5);
-  length(4.5);
-  CPPUNIT_ASSERT(std::abs(length.getResult()-std::sqrt(41.0))<=REL_TOL*std::sqrt(41.0));
-
-  cout << "\tTesting Trace." << endl;
-
-  Trace trce;
-  CPPUNIT_ASSERT(std::abs(trce(5,6)-11)<=REL_TOL*11);
-  CPPUNIT_ASSERT(std::abs(trce(5,-6)-(-1))<=REL_TOL*1);
-  CPPUNIT_ASSERT(std::abs(trce(0,0)-0)<=REL_TOL*0);
-  CPPUNIT_ASSERT(std::abs(trce(15,-96)-(-81))<=REL_TOL*81);
-
-  DataAlgorithmAdapter<Trace> trace(0);
-  trace.resetResult();
-  trace(1);
-  trace(2);
-  trace(3);
-  trace(4);
-  trace(5);
-  CPPUNIT_ASSERT(std::abs(trace.getResult()-15)<=REL_TOL*15);
-  trace.resetResult();
-  trace(1.5);
-  trace(2.5);
-  trace(3.5);
-  trace(4.5);
-  trace(5.5);
-  CPPUNIT_ASSERT(std::abs(trace.getResult()-17.5)<=REL_TOL*17.5);
-
-}
-
-void DataAlgorithmAdapterTestCase::testAlgorithm() {
-
-  cout << endl;
-
-  {
-
-    cout << "\tTest algorithm on Data objects with a single rank 2 data-point." << endl;
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-    shape.push_back(2);
-    shape.push_back(3);
-
-    // allocate the data for the DataArrayView
-    DataTypes::ValueType dataArray(DataTypes::noValues(shape),0);
-
-    // construct DataArrayView
-//     DataArrayView dataView(dataArray,shape);
-
-    // assign values to the data point
-    for (int i=0;i<shape[0];i++) {
-      for (int j=0;j<shape[1];j++) {
-        dataArray[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j);
-      }
-    }
-
-    // create a few Data objects from the created DataArrayView
-    DataExpanded dataExp(FunctionSpace(),shape,dataArray);
-    DataConstant dataCon(FunctionSpace(),shape,dataArray);
-    DataTagged   dataTag(dataCon);
-
-    // test algorithm on DataExpanded
-    FMin fmin_func;
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataExp,fmin_func,numeric_limits<double>::max())-0)<=REL_TOL*0);
-    FMax fmax_func;
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataExp,fmax_func,numeric_limits<double>::max()*-1)-5)<=REL_TOL*5);
-
-    // test algorithm on DataTagged
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataTag,fmin_func,numeric_limits<double>::max())-0)<=REL_TOL*0);
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataTag,fmax_func,numeric_limits<double>::max()*-1)-5)<=REL_TOL*5);
-
-    // test algorithm on DataConstant
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataCon,fmin_func,numeric_limits<double>::max())-0)<=REL_TOL*0);
-    CPPUNIT_ASSERT(std::abs(escript::algorithm(dataCon,fmax_func,numeric_limits<double>::max()*-1)-5)<=REL_TOL*5);
-
-  }
-
-}
-
-void DataAlgorithmAdapterTestCase::testDpAlgorithm() {
-
-  cout << endl;
-
-  {
-
-    cout << "\tTest dp_algorithm on Data objects with a single rank 2 data-point." << endl;
-
-    // define the shapes for the DataArrayViews
-    DataTypes::ShapeType shape;
-    shape.push_back(2);
-    shape.push_back(3);
-    DataTypes::ShapeType shape2;
-
-    // allocate the data for the DataArrayViews
-    DataTypes::ValueType dataArray(DataTypes::noValues(shape),0);
-    DataTypes::ValueType dataArray2(DataTypes::noValues(shape2),0);
-
-    // construct DataArrayViews
-//     DataArrayView dataView(dataArray,shape);
-//     DataArrayView dataView2(dataArray2,shape2);
-
-    // assign values to the data point
-    for (int i=0;i<shape[0];i++) {
-      for (int j=0;j<shape[1];j++) {
-        dataArray[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j);
-      }
-    }
-
-    // create a few Data objects from the created DataArrayViews
-    DataExpanded dataExp(FunctionSpace(),shape,dataArray);
-    DataConstant dataCon(FunctionSpace(),shape,dataArray);
-    DataTagged   dataTag(dataCon);
-
-    // and create Data objects to receive the results of the dp_algorithm calls
-    DataExpanded dataExp2(FunctionSpace(),shape2,dataArray2);
-    DataConstant dataCon2(FunctionSpace(),shape2,dataArray2);
-    DataTagged   dataTag2(dataCon2);
-
-    // test dp_algorithm on DataExpanded
-    FMin fmin_func;
-    escript::dp_algorithm(dataExp,dataExp2,fmin_func,numeric_limits<double>::max());
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataExp2,0,0)-0)<=REL_TOL*0);
-    FMax fmax_func;
-    escript::dp_algorithm(dataExp,dataExp2,fmax_func,numeric_limits<double>::max()*-1);
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataExp2,0,0)-5)<=REL_TOL*5);
-
-    // test dp_algorithm on DataTagged
-    escript::dp_algorithm(dataTag,dataTag2,fmin_func,numeric_limits<double>::max());
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataTag2,0,0)-0)<=REL_TOL*0);
-    escript::dp_algorithm(dataTag,dataTag2,fmax_func,numeric_limits<double>::max()*-1);
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataTag2,0,0)-5)<=REL_TOL*5);
-
-    // test dp_algorithm on DataConstant
-    escript::dp_algorithm(dataCon,dataCon2,fmin_func,numeric_limits<double>::max());
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataCon2,0,0)-0)<=REL_TOL*0);
-    escript::dp_algorithm(dataCon,dataCon2,fmax_func,numeric_limits<double>::max()*-1);
-    CPPUNIT_ASSERT(std::abs(getSRefRO(dataCon2,0,0)-5)<=REL_TOL*5);
-
-  }
-
-}
-
-TestSuite* DataAlgorithmAdapterTestCase::suite()
-{
-  TestSuite *testSuite = new TestSuite("DataAlgorithmAdapterTestCase");
-
-  testSuite->addTest(new TestCaller<DataAlgorithmAdapterTestCase>(
-              "testAll",&DataAlgorithmAdapterTestCase::testAll));
-  testSuite->addTest(new TestCaller<DataAlgorithmAdapterTestCase>(
-              "testAlgorithm",&DataAlgorithmAdapterTestCase::testAlgorithm));
-  testSuite->addTest (new TestCaller<DataAlgorithmAdapterTestCase>(
-              "testDpAlgorithm",&DataAlgorithmAdapterTestCase::testDpAlgorithm));
-  return testSuite;
-}
-
diff --git a/escriptcore/test/DataBlocks2DTestCase.cpp b/escriptcore/test/DataBlocks2DTestCase.cpp
deleted file mode 100644
index e966232..0000000
--- a/escriptcore/test/DataBlocks2DTestCase.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "DataBlocks2DTestCase.h"
-#include "escript/DataBlocks2D.h"
-#include "esysUtils/EsysException.h"
-
-#include <cppunit/TestCaller.h>
-#include <iostream>
-
-using namespace std;
-using namespace CppUnit;
-using namespace escript;
-using namespace esysUtils;
-
-void DataBlocks2DTestCase::testAll()
-{
-  cout << endl;
-  cout << "\tTest DataBlocks2D constructor for various dimension values:" << endl;
-
-  {
-    cout << "\t\tnumRows = 1, numCols = 1, blockSize = 20." << endl;
-    int numRows=1;
-    int numCols=1;
-    int blockSize=20;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    int i = numRows-1;
-    int j = numCols-1;
-    CPPUNIT_ASSERT(myData.index(i,j) == (i*numCols+j)*blockSize);
-    CPPUNIT_ASSERT(myData.size() == numRows*numCols*blockSize);
-  }
-
-  {
-    cout << "\t\tnumRows = 3, numCols = 5, blockSize = 20." << endl;
-    int numRows=3;
-    int numCols=5;
-    int blockSize=20;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    int i = numRows-1;
-    int j = numCols-1;
-    CPPUNIT_ASSERT(myData.index(i,j) == (i*numCols+j)*blockSize);
-    CPPUNIT_ASSERT(myData.size() == numRows*numCols*blockSize);
-  }
-
-  {
-    cout << "\t\tnumRows = 3, numCols = 5, blockSize = 1." << endl;
-    int numRows=3;
-    int numCols=5;
-    int blockSize=1;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    int i = numRows-1;
-    int j = numCols-1;
-    CPPUNIT_ASSERT(myData.index(i,j) == (i*numCols+j)*blockSize);
-    CPPUNIT_ASSERT(myData.size() == numRows*numCols*blockSize);
-  }
-
-  {
-    cout << "\t\tnumRows = 1, numCols = 1, blockSize = 1." << endl;
-    int numRows=1;
-    int numCols=1;
-    int blockSize=1;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    int i = numRows-1;
-    int j = numCols-1;
-    CPPUNIT_ASSERT(myData.index(i,j) == (i*numCols+j)*blockSize);
-    CPPUNIT_ASSERT(myData.size() == numRows*numCols*blockSize);
-  }
-
-  {
-    cout << "\tTest DataBlocks2D.index and DataBlocks2D operator[] for blockSize = 3." << endl;
-    int numRows=10;
-    int numCols=8;
-    int blockSize=3;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    int val=0;
-    for (int i=0; i<numRows; i++) {
-      for (int j=0; j<numCols; j++) {
-        for (int k=0; k<blockSize; k++) {
-	  myData[myData.index(i,j)+k] = val;
-          val++;
-        }
-      }
-    }
-    val=0;
-    for (int i=0; i<numRows; i++) {
-      for (int j=0; j<numCols; j++) {
-        for (int k=0; k<blockSize; k++) {
-	  CPPUNIT_ASSERT(myData[myData.index(i,j)+k] == val);
-          val++;
-        }
-      }
-    }
-  }
-
-  {
-    cout << "\tTest DataBlocks2D exception for numRows = 0." << endl;
-    int numRows=0;
-    int numCols=8;
-    int blockSize=10;
-    try {
-        DataBlocks2D myData(numRows,numCols,blockSize);
-        CPPUNIT_FAIL("Exception not thrown");
-    }
-    catch(EsysException&) {
-        CPPUNIT_ASSERT(true);
-    }
-  }
-
-  {
-    cout << "\tTest DataBlocks2D exception for numCols = 0." << endl;
-    int numRows=10;
-    int numCols=0;
-    int blockSize=10;
-    try {
-        DataBlocks2D myData(numRows,numCols,blockSize);
-        CPPUNIT_FAIL("Exception not thrown");
-    }
-    catch(EsysException&) {
-        CPPUNIT_ASSERT(true);
-    }
-  }
-
-  {
-    cout << "\tTest DataBlocks2D exception for blockSize = 0." << endl;
-    int numRows=10;
-    int numCols=8;
-    int blockSize=0;
-    try {
-        DataBlocks2D myData(numRows,numCols,blockSize);
-        CPPUNIT_FAIL("Exception not thrown");
-    }
-    catch(EsysException&) {
-        CPPUNIT_ASSERT(true);
-    }
-  }
-
-  {
-    cout << "\tTest getNumRows, getNumCols and getBlockSize." << endl;
-    int numRows=1;
-    int numCols=1;
-    int blockSize=1;
-    DataBlocks2D myData(numRows,numCols,blockSize);
-    CPPUNIT_ASSERT(myData.getNumRows() == numRows);
-    CPPUNIT_ASSERT(myData.getNumCols() == numCols);
-    CPPUNIT_ASSERT(myData.getBlockSize() == blockSize);
-  }
-
-  {
-    cout << "\tTest resize." << endl;
-    int numRows=1;
-    int numCols=1;
-    int blockSize=1;
-    DataBlocks2D myData;
-    myData.resize(numRows,numCols,blockSize);
-    CPPUNIT_ASSERT(myData.getNumRows() == numRows);
-    CPPUNIT_ASSERT(myData.getNumCols() == numCols);
-    CPPUNIT_ASSERT(myData.getBlockSize() == blockSize);
-  }
-
-  {
-    cout << "\tTest = operator, swap, and copy constructor." << endl;
-    DataBlocks2D myData1;
-    DataBlocks2D myData2(1, 1, 1);
-    int val=0;
-    for (int i=0; i<1; i++) {
-      for (int j=0; j<1; j++) {
-        for (int k=0; k<1; k++) {
-	  myData2[myData2.index(i,j)+k] = val;
-          val++;
-        }
-      }
-    }
-    myData1 = myData2;
-    for (int i=0; i<myData1.getNumRows(); i++) {
-      for (int j=0; j<myData1.getNumCols(); j++) {
-	CPPUNIT_ASSERT(myData1(i,j) == myData2(i,j));
-      }
-    }
-  }
-
-#if defined DOASSERT
-  {
-    cout << "\tTest DOASSERT exception." << endl;
-    DataBlocks2D myData;
-    CPPUNIT_ASSERT_THROW(myData.index(1,2), EsysException);
-  }
-#endif
-}
-
-TestSuite* DataBlocks2DTestCase::suite()
-{
-  TestSuite *testSuite = new TestSuite("DataBlocks2DTestCase");
-
-  testSuite->addTest(new TestCaller<DataBlocks2DTestCase>(
-              "testAll",&DataBlocks2DTestCase::testAll));
-  return testSuite;
-}
-
diff --git a/escriptcore/test/DataCombinationsTestCase.cpp b/escriptcore/test/DataCombinationsTestCase.cpp
new file mode 100644
index 0000000..81ddf00
--- /dev/null
+++ b/escriptcore/test/DataCombinationsTestCase.cpp
@@ -0,0 +1,876 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <escript/DataTypes.h>
+#include "DataCombinationsTestCase.h"
+
+#include <escript/Data.h>
+#include <escript/TestDomain.h>
+#include <cppunit/TestCaller.h>
+
+#include <escript/Utils.h>
+
+using namespace escript;
+using namespace DataTypes;
+using namespace std;
+
+namespace
+{
+  
+inline
+DataTypes::RealVectorType::const_reference
+getRef(Data& d, int x, int y)
+{
+	return d.getDataAtOffsetRO(getRelIndex(d.getDataPointShape(),x,y));
+}  
+
+
+DataTypes::RealVectorType getVector(const ShapeType& shape, double seed)
+{
+    DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
+
+    // assign values to the data
+    for (int i=0;i<shape[0];i++) {
+      for (int j=0;j<shape[1];j++) {
+	data[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j)+seed;	// so we get no zeros
+      }
+    }
+    return data;
+}  
+  
+  
+Data getConstant(FunctionSpace fs, bool rank0, double seed)
+{
+    if (rank0)
+    {
+        return Data(seed,  DataTypes::ShapeType(),  fs,false);  
+    }
+    DataTypes::ShapeType shape;
+    shape.push_back(2);
+    shape.push_back(3);    
+
+    DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
+
+    // assign values to the data
+    for (int i=0;i<shape[0];i++) {
+      for (int j=0;j<shape[1];j++) {
+	data[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j)+seed;	// so we get no zeros
+      }
+    }
+    return Data(data, shape, fs, false);
+}
+
+
+Data getTagged(FunctionSpace fs, bool rank0, double seed, int tag1, int tag2, int tag3)
+{
+    if (rank0)
+    {
+	Data d(seed, DataTypes::ShapeType(), fs, false);
+	d.tag();
+	DataTypes::RealVectorType data(1,0);
+	data[0]=seed*2;
+	d.setTaggedValueFromCPP(tag1, DataTypes::ShapeType(), data);
+	data[0]=seed*4;	
+	d.setTaggedValueFromCPP(tag2, DataTypes::ShapeType(), data);
+	data[0]=seed*8;		
+	d.setTaggedValueFromCPP(tag3, DataTypes::ShapeType(), data);
+	return d;
+    }
+    DataTypes::ShapeType shape;
+    shape.push_back(2);
+    shape.push_back(3);    
+
+    DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
+    // assign values to the data
+    for (int i=0;i<shape[0];i++) {
+      for (int j=0;j<shape[1];j++) {
+	data[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j)+seed;	// so we get no zeros
+      }
+    }
+    Data d(data, shape, fs, false);
+    d.tag();
+    for (int i=0;i<data.size();++i)
+    {
+	data[i]=data[i]*2;
+    }
+    d.setTaggedValueFromCPP(tag1,
+		      shape,
+		      data);
+    for (int i=0;i<data.size();++i)
+    {
+	data[i]=data[i]*2;
+    }
+    d.setTaggedValueFromCPP(tag2,
+		      shape,
+		      data);
+    for (int i=0;i<data.size();++i)
+    {
+	data[i]=data[i]*2;
+    }
+    d.setTaggedValueFromCPP(tag3,
+		      shape,
+		      data);
+    return d;
+}
+
+Data getExpanded(FunctionSpace fs, bool rank0, double seed)
+{
+    if (rank0)
+    {
+	Data z(seed, DataTypes::ShapeType(), fs,false);
+	Data d=fs.getDomain()->getX()*z;
+	return d;     
+    }
+    DataTypes::ShapeType shape;
+    shape.push_back(2);
+    shape.push_back(3);    
+
+    DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
+    // assign values to the data
+    for (int i=0;i<shape[0];i++) {
+      for (int j=0;j<shape[1];j++) {
+	data[getRelIndex(shape,i,j)]=getRelIndex(shape,i,j)+seed;	// so we get no zeros
+      }
+    }
+    Data d=fs.getDomain()->getX()*Data(data, shape, fs, true);
+    return d;
+}
+
+void createConsts(FunctionSpace& fs, Data& c0s, Data& c4s, Data& c1, Data& c5)
+{
+    c0s=getConstant(fs, true, 0);
+    c4s=getConstant(fs, true, 4);
+  
+    c1=getConstant(fs, false, 1);
+    c5=getConstant(fs, false, 5); 
+}
+
+void createTagged(FunctionSpace& fs, Data& t1s, Data& t2s, Data& t3s, Data& t4s, Data& t5s, Data& t1, Data& t2, Data& t3, Data& t4, Data& t5)
+{
+  t1s=getTagged(fs, true, 1, 1, 2, 3);	// to check for problems with tag ordering
+  t2s=getTagged(fs, true, 5, 1, 3, 2);
+  t3s=getTagged(fs, true, 7, 3, 2, 1);
+  t4s=getTagged(fs, true, 3, 2, 1, 3); 
+  t5s=getTagged(fs, true, 8, 2, 4, 5);     // ensure we test mismatching tags
+  
+  t1=getTagged(fs, false, 1, 1, 2, 3);	// to check for problems with tag ordering
+  t2=getTagged(fs, false, 5, 1, 2, 3);
+  t3=getTagged(fs, false, 7, 3, 2, 1);
+  t4=getTagged(fs, false, 3, 2, 1, 3);  
+  t5=getTagged(fs, false, 9, 5, 2, 4);	// ensure we test mismatching tags  
+}
+
+void createExpand(FunctionSpace& fs, Data& es1, Data& es2, Data& e1, Data& e2)
+{
+    es1=getExpanded(fs, true, 1);
+    es2=getExpanded(fs, true, 2);
+  
+    e1=getExpanded(fs, false, 1);
+    e2=getExpanded(fs, false, 2);
+}
+
+
+}
+
+
+void DataCombinationsTestCase::testUpdate()
+{
+  cout << endl;
+  TestDomain* tdp=new TestDomain(2,3,1);	// 2 points per sample, 3 samples, 1D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());  
+  
+  Data c0s;
+  Data c4s;
+  
+  Data c1;
+  Data c5;
+  
+  
+  Data t1s;
+  Data t2s;
+  Data t3s;
+  Data t4s; 
+  Data t5s;
+  
+  Data t1;
+  Data t2;
+  Data t3;
+  Data t4;  
+  Data t5;
+  
+  Data es1;
+  Data es2;
+  
+  Data e1;
+  Data e2;  
+  
+  
+  createConsts(fs, c0s, c4s, c1, c5);
+  
+  cout << "Constants (self):\n";
+  c0s+=c4s;
+  CPPUNIT_ASSERT(fabs(c0s.Lsup()-4)<0.01);
+  
+  c1+=c5;
+  CPPUNIT_ASSERT(fabs(c1.Lsup()-16)<0.01);  
+  
+  createConsts(fs, c0s, c4s, c1, c5);
+
+  c1+=c4s;
+  CPPUNIT_ASSERT(fabs(c1.Lsup()-10)<0.01);  
+
+  cout << "Tagged (self):\n";  
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5); 
+  
+  t1+=t2;
+  CPPUNIT_ASSERT(fabs(t1.inf()-6)<0.01);
+  CPPUNIT_ASSERT(fabs(t1.Lsup()-16)<0.01);
+  
+  tdp->addUsedTag(1);
+  CPPUNIT_ASSERT(fabs(t1.inf()-6)<0.01);
+  CPPUNIT_ASSERT(fabs(t1.Lsup()-32)<0.01);
+  
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(t1.inf()-6)<0.01);
+  CPPUNIT_ASSERT(fabs(t1.Lsup()-64)<0.01);
+
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(t1.inf()-6)<0.01);
+  CPPUNIT_ASSERT(fabs(t1.Lsup()-128)<0.01);
+ 
+  tdp->clearUsedTags();
+  
+  t2s+=t1s;
+  CPPUNIT_ASSERT(fabs(t2s.Lsup()-6)<0.01);
+  
+  tdp->addUsedTag(1);
+  CPPUNIT_ASSERT(fabs(t2s.inf()-6)<0.01);
+  CPPUNIT_ASSERT(fabs(t2s.Lsup()-12)<0.01);
+  
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(t2s.Lsup()-44)<0.01);
+
+  tdp->clearUsedTags();
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(t2s.Lsup()-28)<0.01);  
+  
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5); 
+
+  tdp->clearUsedTags();
+  t5+=t2s;
+
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-19)<0.01);
+  CPPUNIT_ASSERT(fabs(t5.inf()-14)<0.01);
+  tdp->addUsedTag(5);
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-33)<0.01);
+  CPPUNIT_ASSERT(fabs(t5.inf()-14)<0.01);
+  tdp->addUsedTag(3);
+  
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-34)<0.01);
+  CPPUNIT_ASSERT(fabs(t5.inf()-14)<0.01);  
+  tdp->addUsedTag(4);  
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-117)<0.01);
+  CPPUNIT_ASSERT(fabs(t5.inf()-14)<0.01);    
+  tdp->clearUsedTags();  
+  
+  cout << "Expanded (self):\n";
+  createExpand(fs, es1, es2, e1, e2);
+  
+  es2+=es1;
+  CPPUNIT_ASSERT(fabs(es2.inf())<0.01);    
+  CPPUNIT_ASSERT(fabs(es2.Lsup()-7.5)<0.01);
+  
+  e2+=e1;
+  
+  CPPUNIT_ASSERT(fabs(e2.inf())<0.01);    
+  CPPUNIT_ASSERT(fabs(e2.Lsup()-32.5)<0.01);
+  
+  createExpand(fs, es1, es2, e1, e2);
+
+  e1+=es2;
+  CPPUNIT_ASSERT(fabs(e1.inf())<0.01);    
+  CPPUNIT_ASSERT(fabs(e1.Lsup()-20)<0.01);
+  
+  cout << "Constant (self update by others):\n";
+  createConsts(fs, c0s, c4s, c1, c5);
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  createExpand(fs, es1, es2, e1, e2);
+  
+  c4s+=t1s;
+  CPPUNIT_ASSERT(fabs(c4s.Lsup()-5)<0.01);
+  createConsts(fs, c0s, c4s, c1, c5);
+
+  createConsts(fs, c0s, c4s, c1, c5);
+  c5+=t1s;
+  CPPUNIT_ASSERT(fabs(c5.Lsup()-11)<0.01);  
+  
+  createConsts(fs, c0s, c4s, c1, c5);
+  c5+=t3;
+  CPPUNIT_ASSERT(fabs(c5.Lsup()-22)<0.01);  
+
+  createExpand(fs, es1, es2, e1, e2);
+  c4s+=es1;
+  CPPUNIT_ASSERT(fabs(c4s.Lsup()-6.5)<0.01);
+  
+  createConsts(fs, c0s, c4s, c1, c5);
+  c5+=es1;
+  CPPUNIT_ASSERT(fabs(c5.Lsup()-12.5)<0.01);
+  
+  createConsts(fs, c0s, c4s, c1, c5);
+  c5+=e2;
+  CPPUNIT_ASSERT(fabs(c5.Lsup()-27.5)<0.01);
+  
+  cout << "Tagged (self update by others):\n";
+  createConsts(fs, c0s, c4s, c1, c5);
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  createExpand(fs, es1, es2, e1, e2);
+  
+  tdp->clearUsedTags();
+  t3s+=c4s;
+  CPPUNIT_ASSERT(fabs(t3s.Lsup()-11)<0.01); 
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t3s.Lsup()-60)<0.01);
+  tdp->clearUsedTags();
+  
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  t2+=c4s;
+  CPPUNIT_ASSERT(fabs(t2.Lsup()-14)<0.01);  
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t2.Lsup()-84)<0.01); 
+  tdp->clearUsedTags();
+  
+  
+  t4+=c4s;
+  CPPUNIT_ASSERT(fabs(t4.Lsup()-12)<0.01);  
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t4.Lsup()-68)<0.01);  
+  tdp->clearUsedTags();
+  
+  
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  t3s+=es1;
+  CPPUNIT_ASSERT(fabs(t3s.Lsup()-9.5)<0.01);  
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t3s.Lsup()-9.5)<0.01);  
+  tdp->clearUsedTags();
+  
+
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  t3+=es1;
+  
+  CPPUNIT_ASSERT(fabs(t3.Lsup()-14.5)<0.01);
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t3.Lsup()-14.5)<0.01);
+  tdp->clearUsedTags();
+  
+
+  t5+=es2;
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-19)<0.01);  
+  tdp->addUsedTag(1);
+  tdp->addUsedTag(2);
+  tdp->addUsedTag(3);  
+  CPPUNIT_ASSERT(fabs(t5.Lsup()-19)<0.01);
+  tdp->clearUsedTags();
+  
+  
+  cout << "Expanded (self update by others):\n";
+  createConsts(fs, c0s, c4s, c1, c5);
+  createTagged(fs, t1s, t2s, t3s, t4s, t5s, t1, t2, t3, t4, t5);
+  createExpand(fs, es1, es2, e1, e2);
+
+
+  es1+=c4s;
+  
+  CPPUNIT_ASSERT(fabs(es1.Lsup()-6.5)<0.01);
+
+  es2+=t4s;
+  createExpand(fs, es1, es2, e1, e2);
+  
+  std::vector<int> t;
+  t.push_back(1);
+  t.push_back(1);
+  t.push_back(0);  
+  tdp->assignTags(t);
+  es2+=t4s;
+  CPPUNIT_ASSERT(fabs(es2.Lsup()-15)<0.01);   
+
+  createExpand(fs, es1, es2, e1, e2);
+  t[0]=2;
+  t[1]=3;
+  t[2]=1;
+  tdp->assignTags(t);    
+  es2+=t4s;
+  CPPUNIT_ASSERT(fabs(es2.Lsup()-27)<0.01);   
+  
+  createExpand(fs, es1, es2, e1, e2);
+  
+  e2+=c4s;
+  CPPUNIT_ASSERT(fabs(e2.Lsup()-21.5)<0.01);  
+  
+  e1+=c5;
+  
+  CPPUNIT_ASSERT(fabs(e1.Lsup()-25)<0.01);
+
+  
+  createExpand(fs, es1, es2, e1, e2);
+
+  e2+=t3;
+
+  CPPUNIT_ASSERT(fabs(e2.Lsup()-113.5)<0.01);
+  
+
+  t[0]=0;
+  t[1]=1;
+  t[2]=1;   
+  tdp->assignTags(t);  
+  e2+=t3;
+  CPPUNIT_ASSERT(fabs(e2.Lsup()-209.5)<0.01);
+ 
+}
+
+// The purpose of this test is to check the various combinations of
+// DataReady interactions. It only tests the (Data + Data) case, but
+// hopefully that includes all relevant combinations.
+void DataCombinationsTestCase::testNonUpdate()
+{
+
+  cout << endl;
+  TestDomain* tdp=new TestDomain(2,3,1);	// 2 points per sample, 3 samples, 1D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());
+  
+    
+  Data c0s=getConstant(fs, true, 0);
+  Data c4s=getConstant(fs, true, 4);
+  
+  Data c1=getConstant(fs, false, 1);
+  Data c5=getConstant(fs, false, 5);
+  
+  
+  Data t1s=getTagged(fs, true, 1, 1, 2, 3);	// to check for problems with tag ordering
+  Data t2s=getTagged(fs, true, 5, 1, 3, 2);
+  Data t3s=getTagged(fs, true, 7, 3, 2, 1);
+  Data t4s=getTagged(fs, true, 3, 2, 1, 3); 
+  Data t5s=getTagged(fs, true, 8, 2, 4, 5);     // ensure we test mismatching tags
+  
+  Data t1=getTagged(fs, false, 1, 1, 2, 3);	// to check for problems with tag ordering
+  Data t2=getTagged(fs, false, 5, 1, 2, 3);
+  Data t3=getTagged(fs, false, 7, 3, 2, 1);
+  Data t4=getTagged(fs, false, 3, 2, 1, 3);  
+  Data t5=getTagged(fs, false, 9, 5, 2, 4);	// ensure we test mismatching tags
+  
+  Data es1=getExpanded(fs, true, 1);
+  Data es2=getExpanded(fs, true, 2);
+  
+  Data e1=getExpanded(fs, false, 1);
+  Data e2=getExpanded(fs, false, 2);
+  
+  real_t rr=0;
+  Data res, res1, res2;
+  cout << "Identical adds:\n";		// strictly, this only tests for inverse relationship between + and -
+ 
+  rr=(c0s+c0s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  
+  res=c4s+c4s;
+  rr=(res-c4s-c4s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  
+  res=(c1+c1);
+  rr=(res-c1-c1).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  res=(c5+c5);
+  rr=(res-c5-c5).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  
+  res=(t1s+t1s);
+  rr=(res-t1s-t1s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+
+  res=(t2s+t2s);
+  rr=(res-t2s-t2s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+
+  res=(t3s+t3s);
+  rr=(res-t3s-t3s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+
+  res=(t4s+t4s);
+  rr=(res-t4s-t4s).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  
+  res=(es1+es1);
+  rr=(res-es1-es1).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+
+  res=(es2+es2);
+  rr=(res-es2-es2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+
+  res=(e1+e1);
+  rr=(res-e1-e1).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  cout << "Rank0 constant adds:\n";
+  res1=(c4s+c5);
+  res2=(c5+c4s);
+  rr=(res1-res2).Lsup();
+  
+  CPPUNIT_ASSERT(rr<0.01);
+
+    // so the answers are the same, are they correct?
+  
+  DataTypes::ShapeType shape;
+  shape.push_back(2);
+  shape.push_back(3);        
+  
+  DataTypes::RealVectorType dat=getVector(shape, 9);  
+  bool mismatch=false;
+  for (int i=0;i<2;++i)
+  {
+      for (int j=0;j<3;++j)
+      {
+	  if (!res1.hasNoSamples())
+	  {	
+	      if (getRef(res1,i,j)!=dat[getRelIndex(shape,i,j)]) {
+		  cout << "Mismatch at " << i << ',' << j << "::" << getRef(res1,i,j) << dat[getRelIndex(shape,i,j)] << endl;
+		  mismatch=true;
+	      }
+	  }
+      }
+  }
+  // need to do a global check to see if anyone mismatched
+  assert(getMPIWorldMax(mismatch)==0);  
+  
+  res1=(c1+c5);
+  res2=(c5+c1);
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  dat=getVector(shape, 1);
+  auto dat2=getVector(shape,5);
+  mismatch=false;
+  for (int i=0;i<2;++i)
+  {
+      for (int j=0;j<3;++j)
+      {
+	  if (!res1.hasNoSamples())
+	  {
+	      if (getRef(res1,i,j)!=(dat[getRelIndex(shape,i,j)]+dat2[getRelIndex(shape,i,j)])) {
+		  cout << "Mismatch at " << i << ',' << j << endl;
+		  mismatch=true;
+	      }
+	  }
+      }
+  }
+  // need to do a global check to see if anyone mismatched
+  assert(getMPIWorldMax(mismatch)==0);
+  
+  cout << "CCC looks good\n";
+
+  // will test EEE next
+  
+  cout << "EEE\n";
+  res1=e1+e2;
+  res2=e2+e1;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-32.5)<0.01);
+
+  
+  
+  res1=es1+es2;
+  res2=es2+es1;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-7.5)<0.01);  
+
+  res1=e1+es2;
+  res2=es2+e1;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-20)<0.01);    
+
+  res1=es1+e2;
+  res2=e2+es1;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-20)<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf())<0.000001);
+  cout << "EEE looks good\n";
+
+  cout << "E and C\n";
+  res1=c5+es2;
+  res2=es2+c5;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-15)<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-5)<0.000001);  
+
+  res1=c4s+e2;
+  res2=e2+c4s;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-21.5)<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-4)<0.000001);    
+  
+  res1=c5+e2;
+  res2=e2+c5;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-27.5)<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-5)<0.000001); 
+ 
+  cout << "TTT\n";
+  
+  res1=t1+t2;
+  res2=t2+t1;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-6)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-16)<0.001);  
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(res1.inf()-6)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-64)<0.001);
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-6)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-128)<0.001);
+  tdp->clearUsedTags();
+  
+  res1=t2+t3;		// tags in different orders
+  res2=t3+t2;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-12)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-104)<0.001);
+  tdp->clearUsedTags();
+
+  res1=t5s+t4;
+  res2=t4+t5s;
+  rr=(res1-res2).Lsup();
+  CPPUNIT_ASSERT(rr<0.01);
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-32)<0.001);  
+  tdp->clearUsedTags();
+  tdp->addUsedTag(1);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-40)<0.001);  
+  tdp->clearUsedTags();
+  tdp->addUsedTag(5);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-72)<0.001);  
+  tdp->clearUsedTags();
+  
+  res1=t2s+t5s;
+  res2=t5s+t2s;
+  rr=(res1-res2).Lsup();
+  tdp->addUsedTag(1);
+  CPPUNIT_ASSERT(fabs(res1.inf()-13)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-18)<0.001);    
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-13)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-28)<0.001);      
+  tdp->addUsedTag(5);
+  CPPUNIT_ASSERT(fabs(res1.inf()-13)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-69)<0.001);    
+  tdp->clearUsedTags();  
+  
+  cout << "TTT looks ok\n";
+  cout << "T and C\n";
+  
+  
+  res1=c4s+t4s;
+  res2=t4s+c4s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-7)<0.001);    
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-10)<0.001);      
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-28)<0.001);     
+  tdp->clearUsedTags();    
+  
+  res1=t4+c5;
+  res2=c5+t4;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-8)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-18)<0.001);    
+  tdp->addUsedTag(2);
+  CPPUNIT_ASSERT(fabs(res1.inf()-8)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-26)<0.001);      
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-8)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-74)<0.001);     
+  tdp->clearUsedTags();  
+  
+  res1=c4s+t4;
+  res2=t4+c4s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+ 
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-12)<0.001);    
+  tdp->addUsedTag(1);
+  
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-36)<0.001);      
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-7)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-68)<0.001);     
+  tdp->clearUsedTags();    
+  
+  res1=t4s+c5;
+  res2=c5+t4s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-8)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-13)<0.001);       
+  tdp->addUsedTag(3);
+  CPPUNIT_ASSERT(fabs(res1.inf()-8)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-34)<0.001);     
+  tdp->clearUsedTags();
+  
+  cout << "T and C look good\n";
+  cout << "T and E\n";
+  
+  // We need to try various combinations of tags
+  // with each combination of input types
+  
+  std::vector<int> t;
+  t.push_back(0);
+  t.push_back(1);
+  t.push_back(0);
+  
+  tdp->assignTags(t);
+
+  res1=t1+es2;
+  res2=es2+t1;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-1)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-15)<0.001);       
+
+  res1=t1+e2;
+  res2=e2+t1;  
+  
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-1)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-23.5)<0.001);   
+  
+  res1=t1s+e2;
+  res2=e2+t1s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-1)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-18.5)<0.001);     
+  
+  res1=t1s+es2;
+  res2=es2+t1s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-1)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-6)<0.001);     
+
+  cout << " round 0,1,0 OK\n";
+  
+  // now repeat with a different tag combination
+  t[0]=2;
+  t[1]=3;
+  t[2]=1;
+  tdp->assignTags(t);
+  
+  res1=t3+es2;
+  res2=es2+t3;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-16)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-101)<0.001);       
+  
+  res1=t3+e2;
+  res2=e2+t3;  
+  
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-16)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-113.5)<0.001);  
+  
+  res1=t3s+e2;
+  res2=e2+t3s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-16)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-73.5)<0.001); 
+  
+  res1=t3s+es2;
+  res2=es2+t3s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-16)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-61)<0.001);     
+
+  cout << " round 2,3,1 OK\n";  
+  
+  t[0]=3;
+  t[1]=1;
+  t[2]=0;
+  tdp->assignTags(t);
+  
+  res1=t3+es2;
+  res2=es2+t3;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-99)<0.001);       
+  
+  res1=t3+e2;
+  res2=e2+t3;  
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-106.5)<0.001); 
+  
+  res1=t3s+e2;
+  res2=e2+t3s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-66.5)<0.001); 
+  
+  res1=t3s+es2;
+  res2=es2+t3s;
+  CPPUNIT_ASSERT((res1-res2).Lsup()<0.01);
+  CPPUNIT_ASSERT(fabs(res1.inf()-11)<0.001);
+  CPPUNIT_ASSERT(fabs(res1.Lsup()-59)<0.001);     
+  cout << " round 3,1,0 OK\n";   
+
+  
+}
+
+using namespace CppUnit;
+
+CppUnit::TestSuite* DataCombinationsTestCase::suite()
+{
+  // create the suite of tests to perform.
+  CppUnit::TestSuite *testSuite = new TestSuite("DataCombinationsTestCase");
+  testSuite->addTest(new TestCaller<DataCombinationsTestCase>(
+              "testNonUpdate",&DataCombinationsTestCase::testNonUpdate));
+  testSuite->addTest(new TestCaller<DataCombinationsTestCase>(
+              "testUpdate",&DataCombinationsTestCase::testUpdate));
+  
+  return testSuite;
+}
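
The test above pits the three DataReady representations (constant, tagged and expanded) against each other in every pairing. From Python the same three flavours are reached through Data methods bound earlier in this commit; a minimal sketch, assuming a plain scalar Data on the default function space is enough for illustration and that Data arithmetic behaves as usual (the operators themselves are not part of this diff):

    # Sketch of the three Data representations exercised by the test above.
    from esys.escript import Data

    c = Data(4.0)     # constant: one value shared by every data point
    t = Data(4.0)
    t.tag()           # tagged: per-tag values (added via setTaggedValue, not shown)
    e = Data(4.0)
    e.expand()        # expanded: an independent value stored at every data point
    print(c + t + e)  # mixed-representation arithmetic, as the C++ test checks
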
diff --git a/escriptcore/test/DataAlgorithmAdapterTestCase.h b/escriptcore/test/DataCombinationsTestCase.h
similarity index 69%
copy from escriptcore/test/DataAlgorithmAdapterTestCase.h
copy to escriptcore/test/DataCombinationsTestCase.h
index 400d723..3015115 100644
--- a/escriptcore/test/DataAlgorithmAdapterTestCase.h
+++ b/escriptcore/test/DataCombinationsTestCase.h
@@ -1,7 +1,7 @@
 
 /*****************************************************************************
 *
-* Copyright (c) 2003-2016 by The University of Queensland
+* Copyright (c) 2016 by The University of Queensland
 * http://www.uq.edu.au
 *
 * Primary Business: Queensland, Australia
@@ -15,23 +15,23 @@
 *****************************************************************************/
 
 
-#if !defined DataAlgorithmAdapterTestCase_20040715_H
-#define DataAlgorithmAdapterTestCase_20040715_H
+#if !defined DataCombinationsTestCase_20040624_H
+#define DataCombinationsTestCase_20040624_H
 
 #include <cppunit/TestFixture.h>
 #include <cppunit/TestSuite.h>
 
 #define REL_TOL ((double)1.e-10)
 
-class DataAlgorithmAdapterTestCase : public CppUnit::TestFixture
+class DataCombinationsTestCase : public CppUnit::TestFixture
 {
 public:
-  void testAll();
-  void testAlgorithm();
-  void testDpAlgorithm();
+
+  void testNonUpdate();
+  void testUpdate();  
 
   static CppUnit::TestSuite* suite();
-};
 
-#endif
+};
 
+#endif
\ No newline at end of file
diff --git a/escriptcore/test/DataConstantTestCase.cpp b/escriptcore/test/DataConstantTestCase.cpp
index 65bab47..a1aca18 100644
--- a/escriptcore/test/DataConstantTestCase.cpp
+++ b/escriptcore/test/DataConstantTestCase.cpp
@@ -14,15 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataConstant.h>
 
 #include "DataConstantTestCase.h"
 
-#include "escript/DataConstant.h"
-#include "escript/FunctionSpace.h"
-#include "esysUtils/EsysException.h"
+#include <escript/FunctionSpace.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
@@ -30,13 +26,12 @@
 using namespace CppUnit;
 using namespace escript;
 using namespace std;
-using namespace esysUtils;
 using namespace escript::DataTypes;
 
 namespace
 {
 
-ValueType::const_reference
+RealVectorType::const_reference
 getRefRO(DataReady& data,int i, int j, int k)
 {
    return data.getVectorRO()[getRelIndex(data.getShape(),i,j,k)];
@@ -64,7 +59,7 @@ void DataConstantTestCase::testAll()
   //
   // Create a scalar pointData
   DataTypes::ShapeType shape;
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //  DataArrayView pointData(data,shape);
 
   //
@@ -84,7 +79,7 @@ void DataConstantTestCase::testAll()
   shape.push_back(21);
 
   cout << "\tTesting alternative constructor." << endl;
-  DataTypes::ValueType data1(DataTypes::noValues(shape),1.0);
+  DataTypes::RealVectorType data1(DataTypes::noValues(shape),1.0);
   // do not call the FunctionSpace constructor directly
   // in the argument of DataConstant
   // GCC chokes on it.
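
The recurring change in these test files is the storage typedef: DataTypes::ValueType becomes DataTypes::RealVectorType, while the construction pattern itself is unchanged. A hedged sketch of that pattern as it appears in the updated tests; the shape, fill value and function name here are illustrative only.

    #include <escript/DataConstant.h>
    #include <escript/DataTypes.h>
    #include <escript/FunctionSpace.h>

    using namespace escript;

    void buildRankOneConstant()
    {
      DataTypes::ShapeType shape;
      shape.push_back(3);                                  // rank-1 shape (3,)

      // one entry per value of the shape, initialised to 1.0
      // (previously DataTypes::ValueType, now the real-valued vector type)
      DataTypes::RealVectorType data(DataTypes::noValues(shape), 1.0);

      // construct the FunctionSpace separately rather than inline in the
      // DataConstant argument list (see the GCC note in the test above)
      FunctionSpace fs;
      DataConstant dc(fs, shape, data);
      (void)dc;
    }
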
diff --git a/escriptcore/test/DataEmptyTestCase.cpp b/escriptcore/test/DataEmptyTestCase.cpp
index a9fe460..207302a 100644
--- a/escriptcore/test/DataEmptyTestCase.cpp
+++ b/escriptcore/test/DataEmptyTestCase.cpp
@@ -14,22 +14,18 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataEmpty.h>
 
 #include "DataEmptyTestCase.h"
 
-#include "escript/DataEmpty.h"
-#include "escript/FunctionSpace.h"
-#include "esysUtils/EsysException.h"
+#include <escript/FunctionSpace.h>
+#include <escript/EsysException.h>
 
 #include <cppunit/TestCaller.h>
 
 using namespace CppUnit;
 using namespace escript;
 using namespace std;
-using namespace esysUtils;
 
 void DataEmptyTestCase::testAll()
 {
diff --git a/escriptcore/test/DataExpandedTestCase.cpp b/escriptcore/test/DataExpandedTestCase.cpp
index f267b1f..a5c0e50 100644
--- a/escriptcore/test/DataExpandedTestCase.cpp
+++ b/escriptcore/test/DataExpandedTestCase.cpp
@@ -14,14 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataExpanded.h>
 #include "DataExpandedTestCase.h"
-#include "escript/FunctionSpace.h"
-#include "escript/DataExpanded.h"
-#include "esysUtils/EsysException.h"
-#include "escript/DataReady.h"
+#include <escript/DataReady.h>
+#include <escript/EsysException.h>
+#include <escript/FunctionSpace.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
@@ -29,32 +26,31 @@
 using namespace CppUnit;
 using namespace escript;
 using namespace std;
-using namespace esysUtils;
 using namespace escript::DataTypes;
 
 namespace
 {
 
-ValueType::const_reference
+RealVectorType::const_reference
 getRefRO(DataReady& data,int i, int j)
 {
    return data.getVectorRO()[getRelIndex(data.getShape(),i,j)];
 }
 
-ValueType::const_reference
+RealVectorType::const_reference
 getRefRO(DataReady& data,int i, int j,int k)
 {
    return data.getVectorRO()[getRelIndex(data.getShape(),i,j,k)];
 }
 
-ValueType::reference
-getDRef(ValueType& data,const ShapeType& shape,int i, int j)
+RealVectorType::reference
+getDRef(RealVectorType& data,const ShapeType& shape,int i, int j)
 {
    return data[getRelIndex(shape,i,j)];
 }
 
-ValueType::reference
-getDRef(ValueType& data,const ShapeType& shape,int i, int j, int k)
+RealVectorType::reference
+getDRef(RealVectorType& data,const ShapeType& shape,int i, int j, int k)
 {
    return data[getRelIndex(shape,i,j,k)];
 }
@@ -81,7 +77,7 @@ void DataExpandedTestCase::testAll()
   // Create a rank 1 pointData
   DataTypes::ShapeType shape;
   shape.push_back(3);
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //  DataArrayView pointData(data,shape);
 
   //
@@ -174,7 +170,7 @@ void DataExpandedTestCase::testSlicing() {
   // Create a rank 1 pointData
   DataTypes::ShapeType shape;
   shape.push_back(3);
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //   DataArrayView pointData(data,shape);
 
   //
@@ -211,7 +207,7 @@ void DataExpandedTestCase::testSlicing2() {
   DataTypes::ShapeType shape;
   shape.push_back(3);
   shape.push_back(3);
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //  DataArrayView pointData(data,shape);
 
   //
@@ -290,7 +286,7 @@ void DataExpandedTestCase::testSlicing3() {
   shape.push_back(3);
   shape.push_back(3);
   shape.push_back(3);
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //   DataArrayView pointData(data,shape);
 
   //
@@ -405,7 +401,7 @@ void DataExpandedTestCase::testSliceSetting() {
   DataTypes::ShapeType shape;
   shape.push_back(2);
   shape.push_back(2);
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //   DataArrayView pointData(data,shape);
 
   //
@@ -425,7 +421,7 @@ void DataExpandedTestCase::testSliceSetting() {
   DataTypes::ShapeType shape2;
   shape2.push_back(3);
   shape2.push_back(3);
-  DataTypes::ValueType data2(DataTypes::noValues(shape2),0);
+  DataTypes::RealVectorType data2(DataTypes::noValues(shape2),0);
 //   DataArrayView pointData2(data2,shape2);
 
   //
@@ -478,7 +474,7 @@ void DataExpandedTestCase::testSliceSetting2() {
   //
   // Create a rank 0 pointData
   DataTypes::ShapeType shape;
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 //   DataArrayView pointData(data,shape);
 
   //
@@ -495,7 +491,7 @@ void DataExpandedTestCase::testSliceSetting2() {
   DataTypes::ShapeType shape2;
   shape2.push_back(3);
   shape2.push_back(3);
-  DataTypes::ValueType data2(DataTypes::noValues(shape2),0);
+  DataTypes::RealVectorType data2(DataTypes::noValues(shape2),0);
 //   DataArrayView pointData2(data2,shape2);
 
   //
diff --git a/escriptcore/test/DataFactoryTestCase.cpp b/escriptcore/test/DataFactoryTestCase.cpp
index b7be535..8290825 100644
--- a/escriptcore/test/DataFactoryTestCase.cpp
+++ b/escriptcore/test/DataFactoryTestCase.cpp
@@ -14,13 +14,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/Data.h>
 #include "DataFactoryTestCase.h"
 
-#include "escript/DataFactory.h"
-#include "escript/Data.h"
+#include <escript/DataFactory.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
diff --git a/escriptcore/test/DataLazyTestCase.cpp b/escriptcore/test/DataLazyTestCase.cpp
index a7a18da..b0b58d7 100644
--- a/escriptcore/test/DataLazyTestCase.cpp
+++ b/escriptcore/test/DataLazyTestCase.cpp
@@ -14,25 +14,19 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
+#include <escript/DataConstant.h>
 #include "DataLazyTestCase.h"
 
-#include "escript/DataConstant.h"
-#include "escript/DataLazy.h"
-#include "escript/FunctionSpace.h"
-#include "esysUtils/EsysException.h"
+#include <escript/DataLazy.h>
+#include <escript/FunctionSpace.h>
 
-#include <cppunit/TestCaller.h>
 #include <iostream>
+#include <cppunit/TestCaller.h>
 #include <boost/shared_ptr.hpp>	// for the cast operator
 
 using namespace CppUnit;
 using namespace escript;
 using namespace std;
-using namespace esysUtils;
 using namespace escript::DataTypes;
 using namespace boost;
 
@@ -59,7 +53,7 @@ DataAbstract_ptr
 getLazy(DataTypes::ShapeType& shape,bool minus=false)
 {
   int pts=DataTypes::noValues(shape);
-  DataTypes::ValueType data(pts,0);
+  DataTypes::RealVectorType data(pts,0);
   for (int i=0;i<pts;++i)
   {
 	data[i]=minus?-(i+1):i+1;
@@ -74,7 +68,7 @@ DataAbstract_ptr
 getLazyU(DataTypes::ShapeType& shape, ES_optype typ)
 {
   int pts=DataTypes::noValues(shape);
-  DataTypes::ValueType data(pts,0);
+  DataTypes::RealVectorType data(pts,0);
   for (int i=0;i<pts;++i)
   {
 	data[i]=(i+1);
@@ -89,7 +83,7 @@ DataAbstract_ptr
 getLazyUP(DataTypes::ShapeType& shape, ES_optype typ, int par)
 {
   int pts=DataTypes::noValues(shape);
-  DataTypes::ValueType data(pts,0);
+  DataTypes::RealVectorType data(pts,0);
   for (int i=0;i<pts;++i)
   {
 	data[i]=(i+1);
@@ -105,8 +99,8 @@ DataAbstract_ptr
 getLazyB(DataTypes::ShapeType& shape, ES_optype typ)
 {
   int pts=DataTypes::noValues(shape);
-  DataTypes::ValueType data(pts,0);
-  DataTypes::ValueType data2(pts,0);
+  DataTypes::RealVectorType data(pts,0);
+  DataTypes::RealVectorType data2(pts,0);
   for (int i=0;i<pts;++i)
   {
 	data[i]=(i+1);
@@ -124,8 +118,8 @@ DataAbstract_ptr
 getLazyGTP(DataTypes::ShapeType& shape, ES_optype typ, int ax, int tr)
 {
   int pts=DataTypes::noValues(shape);
-  DataTypes::ValueType data(pts,0);
-  DataTypes::ValueType data2(pts,0);
+  DataTypes::RealVectorType data(pts,0);
+  DataTypes::RealVectorType data2(pts,0);
   for (int i=0;i<pts;++i)
   {
 	data[i]=(i+1);
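
The getLazy* helpers above all share the same buffer-filling idiom: allocate one entry per value of the shape and fill it with the sequence 1..n (optionally negated) before wrapping it in a DataConstant. Only the container type changes in this commit. A small sketch of that idiom, with a hypothetical helper name:

    #include <escript/DataTypes.h>
    #include <escript/DataVector.h>

    // Fill a real-valued buffer with 1..n, or -(1..n) when minus is set,
    // exactly as the getLazy* helpers above do before building operands.
    escript::DataTypes::RealVectorType
    makeSequentialBuffer(const escript::DataTypes::ShapeType& shape, bool minus=false)
    {
      using namespace escript::DataTypes;
      int pts = noValues(shape);
      RealVectorType data(pts, 0);
      for (int i = 0; i < pts; ++i)
      {
        data[i] = minus ? -(i + 1) : (i + 1);
      }
      return data;
    }
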
diff --git a/escriptcore/test/DataMathsTestCase.cpp b/escriptcore/test/DataMathsTestCase.cpp
index 5a308b2..0f2ac2b 100644
--- a/escriptcore/test/DataMathsTestCase.cpp
+++ b/escriptcore/test/DataMathsTestCase.cpp
@@ -14,25 +14,19 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
+#include <escript/DataTypes.h>
+#include <escript/DataVectorOps.h>
 #include "DataMathsTestCase.h"
-#include "escript/DataAlgorithm.h"
-#include "escript/DataTypes.h"
-#include "escript/DataVector.h"
-#include "esysUtils/EsysException.h"
+#include <escript/DataTypes.h>
+#include <escript/DataVector.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
 
 using namespace CppUnit;
-using namespace esysUtils;
 using namespace escript;
 using namespace std;
 using namespace escript::DataTypes;
-using namespace escript::DataMaths;
 
 
 void DataMathsTestCase::testMatMult()
@@ -43,22 +37,22 @@ void DataMathsTestCase::testMatMult()
     DataTypes::ShapeType leftShape;
     leftShape.push_back(1);
     leftShape.push_back(3);
-    DataTypes::ValueType leftData(DataTypes::noValues(leftShape),0);
+    DataTypes::RealVectorType leftData(DataTypes::noValues(leftShape),0);
 //     DataArrayView leftDataView(leftData,leftShape);
 
     DataTypes::ShapeType rightShape;
     rightShape.push_back(3);
     rightShape.push_back(2);
-    DataTypes::ValueType rightData(DataTypes::noValues(rightShape),0);
+    DataTypes::RealVectorType rightData(DataTypes::noValues(rightShape),0);
 //     DataArrayView rightDataView(rightData,rightShape);
 
-    DataTypes::ShapeType resultShape=DataMaths::determineResultShape(leftShape,rightShape);
+    DataTypes::ShapeType resultShape=determineResultShape(leftShape,rightShape);
 
     CPPUNIT_ASSERT(resultShape.size()==2);
     CPPUNIT_ASSERT(resultShape[0]==1);
     CPPUNIT_ASSERT(resultShape[1]==2);
 
-    DataTypes::ValueType resultData(DataTypes::noValues(resultShape),0);
+    DataTypes::RealVectorType resultData(DataTypes::noValues(resultShape),0);
 
     cout << "\tTest matrix multiplication.";
     double aValue=0.0;
@@ -74,428 +68,12 @@ void DataMathsTestCase::testMatMult()
       }
     }
 
-    DataMaths::matMult(leftData,leftShape,0,rightData,rightShape,0,resultData, resultShape);
+    matMult(leftData,leftShape,0,rightData,rightShape,0,resultData, resultShape);
     CPPUNIT_ASSERT((resultData[0]==22) && (resultData[1]==28));
 
     cout << endl;
 }
 
-void DataMathsTestCase::testUnaryOp()
-{
-
-  // This typedef allows function names to be cast to pointers
-  // to unary functions of the appropriate type.
-  typedef double (*UnaryDFunPtr)(double);
-
-  {
-    cout << endl;
-    cout << "\tTest unaryOp on scalar Data.";
-
-    // define the shape for the Data
-    DataTypes::ShapeType shape;
-
-    // allocate the data for the Data
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-    double tmp;
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      data[offset]=p;
-
-      // apply a unary operation to this data point
-      unaryOp(data,shape,offset,(UnaryDFunPtr)std::sin);
-
-      // check the results
-      tmp = std::sin((double)p);
-      CPPUNIT_ASSERT(std::abs(data[offset]-tmp)<=REL_TOL*std::abs(tmp));
-
-      if (p<npoints-1) {
-        offset++;
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest unaryOp on shape (2,3) Data.";
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-    shape.push_back(2);
-    shape.push_back(3);
-
-    // allocate the data for the DataArrayView
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          data[offset+getRelIndex(shape,i,j)]=offset+getRelIndex(shape,i,j);
-        }
-      }
-
-      // apply a unary operation to this data point
-//      dataView.unaryOp((UnaryDFunPtr)std::sqrt);
-      unaryOp(data,shape,offset,(UnaryDFunPtr)std::sqrt);
-
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-//           CPPUNIT_ASSERT(std::abs(dataView(i,j)-std::sqrt((double)dataView.index(i,j)))<=REL_TOL*std::sqrt((double)dataView.index(i,j)));
-          CPPUNIT_ASSERT(std::abs(data[offset+getRelIndex(shape,i,j)]-std::sqrt((double)offset+getRelIndex(shape,i,j)))<=REL_TOL*std::sqrt((double)offset+getRelIndex(shape,i,j)));
-        }
-      }
-
-      if (p<npoints-1) {
-        offset++;
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest unaryOp on shape (9,8,5,11) Data.";
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-    shape.push_back(9);
-    shape.push_back(8);
-    shape.push_back(5);
-    shape.push_back(11);
-
-    // allocate the data for the DataArrayView
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              data[offset+getRelIndex(shape,i,j,k,l)]=data[offset+getRelIndex(shape,i,j,k,l)]+1;
-            }
-          }
-        }
-      }
-
-      // apply a unary operation to this data point
-//       dataView.unaryOp((UnaryDFunPtr)std::log);
-      unaryOp(data,shape,offset,(UnaryDFunPtr)std::log);
-
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              CPPUNIT_ASSERT(std::abs(data[offset+getRelIndex(shape,i,j,k,l)]-std::log(1+(double)data[offset+getRelIndex(shape,i,j,k,l)]))<=REL_TOL*std::abs(std::log(1+(double)data[offset+getRelIndex(shape,i,j,k,l)])));
-            }
-          }
-        }
-      }
-
-      if (p<npoints-1) {
-        offset++;
-      }
-
-    }
-
-  }
-
-  cout << endl;
-
-}
-
-void DataMathsTestCase::testBinaryOp()
-{
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on scalar Data.";
-
-    // define the shape for the DataArrayViews
-    DataTypes::ShapeType shape;
-
-    // allocate the data for the DataArrayViews
-    int npoints=4;
-    DataTypes::ValueType data1(DataTypes::noValues(shape)*npoints,0);
-    DataTypes::ValueType data2(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the views along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data points
-      data1[offset]=p;
-      data2[offset]=p;
-
-      // apply a binary operation to these data points
-      binaryOp(data1,scalarShape,offset,data2,scalarShape,offset, plus<double>());
-
-      // check the results
-      CPPUNIT_ASSERT(data1[offset]==p+p);
-
-      if (p<npoints-1) {
-	++offset;
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on shape (2,3) Data.";
-
-    // define the shape for the DataArrayViews
-    DataTypes::ShapeType shape;
-    shape.push_back(2);
-    shape.push_back(3);
-
-    // allocate the data for the DataArrayViews
-    int npoints=4;
-    DataTypes::ValueType data1(DataTypes::noValues(shape)*npoints,0);
-    DataTypes::ValueType data2(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the views along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data points
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          data1[offset+getRelIndex(shape,i,j)]=offset+getRelIndex(shape,i,j);
-          data2[offset+getRelIndex(shape,i,j)]=offset+getRelIndex(shape,i,j);
-        }
-      }
-
-      // apply a binary operation to these data points
-/*      dataView1.binaryOp(dataView2,multiplies<double>());*/
-      binaryOp(data1,shape,offset,data2,shape,offset,multiplies<double>());
-
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          CPPUNIT_ASSERT(data1[offset+getRelIndex(shape,i,j)]==(offset+getRelIndex(shape,i,j))*(offset+getRelIndex(shape,i,j)));
-        }
-      }
-
-      if (p<npoints-1) {
-	offset+=noValues(shape);
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on shape (9,8,5,11) Data.";
-
-    // define the shape for the DataArrayViews
-    DataTypes::ShapeType shape;
-    shape.push_back(9);
-    shape.push_back(8);
-    shape.push_back(5);
-    shape.push_back(11);
-
-    // allocate the data for the DataArrayViews
-    int npoints=4;
-    DataTypes::ValueType data1(DataTypes::noValues(shape)*npoints,0);
-    DataTypes::ValueType data2(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the views along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data points
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              data1[offset+getRelIndex(shape,i,j,k,l)]=offset+getRelIndex(shape,i,j,k,l);
-              data2[offset+getRelIndex(shape,i,j,k,l)]=offset+getRelIndex(shape,i,j,k,l);
-            }
-          }
-        }
-      }
-
-      // apply a binary operation to these data points
-//      dataView1.binaryOp(dataView2,multiplies<double>());
-      binaryOp(data1,shape,offset,data2,shape,offset,multiplies<double>());
-
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              CPPUNIT_ASSERT(data1[offset+getRelIndex(shape,i,j,k,l)]==(offset+getRelIndex(shape,i,j,k,l))*(offset+getRelIndex(shape,i,j,k,l)));
-            }
-          }
-        }
-      }
-
-      if (p<npoints-1) {
-	offset+=noValues(shape);
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on scalar Data and single value.";
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-
-    // allocate the data for the DataArrayView
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      data[offset]=p;
-
-      // apply a binary operation to this data point
-//       dataView.binaryOp(4.9,plus<double>());
-      binaryOp(data,shape,offset,4.9,plus<double>());
-
-      // check the results
-      CPPUNIT_ASSERT(data[offset]==4.9+p);
-
-      if (p<npoints-1) {
-        ++offset;
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on shape (2,3) Data and single value.";
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-    shape.push_back(2);
-    shape.push_back(3);
-
-    // allocate the data for the DataArrayView
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          data[offset+getRelIndex(shape,i,j)]=offset+getRelIndex(shape,i,j);
-        }
-      }
-
-      // apply a binary operation to the data point
-//       dataView.binaryOp(5.8,multiplies<double>());
-      binaryOp(data,shape,offset,5.8,multiplies<double>());
-
-      double tmp;
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          tmp=5.8*(offset+getRelIndex(shape,i,j));
-          CPPUNIT_ASSERT(std::abs(data[offset+getRelIndex(shape,i,j)]-tmp)<=REL_TOL*std::abs(tmp));
-        }
-      }
-
-      if (p<npoints-1) {
-        offset+=noValues(shape);
-      }
-
-    }
-
-  }
-
-  {
-    cout << endl;
-    cout << "\tTest binaryOp on shape (9,8,5,11) Data and single value.";
-
-    // define the shape for the DataArrayView
-    DataTypes::ShapeType shape;
-    shape.push_back(9);
-    shape.push_back(8);
-    shape.push_back(5);
-    shape.push_back(11);
-
-    // allocate the data for the DataArrayView
-    int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
-
-    int offset=0;
-    // step the view along each data point in the underlying data
-    for (int p=0;p<npoints;p++) {
-
-      // assign values to the data point
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              data[offset+getRelIndex(shape,i,j,k,l)]=offset+getRelIndex(shape,i,j,k,l);
-            }
-          }
-        }
-      }
-
-      // apply a binary operation to the data point
-//       dataView.binaryOp(5.4,multiplies<double>());
-      binaryOp(data,shape,offset,5.4,multiplies<double>());
-
-      double tmp;
-      // check the results
-      for (int i=0;i<shape[0];i++) {
-        for (int j=0;j<shape[1];j++) {
-          for (int k=0;k<shape[2];k++) {
-            for (int l=0;l<shape[3];l++) {
-              tmp=5.4*(offset+getRelIndex(shape,i,j,k,l));
-              CPPUNIT_ASSERT(std::abs(data[offset+getRelIndex(shape,i,j,k,l)]-tmp)<=REL_TOL*std::abs(tmp));
-            }
-          }
-        }
-      }
-
-      if (p<npoints-1) {
-        offset+=noValues(shape);
-      }
-
-    }
-
-  }
-
-  cout << endl;
-
-}
-
 void DataMathsTestCase::testReductionOp()
 {
 
@@ -508,7 +86,7 @@ void DataMathsTestCase::testReductionOp()
 
     // allocate the data for the DataArrayView
     int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
+    DataTypes::RealVectorType data(DataTypes::noValues(shape)*npoints,0);
 
     int offset=0;
     // step the view along each data point in the underlying data
@@ -519,8 +97,8 @@ void DataMathsTestCase::testReductionOp()
 
       // apply a reduction operation to this data point and check the results
       FMax fmax_func;
-//       CPPUNIT_ASSERT(std::abs(dataView.reductionOp(fmax_func,numeric_limits<double>::max()*-1)-p)<=REL_TOL*p);
-      CPPUNIT_ASSERT(std::abs(reductionOp(data,shape,offset,fmax_func,numeric_limits<double>::max()*-1)-p)<=REL_TOL*p);
+//       CPPUNIT_ASSERT(std::abs(dataView.reductionOpVector(fmax_func,numeric_limits<double>::max()*-1)-p)<=REL_TOL*p);
+      CPPUNIT_ASSERT(std::abs(reductionOpVector(data,shape,offset,fmax_func,numeric_limits<double>::max()*-1)-p)<=REL_TOL*p);
 
 
 
@@ -543,7 +121,7 @@ void DataMathsTestCase::testReductionOp()
 
     // allocate the data for the DataArrayView
     int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
+    DataTypes::RealVectorType data(DataTypes::noValues(shape)*npoints,0);
 
     int offset=0;
     // step the view along each data point in the underlying data
@@ -558,7 +136,7 @@ void DataMathsTestCase::testReductionOp()
 
       // apply a reduction operation to this data point and check the results
       FMin fmin_func;
-      CPPUNIT_ASSERT(std::abs(reductionOp(data,shape,offset,fmin_func,numeric_limits<double>::max())-offset)<=REL_TOL*std::abs(offset));
+      CPPUNIT_ASSERT(std::abs(reductionOpVector(data,shape,offset,fmin_func,numeric_limits<double>::max())-offset)<=REL_TOL*std::abs(offset));
 
       if (p<npoints-1) {
         offset+=noValues(shape);
@@ -581,7 +159,7 @@ void DataMathsTestCase::testReductionOp()
 
     // allocate the data for the DataArrayView
     int npoints=4;
-    DataTypes::ValueType data(DataTypes::noValues(shape)*npoints,0);
+    DataTypes::RealVectorType data(DataTypes::noValues(shape)*npoints,0);
 
     int offset=0;
     // step the view along each data point in the underlying data
@@ -599,8 +177,8 @@ void DataMathsTestCase::testReductionOp()
       }
 
       // apply a reduction operation to this data point and check the results
-      AbsMax absmax_func;
-      CPPUNIT_ASSERT(reductionOp(data,shape,offset,absmax_func,0)==offset+getRelIndex(shape,8,7,4,10));
+      AbsMax<DataTypes::real_t> absmax_func;
+      CPPUNIT_ASSERT(reductionOpVector(data,shape,offset,absmax_func,0)==offset+getRelIndex(shape,8,7,4,10));
 
       if (p<npoints-1) {
         offset+=noValues(shape);
@@ -618,11 +196,6 @@ TestSuite* DataMathsTestCase::suite()
 {
   // create the suite of tests to perform.
   TestSuite *testSuite = new TestSuite("DataMathsTestCase");
-
-  testSuite->addTest(new TestCaller<DataMathsTestCase>(
-              "testUnaryOp",&DataMathsTestCase::testUnaryOp));
-  testSuite->addTest(new TestCaller<DataMathsTestCase>(
-              "testBinaryOp",&DataMathsTestCase::testBinaryOp));
   testSuite->addTest(new TestCaller<DataMathsTestCase>(
               "testReductionOp",&DataMathsTestCase::testReductionOp));
   testSuite->addTest(new TestCaller<DataMathsTestCase>(
diff --git a/escriptcore/test/DataTaggedTestCase.cpp b/escriptcore/test/DataTaggedTestCase.cpp
index f297e9c..860e041 100644
--- a/escriptcore/test/DataTaggedTestCase.cpp
+++ b/escriptcore/test/DataTaggedTestCase.cpp
@@ -14,913 +14,59 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <escript/DataTypes.h>
 
+#include "DataTaggedTestCase.h"
 
-#include "DataTaggedTestCase.h"
-
-#include "esysUtils/EsysException.h"
-
-#include "escript/BinaryOp.h"
-#include "escript/DataConstant.h"
-#include "escript/DataFactory.h"
-#include "escript/DataTagged.h"
-#include "escript/DataTypes.h"
-#include "escript/DataVector.h"
-#include "escript/FunctionSpace.h"
-#include "escript/FunctionSpaceFactory.h"
-#include "escript/UnaryOp.h"
-
-#include <cppunit/TestCaller.h>
-#include <iostream>
-#include <functional>
-#include <algorithm>
-
-
-using namespace CppUnit;
-using namespace escript;
-using namespace esysUtils;
-using namespace std;
-using namespace escript::DataTypes;
-
-// namespace {
-// std::string constr(FunctionSpace& fs)
-// {
-//    
-//    try
-//    {
-// 	int t[1];
-// 	DataTagged dt(fs,DataTypes::scalarShape,t,DataTypes::ValueType());
-// 	
-// 	return "DataTagged(const FunctionSpace& what, const DataTypes::ShapeType &shape, const int tags[], const ValueType& data) was supposed to throw.";
-//    } catch (DataException d){}
-//    try
-//    {
-// 	DataTagged t(fs,DataTypes::scalarShape,DataTagged::TagListType(),DataTypes::ValueType());
-// 	return "DataTagged(const FunctionSpace& what, const DataTypes::ShapeType &shape, const TagListType& tags, const ValueType& data) was supposed to throw.";
-//    } catch (DataException d){}
-//    try
-//    {
-// 	DataTagged t(fs,DataTypes::scalarShape,DataTypes::ValueType());
-// 	return "  DataTagged(const FunctionSpace& what, const DataTypes::ShapeType& shape, const DataTypes::ValueType& defaultvalue, const DataTagged* tagsource=0) was supposed to throw.";
-//    } catch (DataException d){}
-//    try
-//    {
-//     	DataTypes::ValueType viewData1(1);
-//     	viewData1[0]=0.0;
-// 	DataConstant c(fs,DataTypes::scalarShape, viewData1);
-// 	DataTagged t(c);
-// 	return "DataTagged(const DataConstant& other) was supposed to throw.";
-//    } catch (DataException d){}
-// 
-// }
-// 
-// }
-
-namespace {
-
-ValueType::const_reference
-getRefRO(DataTagged& data,int offset, int i, int j, int k)
-{
-   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i,j,k)];
-}
-
-//ValueType::const_reference
-//getRefRO(DataTagged& data,int offset, int i, int j, int k, int l)
-//{
-//   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i,j,k,l)];
-//}
-//
-//ValueType::const_reference
-//getRefRO(DataTagged& data,int offset, int i, int j)
-//{
-//   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i,j)];
-//}
-
-ValueType::const_reference
-getRefRO(const DataTagged& data,int offset, int i)
-{
-   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i)];
-}
-
-}
-
-void DataTaggedTestCase::testOperations() {
-
-  cout << endl;
-
-  {
-    cout << "\tTest binaryOp addition of two default DataTagged objects." << endl;
-
-    DataTagged myData;
-    DataTagged right;
-
-    binaryOp(myData,right,plus<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(!myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
-
-    CPPUNIT_ASSERT(myData.getLength()==1);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-#ifdef EXWRITECHK		
-		myData.exclusivewritecalled=true;
-#endif	    
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRW(0)==0.0);
-
-    // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(1);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
-    CPPUNIT_ASSERT(myData.getOffsetForTag(1)==0);
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRW(0)==0.0);
-
-//     myDataView = myData.getDefaultValue();
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
-    CPPUNIT_ASSERT(myData.getDefaultOffset()==0);
-//     CPPUNIT_ASSERT(myDataView.getRank()==0);		// there is no point in testing this again
-//     CPPUNIT_ASSERT(myDataView.noValues()==1);	// since we are not building DataArrayViews
-//     CPPUNIT_ASSERT(myDataView.getShape().size()==0);
-//     CPPUNIT_ASSERT(myDataView()==0.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==i);
-    }
-
-  }
-
-  {
-    cout << "\tTest binaryOp addition of two DataTagged objects with default values only." << endl;
-
-    DataTypes::ShapeType viewShape;
-    viewShape.push_back(3);
-
-//     DataTagged::TagListType keys;
-// 
-//     DataTagged::ValueListType values;
-
-    DataTypes::ValueType viewData(3);
-    for (int i=0;i<viewShape[0];i++) {
-      viewData[i]=i;
-    }
-
-//     DataTagged myData(keys,values,myView,FunctionSpace());
-//     DataTagged right(keys,values,myView,FunctionSpace());
-    DataTagged myData(FunctionSpace(),viewShape,viewData);
-    DataTagged right(FunctionSpace(),viewShape,viewData);
-
-
-    binaryOp(myData,right,plus<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(!myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
-
-    CPPUNIT_ASSERT(myData.getLength()==3);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-
-    CPPUNIT_ASSERT(myData.getRank()==1);
-    CPPUNIT_ASSERT(myData.getNoValues()==3);
-    CPPUNIT_ASSERT(myData.getShape().size()==1);
-
-
-    int offset=myData.getDefaultOffset();
-//     DataArrayView myDataView = myData.getDefaultValue();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,1)==2);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,2)==4);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==i*2);
-    }
-
-  }
-
-  {
-    cout << "\tTest binaryOp addition of two DataTagged objects with one identical tag each." << endl;
-
-    DataTagged myData;
-    DataTagged right;
-
-    DataVector vOneData(1, 1.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    right.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-
-    binaryOp(myData,right,plus<double>());
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==1);
-
-    CPPUNIT_ASSERT(myData.getLength()==2);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
-
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-
-
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getVectorRO()[offset]==2.0);
-
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getVectorRO()[offset]==0.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==i*2);
-    }
-
-  }
-
-  {
-    cout << "\tTest binaryOp addition of two DataTagged objects with one different tag each." << endl;
-
-    DataTagged myData;
-    DataTagged right;
-
-    // it's important that default values are different, as we need to be able to
-    // verify that the tag values in each object are being added to the correct
-    // default values - since the tag lists don't match, the default values will
-    // be used for missing tags in each object
-//     myData.getDefaultValue()()=1.0;
-//     right.getDefaultValue()()=2.0;
-#ifdef EXWRITECHK		
-    myData.exclusivewritecalled=true;
-#endif	    
-    
-    myData.getVectorRW()[myData.getDefaultOffset()]=1.0;
-    right.getVectorRW()[right.getDefaultOffset()]=2.0;
-
-    DataVector vOneData(1, 3.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    DataVector vTwoData(1, 4.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vTwoView(vTwoData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    right.addTaggedValue(2,DataTypes::scalarShape,vTwoData);
-
-    //cout << myData.toString() << endl;
-    //cout << right.toString() << endl;
-
-    binaryOp(myData,right,plus<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-    CPPUNIT_ASSERT(myData.isCurrentTag(2));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==2);
-
-    CPPUNIT_ASSERT(myData.getLength()==3);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==5.0);
-
-    // check result value for tag "2"
-//     myDataView = myData.getDataPointByTag(2);
-    offset=myData.getOffsetForTag(2);
-    CPPUNIT_ASSERT(offset==2);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==5.0);
-
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==3.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    CPPUNIT_ASSERT(sampleData[0]==3);
-    CPPUNIT_ASSERT(sampleData[1]==5);
-    CPPUNIT_ASSERT(sampleData[2]==5);
-
-  }
-
-  {
-    cout << "\tTest binaryOp addition of two DataTagged objects with overlapping tag sets." << endl;
-
-    DataTagged myData;
-    DataTagged right;
-
-    // it's important that default values are different, as we need to be able to
-    // verify that the tag values in each object are being added to the correct
-    // default values - since the tag lists don't match, the default values will
-    // be used for missing tags in each object
-/*    myData.getDefaultValue()()=2.0;
-    right.getDefaultValue()()=3.0;*/
-#ifdef EXWRITECHK		
-    myData.exclusivewritecalled=true;
-#endif	
-    myData.getVectorRW()[myData.getDefaultOffset()]=2.0;
-    right.getVectorRW()[right.getDefaultOffset()]=3.0;
-
-
-    DataVector vOneData(1, 1.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    myData.addTaggedValue(2,DataTypes::scalarShape,vOneData);
-    right.addTaggedValue(2,DataTypes::scalarShape,vOneData);
-    right.addTaggedValue(3,DataTypes::scalarShape,vOneData);
-
-    //cout << myData.toString() << endl;
-    //cout << right.toString() << endl;
-
-    binaryOp(myData,right,plus<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-    CPPUNIT_ASSERT(myData.isCurrentTag(2));
-    CPPUNIT_ASSERT(myData.isCurrentTag(3));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==3);
-
-    CPPUNIT_ASSERT(myData.getLength()==4);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==4.0);
-
-    // check result value for tag "2"
-//     myDataView = myData.getDataPointByTag(2);
-    offset=myData.getOffsetForTag(2);
-    CPPUNIT_ASSERT(offset==2);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==2.0);
-
-    // check result value for tag "3"
-//     myDataView = myData.getDataPointByTag(3);
-    offset=myData.getOffsetForTag(3);
-    CPPUNIT_ASSERT(offset==3);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==3.0);
-
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==5.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    CPPUNIT_ASSERT(sampleData[0]==5);
-    CPPUNIT_ASSERT(sampleData[1]==4);
-    CPPUNIT_ASSERT(sampleData[2]==2);
-    CPPUNIT_ASSERT(sampleData[3]==3);
-
-  }
-
-  {
-    cout << "\tTest binaryOp multiplication of two DataTagged objects with default values only." << endl;
-
-    DataTypes::ShapeType viewShape;
-    viewShape.push_back(3);
-
-//     DataTagged::TagListType keys;
-
-//     DataTagged::ValueListType values;
-
-    DataTypes::ValueType viewData(3);
-    for (int i=0;i<viewShape[0];i++) {
-      viewData[i]=i;
-    }
-//     DataArrayView myView(viewData,viewShape);
-
-    DataTagged myData(FunctionSpace(),viewShape,viewData);
-    DataTagged right(FunctionSpace(),viewShape,viewData);
-
-    binaryOp(myData,right,multiplies<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(!myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
-
-    CPPUNIT_ASSERT(myData.getLength()==3);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-
-    CPPUNIT_ASSERT(myData.getRank()==1);
-    CPPUNIT_ASSERT(myData.getNoValues()==3);
-    CPPUNIT_ASSERT(myData.getShape().size()==1);
-
-
-//     DataArrayView myDataView = myData.getDefaultValue();
-    int offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,2)==4);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==i*i);
-    }
-
-  }
-
-  {
-
-    cout << "\tTest binaryOp multiplication of DataTagged object with a scalar." << endl;
-
-    DataTagged myData;
-
-    DataVector vOneData(1, 1.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    DataVector vTwoData(1, 2.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vTwoView(vTwoData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    myData.addTaggedValue(2,DataTypes::scalarShape,vTwoData);
-
-    DataVector vThreeData(1, 3.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vThreeView(vThreeData,DataTypes::ShapeType());
-
-//     DataArrayView right=vThreeView;
-
-    //cout << myData.toString() << endl;
-    //cout << right.toString() << endl;
-
-    binaryOp(myData,vThreeData, DataTypes::scalarShape,multiplies<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-    CPPUNIT_ASSERT(myData.isCurrentTag(2));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==2);
-
-    CPPUNIT_ASSERT(myData.getLength()==3);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==3.0);
-
-    // check result value for tag "2"
-//     myDataView = myData.getDataPointByTag(2);
-    offset=myData.getOffsetForTag(2);
-    CPPUNIT_ASSERT(offset==2);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==6.0);
-
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    CPPUNIT_ASSERT(sampleData[0]==0);
-    CPPUNIT_ASSERT(sampleData[1]==3);
-    CPPUNIT_ASSERT(sampleData[2]==6);
-
-  }
-
-  {
-    cout << "\tTest binaryOp multiplication of two DataTagged objects with overlapping tag sets." << endl;
-
-    DataTagged myData;
-    DataTagged right;
-
-    // it's important that default values are different, as we need to be able to
-    // verify that the tag values in each object are being added to the correct
-    // default values - since the tag lists don't match, the default values will
-    // be used for missing tags in each object
-//     myData.getDefaultValue()()=2.0;
-//     right.getDefaultValue()()=3.0;
-#ifdef EXWRITECHK		
-    myData.exclusivewritecalled=true;
-#endif	    
-    myData.getVectorRW()[myData.getDefaultOffset()]=2.0;
-    right.getVectorRW()[right.getDefaultOffset()]=3.0;
-
-    DataVector vOneData(1, 1.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    DataVector vTwoData(1, 2.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vTwoView(vTwoData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    myData.addTaggedValue(2,DataTypes::scalarShape,vOneData);
-    right.addTaggedValue(2,DataTypes::scalarShape,vTwoData);
-    right.addTaggedValue(3,DataTypes::scalarShape,vTwoData);
-
-    //cout << myData.toString() << endl;
-    //cout << right.toString() << endl;
-
-    binaryOp(myData,right,multiplies<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-    CPPUNIT_ASSERT(myData.isCurrentTag(2));
-    CPPUNIT_ASSERT(myData.isCurrentTag(3));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==3);
-
-    CPPUNIT_ASSERT(myData.getLength()==4);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==3.0);
-
-    // check result value for tag "2"
-//     myDataView = myData.getDataPointByTag(2);
-    offset=myData.getOffsetForTag(2);
-    CPPUNIT_ASSERT(offset==2);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==2.0);
-
-    // check result value for tag "3"
-//     myDataView = myData.getDataPointByTag(3);
-    offset=myData.getOffsetForTag(3);
-    CPPUNIT_ASSERT(offset==3);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==4.0);
-
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==6.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    CPPUNIT_ASSERT(sampleData[0]==6);
-    CPPUNIT_ASSERT(sampleData[1]==3);
-    CPPUNIT_ASSERT(sampleData[2]==2);
-    CPPUNIT_ASSERT(sampleData[3]==4);
-
-  }
-
-  {
-    cout << "\tTest unaryOp negate on default DataTagged object." << endl;
-
-    DataTagged myData;
-
-    unaryOp(myData,negate<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(!myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
-
-    CPPUNIT_ASSERT(myData.getLength()==1);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
-
-
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
-    int offset=myData.getPointOffset(0,0);
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
-
-    // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(1);
-    offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
-
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==i);
-    }
-
-  }
-
-  {
-    cout << "\tTest unaryOp negate on DataTagged object with default value only." << endl;
-
-    DataTypes::ShapeType viewShape;
-    viewShape.push_back(3);
-
-//     DataTagged::TagListType keys;
-
-//     DataTagged::ValueListType values;
-
-    DataTypes::ValueType viewData(3);
-    for (int i=0;i<viewShape[0];i++) {
-      viewData[i]=i;
-    }
-//     DataArrayView myView(viewData,viewShape);
-
-    DataTagged myData(FunctionSpace(),viewShape,viewData);
-
-    unaryOp(myData,negate<double>());
-
-    //cout << myData.toString() << endl;
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(!myData.isCurrentTag(1));
-
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
-
-    CPPUNIT_ASSERT(myData.getLength()==3);
-
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
-
-    CPPUNIT_ASSERT(myData.getRank()==1);
-    CPPUNIT_ASSERT(myData.getNoValues()==3);
-    CPPUNIT_ASSERT(myData.getShape().size()==1);
-
-
-    int offset=myData.getDefaultOffset();
-//     DataArrayView myDataView = myData.getDefaultValue();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,1)==-1);
-    CPPUNIT_ASSERT(getRefRO(myData,offset,2)==-2);
-
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==0-i);
-    }
-
-  }
-
-  {
-    cout << "\tTest unnaryOp negate on DataTagged object with two tags." << endl;
-
-    DataTagged myData;
-
-    DataVector vOneData(1, 1.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vOneView(vOneData,DataTypes::ShapeType());
-
-    DataVector vTwoData(1, 2.0 ,1);
-    // create a view with an empty shape, a scalar.
-//     DataArrayView vTwoView(vTwoData,DataTypes::ShapeType());
-
-    myData.addTaggedValue(1,DataTypes::scalarShape,vOneData);
-    myData.addTaggedValue(2,DataTypes::scalarShape,vTwoData);
-
-    unaryOp(myData,negate<double>());
-
-    CPPUNIT_ASSERT(myData.getNumSamples()==1);
-    CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
-
-    CPPUNIT_ASSERT(myData.validSamplePointNo(0));
-    CPPUNIT_ASSERT(myData.validSampleNo(0));
-    CPPUNIT_ASSERT(!myData.validSamplePointNo(1));
-    CPPUNIT_ASSERT(!myData.validSampleNo(1));
-
-    // data-point 0 has tag number 1 by default
-    CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
-
-    CPPUNIT_ASSERT(myData.isCurrentTag(1));
-    CPPUNIT_ASSERT(myData.isCurrentTag(2));
+#include <escript/BinaryDataReadyOps.h>
+#include <escript/DataConstant.h>
+#include <escript/DataFactory.h>
+#include <escript/DataTagged.h>
+#include <escript/DataVector.h>
+#include <escript/EsysException.h>
+#include <escript/FunctionSpace.h>
+#include <escript/FunctionSpaceFactory.h>
 
-    CPPUNIT_ASSERT(myData.getTagLookup().size()==2);
+#include <cppunit/TestCaller.h>
 
-    CPPUNIT_ASSERT(myData.getLength()==3);
+#include <algorithm>
+#include <functional>
+#include <iostream>
 
-    CPPUNIT_ASSERT(myData.getPointOffset(0,0)==1);
 
-    CPPUNIT_ASSERT(myData.getRank()==0);
-    CPPUNIT_ASSERT(myData.getNoValues()==1);
-    CPPUNIT_ASSERT(myData.getShape().size()==0);
+using namespace CppUnit;
+using namespace escript;
+using namespace std;
+using namespace escript::DataTypes;
 
+namespace {
 
-    // check result value for tag "1"
-//     DataArrayView myDataView = myData.getDataPointByTag(1);
-    int offset=myData.getOffsetForTag(1);
-    CPPUNIT_ASSERT(offset==1);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==-1.0);
+RealVectorType::const_reference
+getRefRO(DataTagged& data,int offset, int i, int j, int k)
+{
+   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i,j,k)];
+}
 
-    // check result value for tag "2"
-//     myDataView = myData.getDataPointByTag(2);
-    offset=myData.getOffsetForTag(2);
-    CPPUNIT_ASSERT(offset==2);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==-2.0);
+RealVectorType::const_reference
+getRefRO(const DataTagged& data,int offset, int i)
+{
+   return data.getVectorRO()[offset+getRelIndex(data.getShape(),i)];
+}
 
-    // check result for default value
-//     myDataView = myData.getDefaultValue();
-    offset=myData.getDefaultOffset();
-    CPPUNIT_ASSERT(offset==0);
-    CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
+}
 
-    // use a non-existent tag so we get a pointer to
-    // the first element of the data array
-    double* sampleData=myData.getSampleDataByTag(9);
-    for (int i=0; i<myData.getLength(); i++) {
-      CPPUNIT_ASSERT(sampleData[i]==0-i);
+namespace
+{
+    DataTagged makeTagged()
+    {
+        int a[1]={0};
+        DataTypes::RealVectorType v;
+        v.resize(1, 0.0, 1);
+        return DataTagged(FunctionSpace(), DataTypes::scalarShape, a, v);
     }
-
-  }
-
 }
 
+
 void DataTaggedTestCase::testAddTaggedValues() {
 
   cout << endl;
@@ -928,12 +74,12 @@ void DataTaggedTestCase::testAddTaggedValues() {
   {
 
     cout << "\tTest adding one key with empty value list to default DataTagged." << endl;
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     myData.addTaggedValues(keys,values,DataTypes::scalarShape);
 
@@ -953,17 +99,14 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
@@ -980,17 +123,14 @@ void DataTaggedTestCase::testAddTaggedValues() {
   {
 
     cout << "\tTest adding one key with one value to default DataTagged." << endl;
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
-/*    DataTypes::ValueType viewData(1);
-    viewData[0]=1.0;*/
-//     DataArrayView myView(viewData,viewShape);
     values.push_back(1.0);
 
     myData.addTaggedValues(keys,values,viewShape);
@@ -1011,17 +151,14 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
@@ -1038,17 +175,17 @@ void DataTaggedTestCase::testAddTaggedValues() {
   {
 
     cout << "\tTest adding three keys with one value to default DataTagged." << endl;
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
     DataTagged::TagListType keys;
     keys.push_back(1);
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
-/*    DataTypes::ValueType viewData(1);
+/*    DataTypes::RealVectorType viewData(1);
     viewData[0]=1.0;
     DataArrayView myView(viewData,viewShape);*/
     values.push_back(1.0);
@@ -1073,27 +210,22 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(2);
     offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==2);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(3);
     offset=myData.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
@@ -1114,21 +246,21 @@ void DataTaggedTestCase::testAddTaggedValues() {
   {
 
     cout << "\tTest adding three keys with three values to default DataTagged." << endl;
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
     DataTagged::TagListType keys;
     keys.push_back(1);
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
-/*    DataTypes::ValueType viewData1(1);
+/*    DataTypes::RealVectorType viewData1(1);
     viewData1[0]=1.0;
-    DataTypes::ValueType viewData2(1);
+    DataTypes::RealVectorType viewData2(1);
     viewData2[0]=2.0;
-    DataTypes::ValueType viewData3(1);
+    DataTypes::RealVectorType viewData3(1);
     viewData3[0]=3.0;
     DataArrayView myView1(viewData1,viewShape);
     DataArrayView myView2(viewData2,viewShape);
@@ -1157,27 +289,22 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDataPointByTag(2);
     offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==2);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==2.0);
 
-//     myDataView = myData.getDataPointByTag(3);
     offset=myData.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==3.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
@@ -1200,9 +327,9 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
@@ -1230,21 +357,18 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -1269,25 +393,20 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     DataTagged myData(FunctionSpace(),viewShape,viewData);
 
     keys.push_back(1);
 
-//     DataTypes::ValueType viewData1(3);
     for (int i=0;i<viewShape[0];i++) {
-//       viewData1[i]=i+3;
 	values.push_back(i+3);
     }
-//     DataArrayView myView1(viewData1,viewShape);
-//     values.push_back(myView1);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1308,20 +427,17 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
 
     int offset=myData.getPointOffset(0,0);
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==5);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==5);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -1346,13 +462,12 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     DataTagged myData(FunctionSpace(),viewShape,viewData);
 
@@ -1360,13 +475,9 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-//     DataTypes::ValueType viewData1(3);
     for (int i=0;i<viewShape[0];i++) {
-//       viewData1[i]=3;
 	values.push_back(3);
     }
-//     DataArrayView myView1(viewData1,viewShape);
-//     values.push_back(myView1);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1387,35 +498,30 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(2);
     offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(3);
     offset=myData.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -1444,13 +550,12 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     DataTagged myData(FunctionSpace(),viewShape,viewData);
 
@@ -1458,29 +563,17 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-//     DataTypes::ValueType viewData1(3);
     for (int i=0;i<viewShape[0];i++) {
-//       viewData1[i]=i+1;
 	values.push_back(i+1);
     }
-//     DataArrayView myView1(viewData1,viewShape);
-//     values.push_back(myView1);
 
-//     DataTypes::ValueType viewData2(3);
     for (int i=0;i<viewShape[0];i++) {
-//       viewData2[i]=i+2;
 	values.push_back(i+2);
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
-//     DataTypes::ValueType viewData3(3);
     for (int i=0;i<viewShape[0];i++) {
-//       viewData3[i]=i+3;
 	values.push_back(i+3);
     }
-//     DataArrayView myView3(viewData3,viewShape);
-//     values.push_back(myView3);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1502,35 +595,30 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(2);
     offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==4);
 
-//     myDataView = myData.getDataPointByTag(3);
     offset=myData.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==5);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -1563,44 +651,31 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eOneView(i)=i+1.0;
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
 	viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+3.0;
 	viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
@@ -1620,7 +695,6 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
-//     DataArrayView myDataView = myData.getDataPointByTag(4);
     int offset=myData.getOffsetForTag(4);
     CPPUNIT_ASSERT(offset==12);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -1655,44 +729,31 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eOneView(i)=i+1.0;
 	viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
 	viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+3.0;
 	viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
@@ -1701,12 +762,9 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     values.clear();
     // value for tag "4"
-//     DataTypes::ValueType eFourData(viewData);
-//     DataArrayView eFourView(eFourData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       values.push_back(i+4.0);
     }
-//     values.push_back(eFourView);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1721,7 +779,6 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
 
-//     DataArrayView myDataView = myData.getDataPointByTag(4);
     int offset=myData.getOffsetForTag(4);
     CPPUNIT_ASSERT(offset==12);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==4);
@@ -1756,43 +813,31 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
 	viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+3.0;
 	viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
@@ -1803,13 +848,9 @@ void DataTaggedTestCase::testAddTaggedValues() {
 
     values.clear();
     // value for tags "4", "5" and "6"
-//     DataTypes::ValueType eFourData(viewData);
-//     DataArrayView eFourView(eFourData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eFourView(i)=i+4.0;
 	values.push_back(i+4.0);
     }
-//     values.push_back(eFourView);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1825,21 +866,18 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
-//     DataArrayView myDataView = myData.getDataPointByTag(4);
     int offset=myData.getOffsetForTag(4);
     CPPUNIT_ASSERT(offset==12);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==5);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==6);
 
-//     myDataView = myData.getDataPointByTag(5);
     offset=myData.getOffsetForTag(5);
     CPPUNIT_ASSERT(offset==15);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==5);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==6);
 
-//     myDataView = myData.getDataPointByTag(6);
     offset=myData.getOffsetForTag(6);
     CPPUNIT_ASSERT(offset==18);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==4);
@@ -1878,42 +916,31 @@ void DataTaggedTestCase::testAddTaggedValues() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
 	viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
@@ -1925,29 +952,19 @@ void DataTaggedTestCase::testAddTaggedValues() {
     values.clear();
 
     // value for tag "4"
-//     DataTypes::ValueType eFourData(viewData);
-//     DataArrayView eFourView(eFourData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       values.push_back(i+4.0);
     }
-//     values.push_back(eFourView);
 
     // value for tag "5"
-//     DataTypes::ValueType eFiveData(viewData);
-//     DataArrayView eFiveView(eFiveData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       values.push_back(i+5.0);
     }
-//     values.push_back(eFiveView);
 
     // value for tag "6"
-//     DataTypes::ValueType eSixData(viewData);
-//     DataArrayView eSixView(eSixData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eSixView(i)=i+6.0;
 	values.push_back(i+6.0);
     }
-//     values.push_back(eSixView);
 
     myData.addTaggedValues(keys,values,viewShape);
 
@@ -1963,21 +980,18 @@ void DataTaggedTestCase::testAddTaggedValues() {
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
-//     DataArrayView myDataView = myData.getDataPointByTag(4);
     int offset=myData.getOffsetForTag(4);
     CPPUNIT_ASSERT(offset==12);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==4);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==5);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==6);
 
-//     myDataView = myData.getDataPointByTag(5);
     offset=myData.getOffsetForTag(5);
     CPPUNIT_ASSERT(offset==15);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==5);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==6);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==7);
 
-//     myDataView = myData.getDataPointByTag(6);
     offset=myData.getOffsetForTag(6);
     CPPUNIT_ASSERT(offset==18);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==6);
@@ -2022,48 +1036,35 @@ void DataTaggedTestCase::testSetTaggedValue() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
       viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
-
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+3.0;
 	viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
     // new value for tag "2"
-    ValueType tmp(viewShape[0]);
+    RealVectorType tmp(viewShape[0]);
     for (int i=0;i<viewShape[0];i++) {
       tmp[i]=i+5.0;
     }
@@ -2079,7 +1080,6 @@ void DataTaggedTestCase::testSetTaggedValue() {
     CPPUNIT_ASSERT(myData.getRank()==1);
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
-//     DataArrayView myDataView = myData.getDataPointByTag(2);
     int offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==5);
@@ -2112,9 +1112,8 @@ void DataTaggedTestCase::testAll() {
   {
 
     cout << "\tTest default DataTagged." << endl;
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
-    //cout << myData.toString() << endl;
 
     CPPUNIT_ASSERT(myData.getNumSamples()==1);
     CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
@@ -2131,6 +1130,10 @@ void DataTaggedTestCase::testAll() {
 
     CPPUNIT_ASSERT(myData.getTagLookup().size()==0);
 
+    int ll=myData.getLength();
+    cout << "\t" << ll << endl;
+
+
     CPPUNIT_ASSERT(myData.getLength()==1);
 
     CPPUNIT_ASSERT(myData.getPointOffset(0,0)==0);
@@ -2140,18 +1143,15 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==0.0);
@@ -2178,15 +1178,13 @@ void DataTaggedTestCase::testAll() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
     DataTagged myData(FunctionSpace(),viewShape, viewData);
-    //cout << myData.toString() << endl;
 
     CPPUNIT_ASSERT(myData.getNumSamples()==1);
     CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
@@ -2211,7 +1209,6 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(myData.getNoValues()==3);
     CPPUNIT_ASSERT(myData.getShape().size()==1);
 
-//    DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -2219,14 +1216,12 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -2255,30 +1250,23 @@ void DataTaggedTestCase::testAll() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*2);
+    DataTypes::RealVectorType viewData(3*2);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eOneView(i)=i+1.0;
 	viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    //cout << myData.toString() << endl;
-
     CPPUNIT_ASSERT(myData.getNumSamples()==1);
     CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
 
@@ -2305,13 +1293,11 @@ void DataTaggedTestCase::testAll() {
 
 
     int offset=myData.getPointOffset(0,0);
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
@@ -2319,14 +1305,12 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(9);
     offset=myData.getOffsetForTag(9);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -2361,48 +1345,34 @@ void DataTaggedTestCase::testAll() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-/*      eOneView(i)=i+1.0;*/
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-/*      eThreeView(i)=i+3.0;*/
       viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    //cout << myData.toString() << endl;
-
     CPPUNIT_ASSERT(myData.getNumSamples()==1);
     CPPUNIT_ASSERT(myData.getNumDPPSample()==1);
 
@@ -2431,13 +1401,11 @@ void DataTaggedTestCase::testAll() {
 
 
     int offset=myData.getPointOffset(0,0);
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==1);
@@ -2445,14 +1413,12 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==3);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(0);
     offset=myData.getOffsetForTag(0);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==0);
@@ -2460,14 +1426,12 @@ void DataTaggedTestCase::testAll() {
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==2);
 
     // Test data-points held for remaining tags
-//     myDataView = myData.getDataPointByTag(2);
     offset=myData.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==2);
     CPPUNIT_ASSERT(getRefRO(myData,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myData,offset,2)==4);
 
-//     myDataView = myData.getDataPointByTag(3);
     offset=myData.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData,offset,0)==3);
@@ -2512,50 +1476,36 @@ void DataTaggedTestCase::testCopyConstructors() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       viewData[viewShape[0]+i]=i+1.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+2.0;
 	viewData[2*viewShape[0]+i]=i+2.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+3.0;
 	viewData[3*viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
     DataTagged myDataCopy(myData);
 
-    //cout << myDataCopy.toString() << endl;
-
     CPPUNIT_ASSERT(myDataCopy.getNumSamples()==1);
     CPPUNIT_ASSERT(myDataCopy.getNumDPPSample()==1);
 
@@ -2583,13 +1533,11 @@ void DataTaggedTestCase::testCopyConstructors() {
     CPPUNIT_ASSERT(myDataCopy.getShape().size()==1);
 
     int offset=myDataCopy.getPointOffset(0,0);
-//     DataArrayView myDataView = myDataCopy.getDataPoint(0,0);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==1);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,1)==2);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,2)==3);
 
-//     myDataView = myDataCopy.getDataPointByTag(1);
     offset=myDataCopy.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==1);
@@ -2597,14 +1545,12 @@ void DataTaggedTestCase::testCopyConstructors() {
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,2)==3);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myDataCopy.getDataPointByTag(0);
     offset=myDataCopy.getOffsetForTag(0);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,2)==2);
 
-    //myDataView = myDataCopy.getDefaultValue();
     offset=myDataCopy.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==0);
@@ -2612,14 +1558,12 @@ void DataTaggedTestCase::testCopyConstructors() {
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,2)==2);
 
     // Test data-points held for remaining tags
-//     myDataView = myDataCopy.getDataPointByTag(2);
     offset=myDataCopy.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==2);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,1)==3);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,2)==4);
 
-//     myDataView = myDataCopy.getDataPointByTag(3);
     offset=myDataCopy.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myDataCopy,offset,0)==3);
@@ -2649,8 +1593,7 @@ void DataTaggedTestCase::testCopyConstructors() {
 
     // Create a DataConstant
     DataTypes::ShapeType shape;
-    DataTypes::ValueType data(DataTypes::noValues(shape),0);
-//     DataArrayView pointData(data,shape);
+    DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
     data[0]=1.0;
     DataConstant myConstantData(FunctionSpace(),shape,data);
 
@@ -2683,18 +1626,15 @@ void DataTaggedTestCase::testCopyConstructors() {
     CPPUNIT_ASSERT(myData.getShape().size()==0);
 
 
-//     DataArrayView myDataView = myData.getDataPoint(0,0);
     int offset=myData.getPointOffset(0,0);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
     // Test non-existent tag returns the default value.
-//     myDataView = myData.getDataPointByTag(1);
     offset=myData.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
 
-//     myDataView = myData.getDefaultValue();
     offset=myData.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData.getDataAtOffsetRO(offset)==1.0);
@@ -2718,21 +1658,18 @@ void DataTaggedTestCase::testGetSlice() {
 
     cout << "\tTest slicing default DataTagged." << endl;
 
-    DataTagged myData;
+    DataTagged myData=makeTagged();
 
     DataTypes::RegionType region;
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    // cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==0);
 
     CPPUNIT_ASSERT(myDataSliced->getLength()==1);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getRank()==0);
@@ -2752,13 +1689,12 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     DataTagged myData(FunctionSpace(),viewShape,viewData);
 
@@ -2772,8 +1708,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==0);
@@ -2784,7 +1718,6 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==3);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
 
@@ -2803,7 +1736,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -2811,7 +1743,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     CPPUNIT_ASSERT(myDataSliced->getLength()==1);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getRank()==0);
@@ -2833,17 +1764,15 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
-    DataTypes::ValueType viewData(27);
+    DataTypes::RealVectorType viewData(27);
     for (int i=0;i<viewData.size();i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     DataTagged myData(FunctionSpace(),viewShape,viewData);
 
-    //cout << myData.toString() << endl;
 
     // full slice
 
@@ -2857,15 +1786,12 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==0);
 
     CPPUNIT_ASSERT(myDataSliced->getLength()==27);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getRank()==3);
@@ -2884,7 +1810,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -2896,7 +1821,6 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==3);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==0.0);
@@ -2916,7 +1840,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -2929,7 +1852,6 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[0]==26);
@@ -2943,24 +1865,19 @@ void DataTaggedTestCase::testGetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
 
     // default value
-    DataTypes::ValueType viewData(1*2);
+    DataTypes::RealVectorType viewData(1*2);
     viewData[0]=0.0;
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     viewData[1]=1.0;
-//     values.push_back(eOneView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys, viewData);
 
-    //cout << myData.toString() << endl;
 
     // full slice
 
@@ -2968,7 +1885,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -2976,7 +1892,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     CPPUNIT_ASSERT(myDataSliced->getLength()==2);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getRank()==0);
@@ -2984,7 +1899,6 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==0);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==1);
@@ -3003,28 +1917,21 @@ void DataTaggedTestCase::testGetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     // default value
-    DataTypes::ValueType viewData(3*2);
+    DataTypes::RealVectorType viewData(3*2);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eOneView(i)=i+3.0;
        viewData[viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eOneView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    //cout << myData.toString() << endl;
-
     // full slice
 
     std::pair<int, int> region_element;
@@ -3035,8 +1942,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==1);
@@ -3045,14 +1950,12 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getRank()==1);
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==3);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==2);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==3);
@@ -3070,8 +1973,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==1);
@@ -3083,12 +1984,10 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==1);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==4);
@@ -3107,27 +2006,21 @@ void DataTaggedTestCase::testGetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     // default value
-    DataTypes::ValueType viewData(27*2);
+    DataTypes::RealVectorType viewData(27*2);
     for (int i=0;i<noValues(viewShape);i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType viewData1(27);
     for (int i=0;i<noValues(viewShape);i++) {
       viewData[noValues(viewShape)+i]=i+27.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
-//     values.push_back(myView1);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    //cout << myData.toString() << endl;
-
     // full slice
 
     std::pair<int, int> region_element;
@@ -3140,7 +2033,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3152,11 +2044,9 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==27);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==3);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
 
@@ -3171,7 +2061,6 @@ void DataTaggedTestCase::testGetSlice() {
     delete slicedDefault;
 
     slicedDefault = myData.getSlice(region);
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3183,14 +2072,12 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==3);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==2);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==27);
@@ -3209,7 +2096,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3221,12 +2107,10 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==1);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==13);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==40);
@@ -3243,48 +2127,31 @@ void DataTaggedTestCase::testGetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
 
     // default value
-    DataTypes::ValueType viewData(1*4);
+    DataTypes::RealVectorType viewData(1*4);
     viewData[0]=0.0;
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
-//     eOneView()=1.0;
     viewData[1]=1.0;
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
-//     eTwoView()=2.0;
     viewData[2]=2.0;
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
-//     eThreeView()=3.0;
     viewData[3]=3.0;
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    // cout << myData.toString() << endl;
-
     // full slice
 
     DataTypes::RegionType region;
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==3);
@@ -3295,22 +2162,18 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==1);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==0);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==1);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==2);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==2);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==3);
@@ -3330,45 +2193,31 @@ void DataTaggedTestCase::testGetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     // default value
-    DataTypes::ValueType viewData(3*4);
+    DataTypes::RealVectorType viewData(3*4);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType eOneData(viewData);
-//     DataArrayView eOneView(eOneData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eOneView(i)=i+3.0;
 	viewData[viewShape[0]+i]=i+3.0;
     }
-//     values.push_back(eOneView);
 
     // value for tag "2"
-//     DataTypes::ValueType eTwoData(viewData);
-//     DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eTwoView(i)=i+6.0;
 	viewData[2*viewShape[0]+i]=i+6.0;
     }
-//     values.push_back(eTwoView);
 
     // value for tag "3"
-//     DataTypes::ValueType eThreeData(viewData);
-//     DataArrayView eThreeView(eThreeData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
-//       eThreeView(i)=i+9.0;
 	viewData[3*viewShape[0]+i]=i+9.0;
     }
-//     values.push_back(eThreeView);
 
     DataTagged myData(FunctionSpace(),viewShape, keys, viewData);
 
-    //cout << myData.toString() << endl;
 
     // full slice
 
@@ -3380,8 +2229,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
-
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
     CPPUNIT_ASSERT(myDataSliced->getTagLookup().size()==3);
@@ -3393,28 +2240,24 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
 
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==2);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==3);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==4);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==5);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==6);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==7);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==8);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==9);
@@ -3432,7 +2275,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3444,22 +2286,18 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==1);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==1);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==4);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==2);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==7);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==10);
@@ -3481,43 +2319,32 @@ void DataTaggedTestCase::testGetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     int nvals=27;
     // default value
-    DataTypes::ValueType viewData(27*4);
+    DataTypes::RealVectorType viewData(27*4);
     for (int i=0;i<nvals;i++) {
       viewData[i]=i;
     }
-//     DataArrayView myView(viewData,viewShape);
 
     // value for tag "1"
-//     DataTypes::ValueType viewData1(27);
     for (int i=0;i<nvals;i++) {
       viewData[nvals+i]=i+27.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
-//     values.push_back(myView1);
 
     // value for tag "2"
-//     DataTypes::ValueType viewData2(27);
     for (int i=0;i<nvals;i++) {
       viewData[2*nvals+i]=i+54.0;
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     // value for tag "3"
-//     DataTypes::ValueType viewData3(27);
     for (int i=0;i<nvals;i++) {
       viewData[3*nvals+i]=i+81.0;
     }
-//     DataArrayView myView3(viewData3,viewShape);
-//     values.push_back(myView3);
 
     DataTagged myData(FunctionSpace(),viewShape,keys,viewData);
 
-    //cout << myData.toString() << endl;
 
     // full slice
 
@@ -3531,7 +2358,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     DataAbstract* slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     const DataTagged* myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3543,19 +2369,15 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==27);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==3);
 
-//     DataArrayView myDataView = myDataSliced->getDefaultValue();
     int offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==54);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==81);
 
@@ -3571,7 +2393,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    // cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3583,28 +2404,24 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==3);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==1);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==0);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==1);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==2);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==27);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==28);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==29);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==54);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,1)==55);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,2)==56);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(*myDataSliced,offset,0)==81);
@@ -3624,7 +2441,6 @@ void DataTaggedTestCase::testGetSlice() {
 
     slicedDefault = myData.getSlice(region);
 
-    //cout << slicedDefault->toString() << endl;
 
     myDataSliced=dynamic_cast<const DataTagged*>(slicedDefault);
 
@@ -3635,22 +2451,18 @@ void DataTaggedTestCase::testGetSlice() {
     CPPUNIT_ASSERT(myDataSliced->getNoValues()==1);
     CPPUNIT_ASSERT(myDataSliced->getShape().size()==0);
 
-//     myDataView = myDataSliced->getDefaultValue();
     offset=myDataSliced->getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==13);
 
-//     myDataView = myDataSliced->getDataPointByTag(1);
     offset=myDataSliced->getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==40);
 
-//     myDataView = myDataSliced->getDataPointByTag(2);
     offset=myDataSliced->getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==2);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==67);
 
-//     myDataView = myDataSliced->getDataPointByTag(3);
     offset=myDataSliced->getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myDataSliced->getVectorRO()[offset]==94);
@@ -3668,14 +2480,13 @@ void DataTaggedTestCase::testSetSlice() {
 
     cout << "\tTest slicing default DataTagged." << endl;
 
-    DataTagged myData1;
-    DataTagged myData2;
+    DataTagged myData1=makeTagged();
+    DataTagged myData2=makeTagged();
 
     DataTypes::RegionType region;
 
     myData2.getDataAtOffsetRW(myData2.getDefaultOffset())=1.0;
     myData1.setSlice(&myData2, region);
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
@@ -3684,7 +2495,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==1);
     CPPUNIT_ASSERT(myData1.getShape().size()==0);
 
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRW(offset)==1.0);
@@ -3697,23 +2507,21 @@ void DataTaggedTestCase::testSetSlice() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
-    DataTypes::ValueType viewData1(3);
+    DataTypes::RealVectorType viewData1(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData1[i]=i;
     }
-//     DataArrayView myView1(viewData1,viewShape);
     DataTagged myData1(FunctionSpace(),viewShape,viewData1);
 
-    DataTypes::ValueType viewData2(3);
+    DataTypes::RealVectorType viewData2(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData2[i]=i+3;
     }
-//     DataArrayView myView2(viewData2,viewShape);
     DataTagged myData2(FunctionSpace(),viewShape,viewData2);
 
     // full slice
@@ -3725,7 +2533,6 @@ void DataTaggedTestCase::testSetSlice() {
     region.push_back(region_element);
 
     myData1.setSlice(&myData2, region);
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
@@ -3735,7 +2542,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==3.0);
@@ -3747,9 +2553,8 @@ void DataTaggedTestCase::testSetSlice() {
     viewShape.clear();
     viewShape.push_back(1);
 
-    DataTypes::ValueType viewData3(1);
+    DataTypes::RealVectorType viewData3(1);
     viewData3[0]=6.0;
-//     DataArrayView myView3(viewData3,viewShape);
     DataTagged myData3(FunctionSpace(),viewShape,viewData3);
 
     region.clear();
@@ -3758,7 +2563,6 @@ void DataTaggedTestCase::testSetSlice() {
     region.push_back(region_element);
 
     myData1.setSlice(&myData3, region);
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
@@ -3766,7 +2570,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getRank()==1);
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==3.0);
@@ -3780,14 +2583,11 @@ void DataTaggedTestCase::testSetSlice() {
     region_element.second=0;
     region.push_back(region_element);
 
-    DataTagged myData4;
+    DataTagged myData4=makeTagged();
     myData4.getDataAtOffsetRW(myData4.getDefaultOffset())=7.0;
-//     myData4.getDefaultValue()()=7.0;
 
     myData1.setSlice(&myData4, region);
 
-    //cout << myData3.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
     CPPUNIT_ASSERT(myData1.getLength()==3);
@@ -3795,7 +2595,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==7.0);
@@ -3810,25 +2609,23 @@ void DataTaggedTestCase::testSetSlice() {
 
     DataTagged::TagListType keys;
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
     viewShape.push_back(3);
     viewShape.push_back(3);
 
-    DataTypes::ValueType viewData1(27);
+    DataTypes::RealVectorType viewData1(27);
     for (int i=0;i<viewData1.size();i++) {
       viewData1[i]=i;
     }
-//     DataArrayView myView1(viewData1,viewShape);
     DataTagged myData1(FunctionSpace(),viewShape,viewData1);
 
-    DataTypes::ValueType viewData2(27);
+    DataTypes::RealVectorType viewData2(27);
     for (int i=0;i<viewData2.size();i++) {
       viewData2[i]=i+27;
     }
-//     DataArrayView myView2(viewData2,viewShape);
     DataTagged myData2(FunctionSpace(),viewShape,viewData2);
 
     // full slice
@@ -3843,7 +2640,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
@@ -3852,7 +2648,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==27);
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
 
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
 
@@ -3862,11 +2657,10 @@ void DataTaggedTestCase::testSetSlice() {
     viewShape.clear();
     viewShape.push_back(3);
 
-    DataTypes::ValueType viewData3(3);
+    DataTypes::RealVectorType viewData3(3);
     for (int i=0;i<viewData3.size();i++) {
       viewData3[i]=i+60;
     }
-//     DataArrayView myView3(viewData3,viewShape);
     DataTagged myData3(FunctionSpace(),viewShape,viewData3);
 
     region.clear();
@@ -3878,7 +2672,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData3, region);
 
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
@@ -3886,7 +2679,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getRank()==3);
     CPPUNIT_ASSERT(myData1.getNoValues()==27);
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==60.0);
@@ -3902,13 +2694,11 @@ void DataTaggedTestCase::testSetSlice() {
     region.push_back(region_element);
     region.push_back(region_element);
 
-    DataTagged myData4;
+    DataTagged myData4=makeTagged();
     myData4.getDataAtOffsetRW(myData4.getDefaultOffset())=70.0;
 
     myData1.setSlice(&myData4, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==0);
 
     CPPUNIT_ASSERT(myData1.getLength()==27);
@@ -3917,7 +2707,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==27);
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
 
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==70.0);
@@ -3931,35 +2720,27 @@ void DataTaggedTestCase::testSetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
 
     // default value for Data1
-    DataTypes::ValueType viewData1(1*2);
+    DataTypes::RealVectorType viewData1(1*2);
     viewData1[0]=0.0;
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(1);
     viewData1[1]=0.0;
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
 
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData3(1*2);
+    DataTypes::RealVectorType viewData3(1*2);
     viewData3[0]=1.0;
-//     DataArrayView myView3(viewData3,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData4(1);
     viewData3[1]=2.0;
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData3);
 
@@ -3969,20 +2750,16 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==1);
 
     CPPUNIT_ASSERT(myData1.getLength()==2);
     CPPUNIT_ASSERT(myData1.getRank()==0);
     CPPUNIT_ASSERT(myData1.getNoValues()==1);
     CPPUNIT_ASSERT(myData1.getShape().size()==0);
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData1.getVectorRO()[offset]==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData1.getVectorRO()[offset]==2.0);
@@ -3994,44 +2771,36 @@ void DataTaggedTestCase::testSetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
     int nvals=3;
     // default value for Data1
-    DataTypes::ValueType viewData1(3*2);
+    DataTypes::RealVectorType viewData1(3*2);
     for (int i=0;i<nvals;i++) {
       viewData1[i]=0.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(3);
     for (int i=0;i<nvals;i++) {
       viewData1[nvals+i]=0.0;
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData3(3*2);
+    DataTypes::RealVectorType viewData3(3*2);
     for (int i=0;i<nvals;i++) {
       viewData3[i]=1.0;
     }
-//     DataArrayView myView3(viewData3,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData4(3);
     for (int i=0;i<nvals;i++) {
       viewData3[nvals+i]=2.0;
     }
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData3);
 
@@ -4045,8 +2814,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==1);
 
     CPPUNIT_ASSERT(myData1.getLength()==6);
@@ -4054,14 +2821,12 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==2.0);
@@ -4073,16 +2838,12 @@ void DataTaggedTestCase::testSetSlice() {
     viewShape.clear();
     viewShape.push_back(1);
 
-    DataTypes::ValueType viewData5(1*2);
+    DataTypes::RealVectorType viewData5(1*2);
     viewData5[0]=3.0;
-//     DataArrayView myView5(viewData5,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData6(1);
     viewData5[1]=4.0;
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     DataTagged myData3(FunctionSpace(),viewShape,keys,viewData5);
 
@@ -4093,7 +2854,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData3, region);
 
-    //cout << myData1.toString() << endl;
 
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==1);
 
@@ -4101,14 +2861,12 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getRank()==1);
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==2.0);
@@ -4119,16 +2877,12 @@ void DataTaggedTestCase::testSetSlice() {
 
     viewShape.clear();
 
-    DataTypes::ValueType viewData7(1*2);
+    DataTypes::RealVectorType viewData7(1*2);
     viewData7[0]=5.0;
-//     DataArrayView myView7(viewData7,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData8(1);
     viewData7[1]=6.0;
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
 
     DataTagged myData4(FunctionSpace(),viewShape,keys,viewData7);
 
@@ -4139,19 +2893,16 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData4, region);
 
-    //cout << myData1.toString() << endl;
     CPPUNIT_ASSERT(myData1.getRank()==1);
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==5.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==6.0);
@@ -4166,7 +2917,7 @@ void DataTaggedTestCase::testSetSlice() {
     DataTagged::TagListType keys;
     keys.push_back(1);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
@@ -4175,38 +2926,30 @@ void DataTaggedTestCase::testSetSlice() {
 
     int nvals=27;
     // default value for Data1
-    DataTypes::ValueType viewData1(27*2);
+    DataTypes::RealVectorType viewData1(27*2);
     for (int i=0;i<nvals;i++) {
       viewData1[i]=0.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(27);
     for (int i=0;i<nvals;i++) {
       viewData1[nvals+i]=0.0;
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
 
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData3(27*2);
+    DataTypes::RealVectorType viewData3(27*2);
     for (int i=0;i<nvals;i++) {
       viewData3[i]=1.0;
     }
-//     DataArrayView myView3(viewData3,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData4(27);
     for (int i=0;i<nvals;i++) {
       viewData3[nvals+i]=2.0;
     }
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData3);
 
@@ -4222,8 +2965,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==1);
 
     CPPUNIT_ASSERT(myData1.getLength()==54);
@@ -4234,13 +2975,11 @@ void DataTaggedTestCase::testSetSlice() {
 
 
     int offset=myData1.getDefaultOffset();
-//     DataArrayView myDataView = myData1.getDefaultValue();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,1,1)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,2,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==2.0);
@@ -4254,20 +2993,16 @@ void DataTaggedTestCase::testSetSlice() {
   
     nvals=3;
 
-    DataTypes::ValueType viewData5(3*2);
+    DataTypes::RealVectorType viewData5(3*2);
     for (int i=0;i<nvals;i++) {
       viewData5[i]=3.0;
     }
-//     DataArrayView myView5(viewData5,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData6(3);
     for (int i=0;i<nvals;i++) {
       viewData5[nvals+i]=4.0;
     }
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     DataTagged myData3(FunctionSpace(),viewShape,keys,viewData5);
 
@@ -4280,8 +3015,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData3, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==1);
 
     CPPUNIT_ASSERT(myData1.getLength()==54);
@@ -4297,7 +3030,6 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==4.0);
@@ -4308,16 +3040,12 @@ void DataTaggedTestCase::testSetSlice() {
 
     viewShape.clear();
 
-    DataTypes::ValueType viewData7(1*2);
+    DataTypes::RealVectorType viewData7(1*2);
     viewData7[0]=5.0;
-//     DataArrayView myView7(viewData7,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData8(1);
     viewData7[1]=6.0;
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
 
     DataTagged myData4(FunctionSpace(),viewShape,keys,viewData7);
 
@@ -4330,19 +3058,15 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData4, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getRank()==3);
     CPPUNIT_ASSERT(myData1.getNoValues()==27);
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
 
 
     offset=myData1.getDefaultOffset();
-//     myDataView = myData1.getDefaultValue();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==5.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==6.0);
@@ -4357,59 +3081,39 @@ void DataTaggedTestCase::testSetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
 
     // default value for Data1
-    DataTypes::ValueType viewData1(1*4);
+    DataTypes::RealVectorType viewData1(1*4);
     viewData1[0]=0.0;
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(1);
     viewData1[1]=0.0;
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     // value for tag "2" for Data1
-//     DataTypes::ValueType viewData5(1);
     viewData1[2]=0.0;
-//     DataArrayView myView5(viewData5,viewShape);
-//     values.push_back(myView5);
 
     // value for tag "3" for Data1
-//     DataTypes::ValueType viewData6(1);
     viewData1[3]=0.0;
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
 
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData3(1*4);
+    DataTypes::RealVectorType viewData3(1*4);
     viewData3[0]=1.0;
-//     DataArrayView myView3(viewData3,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData4(1);
     viewData3[1]=2.0;
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     // value for tag "2" for Data2
-//     DataTypes::ValueType viewData7(1);
     viewData3[2]=3.0;
-//     DataArrayView myView7(viewData7,viewShape);
-//     values.push_back(myView7);
 
     // value for tag "3" for Data2
-//     DataTypes::ValueType viewData8(1);
     viewData3[3]=4.0;
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
 
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData3);
 
@@ -4419,8 +3123,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==3);
 
     CPPUNIT_ASSERT(myData1.getLength()==4);
@@ -4430,22 +3132,18 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getShape().size()==0);
 
     int offset=myData1.getDefaultOffset();
-//     DataArrayView myDataView = myData1.getDefaultValue();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(offset)==1.0);
 
     offset=myData1.getOffsetForTag(1);
-//     myDataView = myData1.getDataPointByTag(1);
     CPPUNIT_ASSERT(offset==1);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(offset)==2.0);
 
     offset=myData1.getOffsetForTag(2);
-//     myDataView = myData1.getDataPointByTag(2);
     CPPUNIT_ASSERT(offset==2);
 
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(offset)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(offset)==4.0);
@@ -4460,7 +3158,7 @@ void DataTaggedTestCase::testSetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
@@ -4468,35 +3166,25 @@ void DataTaggedTestCase::testSetSlice() {
     int nvals=3;
 
     // default value for Data1
-    DataTypes::ValueType viewData1(3*4);
+    DataTypes::RealVectorType viewData1(3*4);
     for (int i=0;i<viewData1.size();i++) {
       viewData1[i]=0.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(3);
     for (int i=0;i<nvals;i++) {
       viewData1[nvals+i]=0.0;
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     // value for tag "2" for Data1
-//     DataTypes::ValueType viewData3(3);
     for (int i=0;i<nvals;i++) {
       viewData1[2*nvals+i]=0.0;
     }
-//     DataArrayView myView3(viewData3,viewShape);
-//     values.push_back(myView3);
 
     // value for tag "3" for Data1
-//     DataTypes::ValueType viewData4(3);
     for (int i=0;i<nvals;i++) {
       viewData1[3*nvals+i]=0.0;
     }
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
 
@@ -4505,36 +3193,25 @@ void DataTaggedTestCase::testSetSlice() {
     nvals=3;
 
     // default value for Data2
-    DataTypes::ValueType viewData5(3*4);
+    DataTypes::RealVectorType viewData5(3*4);
     for (int i=0;i<nvals;i++) {
       viewData5[i]=1.0;
     }
-//     DataArrayView myView5(viewData5,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData6(3);
     for (int i=0;i<nvals;i++) {
       viewData5[nvals+i]=2.0;
     }
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     // value for tag "2" for Data2
-//     DataTypes::ValueType viewData7(3);
     for (int i=0;i<nvals;i++) {
       viewData5[2*nvals+i]=3.0;
     }
-//     DataArrayView myView7(viewData7,viewShape);
-//     values.push_back(myView7);
 
     // value for tag "3" for Data2
-//     DataTypes::ValueType viewData8(3);
     for (int i=0;i<nvals;i++) {
       viewData5[3*nvals+i]=4.0;
     }
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
-
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData5);
 
     // full slice
@@ -4547,8 +3224,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==3);
 
     CPPUNIT_ASSERT(myData1.getLength()==12);
@@ -4557,29 +3232,24 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
-
-//     DataArrayView myDataView = myData1.getDefaultValue();
     int offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==2.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==2.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==2.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
     offset=myData1.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==4.0);
@@ -4591,26 +3261,14 @@ void DataTaggedTestCase::testSetSlice() {
     viewShape.clear();
     viewShape.push_back(1);
 
-    DataTypes::ValueType viewData9(1*4);
+    DataTypes::RealVectorType viewData9(1*4);
     viewData9[0]=6.0;
-//     DataArrayView myView9(viewData9,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData10(1);
     viewData9[1]=7.0;
-//     DataArrayView myView10(viewData10,viewShape);
-//     values.push_back(myView10);
-
-//     DataTypes::ValueType viewData11(1);
     viewData9[2]=8.0;
-//     DataArrayView myView11(viewData11,viewShape);
-//     values.push_back(myView11);
-
-//     DataTypes::ValueType viewData12(1);
     viewData9[3]=9.0;
-//     DataArrayView myView12(viewData12,viewShape);
-//     values.push_back(myView12);
 
     DataTagged myData3(FunctionSpace(),viewShape, keys, viewData9);
 
@@ -4621,8 +3279,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData3, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==3);
 
     CPPUNIT_ASSERT(myData1.getLength()==12);
@@ -4633,27 +3289,23 @@ void DataTaggedTestCase::testSetSlice() {
 
 
     offset=myData1.getDefaultOffset();
-//     myDataView = myData1.getDefaultValue();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==6.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==2.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==7.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==2.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
     offset=myData1.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==8.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==4.0);
@@ -4664,26 +3316,14 @@ void DataTaggedTestCase::testSetSlice() {
 
     viewShape.clear();
 
-    DataTypes::ValueType viewData13(1*4);
+    DataTypes::RealVectorType viewData13(1*4);
     viewData13[0]=10.0;
-//     DataArrayView myView13(viewData13,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData14(1);
     viewData13[1]=11.0;
-//     DataArrayView myView14(viewData14,viewShape);
-//     values.push_back(myView14);
-
-//     DataTypes::ValueType viewData15(2);
     viewData13[2]=12.0;
-//     DataArrayView myView15(viewData15,viewShape);
-//     values.push_back(myView15);
-
-//     DataTypes::ValueType viewData16(3);
     viewData13[3]=13.0;
-//     DataArrayView myView16(viewData16,viewShape);
-//     values.push_back(myView16);
 
     DataTagged myData4(FunctionSpace(),viewShape,keys,viewData13);
 
@@ -4694,35 +3334,29 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData4, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getRank()==1);
     CPPUNIT_ASSERT(myData1.getNoValues()==3);
     CPPUNIT_ASSERT(myData1.getShape().size()==1);
 
 
     offset=myData1.getDefaultOffset();
-/*    myDataView = myData1.getDefaultValue();*/
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==10.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==6.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==3);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==11.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==7.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==2.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
     offset=myData1.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==6);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==12.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1)==8.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==9);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0)==13.0);
@@ -4740,7 +3374,7 @@ void DataTaggedTestCase::testSetSlice() {
     keys.push_back(2);
     keys.push_back(3);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
@@ -4749,71 +3383,50 @@ void DataTaggedTestCase::testSetSlice() {
     int nvals=noValues(viewShape);
 
     // default value for Data1
-    DataTypes::ValueType viewData1(27*4);
+    DataTypes::RealVectorType viewData1(27*4);
     for (int i=0;i<nvals;i++) {
       viewData1[i]=0.0;
     }
-//     DataArrayView myView1(viewData1,viewShape);
 
     // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(27);
     for (int i=0;i<nvals;i++) {
       viewData1[nvals+i]=0.0;
     }
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
 
     // value for tag "2" for Data1
-//     DataTypes::ValueType viewData3(27);
     for (int i=0;i<nvals;i++) {
       viewData1[2*nvals+i]=0.0;
     }
-//     DataArrayView myView3(viewData3,viewShape);
-//     values.push_back(myView3);
 
     // value for tag "3" for Data1
-//     DataTypes::ValueType viewData4(27);
     for (int i=0;i<nvals;i++) {
       viewData1[3*nvals+i]=0.0;
     }
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys,viewData1);
 
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData5(27*4);
+    DataTypes::RealVectorType viewData5(27*4);
     for (int i=0;i<nvals;i++) {
       viewData5[i]=1.0;
     }
-//     DataArrayView myView5(viewData5,viewShape);
 
     // value for tag "1" for Data2
-//     DataTypes::ValueType viewData6(27);
     for (int i=0;i<nvals;i++) {
       viewData5[nvals+i]=2.0;
     }
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     // value for tag "2" for Data2
-//     DataTypes::ValueType viewData7(27);
     for (int i=0;i<nvals;i++) {
       viewData5[2*nvals+i]=3.0;
     }
-//     DataArrayView myView7(viewData7,viewShape);
-//     values.push_back(myView7);
-
     // value for tag "3" for Data2
-    DataTypes::ValueType viewData8(27);
+    DataTypes::RealVectorType viewData8(27);
     for (int i=0;i<nvals;i++) {
       viewData5[3*nvals+i]=4.0;
     }
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
-
     DataTagged myData2(FunctionSpace(),viewShape,keys,viewData5);
 
     // full slice
@@ -4828,8 +3441,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==3);
 
     CPPUNIT_ASSERT(myData1.getLength()==108);
@@ -4839,27 +3450,23 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
 
     int offset=myData1.getDefaultOffset();
-//     DataArrayView myDataView = myData1.getDefaultValue();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==1.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==2.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==2.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==2.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
     offset=myData1.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==54);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==3.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==81);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==4.0);
@@ -4873,34 +3480,21 @@ void DataTaggedTestCase::testSetSlice() {
 
     nvals=3;
 
-    DataTypes::ValueType viewData9(3*4);
+    DataTypes::RealVectorType viewData9(3*4);
     for (int i=0;i<nvals;i++) {
       viewData9[i]=6.0;
     }
-//     DataArrayView myView9(viewData9,viewShape);
-
     values.clear();
 
-//     DataTypes::ValueType viewData10(3);
     for (int i=0;i<nvals;i++) {
       viewData9[nvals+i]=7.0;
     }
-//     DataArrayView myView10(viewData10,viewShape);
-//     values.push_back(myView10);
-
-//     DataTypes::ValueType viewData11(3);
     for (int i=0;i<nvals;i++) {
       viewData9[2*nvals+i]=8.0;
     }
-//     DataArrayView myView11(viewData11,viewShape);
-//     values.push_back(myView11);
-
-//     DataTypes::ValueType viewData12(3);
     for (int i=0;i<nvals;i++) {
       viewData9[3*nvals+i]=9.0;
     }
-//     DataArrayView myView12(viewData12,viewShape);
-//     values.push_back(myView12);
 
     DataTagged myData3(FunctionSpace(),viewShape,keys,viewData9);
 
@@ -4914,8 +3508,6 @@ void DataTaggedTestCase::testSetSlice() {
 
     myData1.setSlice(&myData3, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==3);
 
     CPPUNIT_ASSERT(myData1.getLength()==108);
@@ -4924,15 +3516,12 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getNoValues()==27);
     CPPUNIT_ASSERT(myData1.getShape().size()==3);
 
-//    myDataView = myData1.getDefaultValue();
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==6.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==6.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==6.0);
 
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==7.0);
@@ -4940,14 +3529,11 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==7.0);
 
     offset=myData1.getOffsetForTag(2);
-//     myDataView = myData1.getDataPointByTag(2);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(offset==54);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==8.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,1,0,0)==8.0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,2,0,0)==8.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==81);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==9.0);
@@ -4958,26 +3544,14 @@ void DataTaggedTestCase::testSetSlice() {
 
     viewShape.clear();
 
-    DataTypes::ValueType viewData13(1*4);
+    DataTypes::RealVectorType viewData13(1*4);
     viewData13[0]=10.0;
-//     DataArrayView myView13(viewData13,viewShape);
 
     values.clear();
 
-//     DataTypes::ValueType viewData14(1);
     viewData13[1]=11.0;
-//     DataArrayView myView14(viewData14,viewShape);
-//     values.push_back(myView14);
-
-//     DataTypes::ValueType viewData15(2);
     viewData13[2]=12.0;
-//     DataArrayView myView15(viewData15,viewShape);
-//     values.push_back(myView15);
-
-//     DataTypes::ValueType viewData16(3);
     viewData13[3]=13.0;
-//     DataArrayView myView16(viewData16,viewShape);
-//     values.push_back(myView16);
 
     DataTagged myData4(FunctionSpace(),viewShape,keys,viewData13);
 
@@ -4989,24 +3563,17 @@ void DataTaggedTestCase::testSetSlice() {
     region.push_back(region_element);
 
     myData1.setSlice(&myData4, region);
-
-    //cout << myData1.toString() << endl;
-
-//     myDataView = myData1.getDefaultValue();
     offset=myData1.getDefaultOffset();
     CPPUNIT_ASSERT(offset==0);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==10.0);
-//     myDataView = myData1.getDataPointByTag(1);
     offset=myData1.getOffsetForTag(1);
     CPPUNIT_ASSERT(offset==27);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==11.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
     offset=myData1.getOffsetForTag(2);
     CPPUNIT_ASSERT(offset==54);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==12.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
     offset=myData1.getOffsetForTag(3);
     CPPUNIT_ASSERT(offset==81);
     CPPUNIT_ASSERT(getRefRO(myData1,offset,0,0,0)==13.0);
@@ -5027,73 +3594,36 @@ void DataTaggedTestCase::testSetSlice() {
     keys2.push_back(4);
     keys2.push_back(5);
 
-    DataTagged::ValueBatchType values;
+    DataTagged::FloatBatchType values;
 
     DataTypes::ShapeType viewShape;
 
     // default value for Data1
-    DataTypes::ValueType viewData1(1*4);
+    DataTypes::RealVectorType viewData1(1*4);
     viewData1[0]=0.0;
-//     DataArrayView myView1(viewData1,viewShape);
-
-    // value for tag "1" for Data1
-//     DataTypes::ValueType viewData2(1);
     viewData1[1]=0.0;
-//     DataArrayView myView2(viewData2,viewShape);
-//     values.push_back(myView2);
-
-    // value for tag "2" for Data1
-//     DataTypes::ValueType viewData5(1);
     viewData1[2]=0.0;
-//     DataArrayView myView5(viewData5,viewShape);
-//     values.push_back(myView5);
-
-    // value for tag "3" for Data1
-//     DataTypes::ValueType viewData6(1);
     viewData1[3]=0.0;
-//     DataArrayView myView6(viewData6,viewShape);
-//     values.push_back(myView6);
 
     DataTagged myData1(FunctionSpace(),viewShape,keys1,viewData1);
 
     values.clear();
 
     // default value for Data2
-    DataTypes::ValueType viewData3(1*4);
+    DataTypes::RealVectorType viewData3(1*4);
     viewData3[0]=1.0;
-//     DataArrayView myView3(viewData3,viewShape);
-
-    // value for tag "3" for Data2
-//     DataTypes::ValueType viewData4(1);
     viewData3[1]=2.0;
-//     DataArrayView myView4(viewData4,viewShape);
-//     values.push_back(myView4);
-
-    // value for tag "4" for Data2
-//     DataTypes::ValueType viewData7(1);
     viewData3[2]=3.0;
-//     DataArrayView myView7(viewData7,viewShape);
-//     values.push_back(myView7);
-
-    // value for tag "5" for Data2
-//     DataTypes::ValueType viewData8(1);
     viewData3[3]=4.0;
-//     DataArrayView myView8(viewData8,viewShape);
-//     values.push_back(myView8);
 
     DataTagged myData2(FunctionSpace(),viewShape,keys2,viewData3);
 
-    //cout << myData1.toString() << endl;
-    //cout << myData2.toString() << endl;
-
     // full slice
 
     DataTypes::RegionType region;
 
     myData1.setSlice(&myData2, region);
 
-    //cout << myData1.toString() << endl;
-
     CPPUNIT_ASSERT(myData1.getTagLookup().size()==5);
 
     CPPUNIT_ASSERT(myData1.getLength()==6);
@@ -5104,94 +3634,28 @@ void DataTaggedTestCase::testSetSlice() {
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(0)==1.0);
 
 
-//     DataArrayView myDataView = myData1.getDefaultValue();
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getDefaultOffset()==0);
-//     CPPUNIT_ASSERT(myDataView.getOffset()==0);
 
 
-//     myDataView = myData1.getDataPointByTag(1);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getOffsetForTag(1)==1);
-//     CPPUNIT_ASSERT(myDataView.getRank()==0);
-//     CPPUNIT_ASSERT(myDataView.noValues()==1);
-//     CPPUNIT_ASSERT(myDataView.getShape().size()==0);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(1)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(2);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getOffsetForTag(2)==2);
-//     CPPUNIT_ASSERT(myDataView.getRank()==0);
-//     CPPUNIT_ASSERT(myDataView.noValues()==1);
-//     CPPUNIT_ASSERT(myDataView.getShape().size()==0);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(2)==1.0);
 
-//     myDataView = myData1.getDataPointByTag(3);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getOffsetForTag(3)==3);
-/*    CPPUNIT_ASSERT(myDataView.getRank()==0);
-    CPPUNIT_ASSERT(myDataView.noValues()==1);
-    CPPUNIT_ASSERT(myDataView.getShape().size()==0);*/
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(3)==2.0);
 
-//     myDataView = myData1.getDataPointByTag(4);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getOffsetForTag(4)==4);
-//     CPPUNIT_ASSERT(myDataView.getRank()==0);
-//     CPPUNIT_ASSERT(myDataView.noValues()==1);
-//     CPPUNIT_ASSERT(myDataView.getShape().size()==0);
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(4)==3.0);
 
-//     myDataView = myData1.getDataPointByTag(5);
-//     CPPUNIT_ASSERT(!myDataView.isEmpty());
     CPPUNIT_ASSERT(myData1.getOffsetForTag(5)==5);
-/*    CPPUNIT_ASSERT(myData1.getRank()==0);
-    CPPUNIT_ASSERT(myData1.noValues()==1);
-    CPPUNIT_ASSERT(myData1.getShape().size()==0);*/
     CPPUNIT_ASSERT(myData1.getDataAtOffsetRO(5)==4.0);
 
   }
 
 }
 
-/*
-// Testing to see if FunctionSpaces are checked for taggability before use
-void DataTaggedTestCase::testFunctionSpaces()
-{
-   {
-	cout << "\tTest Non-Taggable Degrees Of Freedom." << endl;
-	MeshAdapter d;
-	FunctionSpace fs=solution(d);
-	std::string res=constr(fs);
-	if (!res.empty())
-	{
-		cout << "\t\t" << res << endl;
-		CPPUNIT_ASSERT(false);
-	}
-   }
-   {
-	cout << "\tTest Non-Taggable Reduced Degrees Of Freedom." << endl;
-	MeshAdapter d;
-	FunctionSpace fs=reducedSolution(d);
-	std::string res=constr(fs);
-	if (!res.empty())
-	{
-		cout << "\t\t" << res << endl;
-		CPPUNIT_ASSERT(false);
-	}
-   }
-   {
-	cout << "\tTest Non-Taggable Reduced Degrees Of Freedom." << endl;
-	MeshAdapter d;
-	FunctionSpace fs(d,MeshAdapter::ReducedNodes);
-	std::string res=constr(fs);
-	if (!res.empty())
-	{
-		cout << "\t\t" << res << endl;
-		CPPUNIT_ASSERT(false);
-	}
-   }
-}*/
 
 TestSuite* DataTaggedTestCase::suite()
 {
@@ -5206,10 +3670,6 @@ TestSuite* DataTaggedTestCase::suite()
   testSuite->addTest(new TestCaller<DataTaggedTestCase>(
               "testCopyConstructors",&DataTaggedTestCase::testCopyConstructors));
   testSuite->addTest(new TestCaller<DataTaggedTestCase>(
-              "testOperations",&DataTaggedTestCase::testOperations));
-//   testSuite->addTest(new TestCaller<DataTaggedTestCase>(
-//              "testFunctionSpaces",&DataTaggedTestCase::testFunctionSpaces));
-  testSuite->addTest(new TestCaller<DataTaggedTestCase>(
               "testGetSlice",&DataTaggedTestCase::testGetSlice));
   testSuite->addTest(new TestCaller<DataTaggedTestCase>(
               "testSetSlice",&DataTaggedTestCase::testSetSlice));
diff --git a/escriptcore/test/DataTestCase.cpp b/escriptcore/test/DataTestCase.cpp
index b2031dd..5d5b14b 100644
--- a/escriptcore/test/DataTestCase.cpp
+++ b/escriptcore/test/DataTestCase.cpp
@@ -14,20 +14,17 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/Data.h>
 
 #include "DataTestCase.h"
 
-#include <cmath>
-
-#include "esysUtils/EsysException.h"
-#include "escript/Data.h"
-#include "escript/DataLazy.h"
-#include "escript/EscriptParams.h"
-#include "escript/FunctionSpace.h"
+#include <escript/DataLazy.h>
+#include <escript/EscriptParams.h>
+#include <escript/EsysException.h>
+#include <escript/FunctionSpace.h>
+#include <escript/TestDomain.h>
 
+#include <cmath>
 #include <cppunit/TestCaller.h>
 
 #define AUTOLAZYON setEscriptParamInt("AUTOLAZY",1);
@@ -40,22 +37,20 @@
 using namespace std;
 using namespace CppUnit;
 using namespace escript;
-using namespace esysUtils;
 using namespace escript::DataTypes;
 
-
 namespace
 {
 
 inline
-DataTypes::ValueType::const_reference
+DataTypes::RealVectorType::const_reference
 getRef(Data& d,int s1, int p1, int x, int y)
 {
 	return d.getDataAtOffsetRO(d.getDataOffset(s1,p1)+getRelIndex(d.getDataPointShape(),x,y));
 }
 
 inline
-DataTypes::ValueType::const_reference
+DataTypes::RealVectorType::const_reference
 getRef(Data& d, int x, int y)
 {
 	return d.getDataAtOffsetRO(getRelIndex(d.getDataPointShape(),x,y));
@@ -69,17 +64,19 @@ void DataTestCase::testCopyingWorker(bool delayed)
 
   using namespace escript::DataTypes;
   cout << endl;
-
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());  
   DataTypes::ShapeType shape;
   shape.push_back(2);
   shape.push_back(3);
-  DataTypes::ValueType data(DataTypes::noValues(shape),1);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),1);
   const int NUMDATS=3;
   Data* dats[NUMDATS];
   const char* strs[]={"DataConstant", "DataTagged", "DataExpanded"};
-  dats[0]=new Data(new DataConstant(FunctionSpace(),shape,data));
-  dats[1]=new Data(new DataTagged(FunctionSpace(),shape,data));
-  dats[2]=new Data(new DataExpanded(FunctionSpace(),shape,data));
+  dats[0]=new Data(new DataConstant(fs,shape,data));
+  dats[1]=new Data(new DataTagged(fs,shape,data));
+  dats[2]=new Data(new DataExpanded(fs,shape,data));
   if (delayed)
   {
     dats[0]->delaySelf();
@@ -96,10 +93,13 @@ void DataTestCase::testCopyingWorker(bool delayed)
 	{
 	  CPPUNIT_ASSERT(deep.isLazy());
 	}
-	for (int i=0;i<DataTypes::noValues(shape);++i)
+	if (!d->hasNoSamples())
 	{
-	  CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)==deep.getDataAtOffsetRO(i));
-    }
+	    for (int i=0;i<DataTypes::noValues(shape);++i)
+	    {
+	      CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)==deep.getDataAtOffsetRO(i));
+	    }
+	}
 	if (delayed)
 	{
 	   d->delaySelf();
@@ -109,11 +109,14 @@ void DataTestCase::testCopyingWorker(bool delayed)
 	{
 	  CPPUNIT_ASSERT(d->isLazy());
 	}
-	for (int i=0;i<DataTypes::noValues(shape);++i)
-	{
-	  CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)!=deep.getDataAtOffsetRO(i));
+	if (!d->hasNoSamples())
+	{	
+	    for (int i=0;i<DataTypes::noValues(shape);++i)
+	    {
+	      CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)!=deep.getDataAtOffsetRO(i));
+	    }
 	}
-    if (delayed)
+	if (delayed)
 	{
 	   d->delaySelf();
 	   deep.delaySelf();
@@ -123,14 +126,20 @@ void DataTestCase::testCopyingWorker(bool delayed)
 	{
 	  CPPUNIT_ASSERT(d->isLazy());
 	}
-	for (int i=0;i<DataTypes::noValues(shape);++i)
-	{
-	  CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)==deep.getDataAtOffsetRO(i));
+	if (!d->hasNoSamples())
+	{	
+	    for (int i=0;i<DataTypes::noValues(shape);++i)
+	    {
+	      CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)==deep.getDataAtOffsetRO(i));
+	    }
 	}
 	d->setToZero();
-	for (int i=0;i<DataTypes::noValues(shape);++i)
-	{
-	  CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)!=deep.getDataAtOffsetRO(i));
+	if (!d->hasNoSamples())
+	{	
+	    for (int i=0;i<DataTypes::noValues(shape);++i)
+	    {
+	      CPPUNIT_ASSERT(d->getDataAtOffsetRO(i)!=deep.getDataAtOffsetRO(i));
+	    }
 	}
 	delete dats[k];
   }
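From here on, the DataTestCase hunks wrap every per-value comparison in an if (!d->hasNoSamples()) guard before touching point data. The patch gives no rationale, but the obvious reading is that the new TestDomain-backed FunctionSpace can leave a Data object with no samples on a given rank, so unconditional getDataAtOffsetRO() calls are no longer safe. A minimal sketch of that guard, factored into a helper (assertSameValues is hypothetical, not part of the patch), using only calls that appear in the hunks:

    #include <escript/Data.h>
    #include <cppunit/TestAssert.h>

    // Compare two Data objects value by value, but only when this rank
    // actually holds samples; mirrors the guard added throughout the tests.
    void assertSameValues(escript::Data& a, escript::Data& b,
                          const escript::DataTypes::ShapeType& shape)
    {
        if (a.hasNoSamples())      // nothing stored locally: nothing to check
            return;
        for (int i = 0; i < escript::DataTypes::noValues(shape); ++i)
            CPPUNIT_ASSERT(a.getDataAtOffsetRO(i) == b.getDataAtOffsetRO(i));
    }
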
@@ -141,6 +150,9 @@ void DataTestCase::testSlicingWorker(bool delayed)
 {
 
   using namespace escript::DataTypes;
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());    
   cout << endl;
   {
    DataTypes::ShapeType viewShape;
@@ -153,7 +165,7 @@ void DataTestCase::testSlicingWorker(bool delayed)
    Data* dats[NUMDATS];
    for (int k=0;k<NUMDATS;++k)
    {
-    	dats[k]=new Data(1.3, viewShape);
+    	dats[k]=new Data(1.3, viewShape, fs, false);
    }
    dats[1]->tag();
    dats[2]->expand();
@@ -167,9 +179,11 @@ void DataTestCase::testSlicingWorker(bool delayed)
    {
 	cout << "\t\tTest get-slicing " << strs[k] << endl;
 	dats[k]->requireWrite();
-    	dats[k]->getDataAtOffsetRW(dats[k]->getDataOffset(0,0)+getRelIndex(viewShape,0,0))=1.0;
-    	dats[k]->getDataAtOffsetRW(dats[k]->getDataOffset(0,0)+getRelIndex(viewShape,1,1))=2.0;
-
+	if (!dats[k]->hasNoSamples())
+	{
+	    dats[k]->getDataAtOffsetRW(dats[k]->getDataOffset(0,0)+getRelIndex(viewShape,0,0))=1.0;
+	    dats[k]->getDataAtOffsetRW(dats[k]->getDataOffset(0,0)+getRelIndex(viewShape,1,1))=2.0;
+	}
     	DataTypes::RegionType region;
     	region.push_back(DataTypes::RegionType::value_type(0,0));
     	region.push_back(DataTypes::RegionType::value_type(0,0));
@@ -177,8 +191,11 @@ void DataTestCase::testSlicingWorker(bool delayed)
     	Data slice1(dats[k]->getSlice(region));
 
     	if (tags[k]) { CPPUNIT_ASSERT(slice1.isTagged()); }
-    	CPPUNIT_ASSERT(slice1.getDataPointRank()==0);
-    	CPPUNIT_ASSERT(slice1.getDataPointRO(0,0)==1.0);
+	if (!slice1.hasNoSamples())
+	{    	
+	    CPPUNIT_ASSERT(slice1.getDataPointRank()==0);
+	    CPPUNIT_ASSERT(slice1.getDataPointRO(0,0)==1.0);
+	}    
 
 	//
 	// create a rank 2 slice with one value
@@ -194,8 +211,10 @@ void DataTestCase::testSlicingWorker(bool delayed)
 	if (tags[k]) {CPPUNIT_ASSERT(slice2.isTagged());}
 	CPPUNIT_ASSERT(slice2.getDataPointRank()==2);
 	
-	CPPUNIT_ASSERT(slice2.getDataAtOffsetRO(slice2.getDataOffset(0,0)+getRelIndex(slice2.getDataPointShape(),0,0))==1.0);
-
+	if (!dats[k]->hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(slice2.getDataAtOffsetRO(slice2.getDataOffset(0,0)+getRelIndex(slice2.getDataPointShape(),0,0))==1.0);
+	}
 	//
 	// create a rank 2 slice with four values
 	
@@ -209,10 +228,13 @@ void DataTestCase::testSlicingWorker(bool delayed)
 	
 	if (tags[k]) {CPPUNIT_ASSERT(slice3.isTagged());}
 	CPPUNIT_ASSERT(slice3.getDataPointRank()==2);
-	CPPUNIT_ASSERT(getRef(slice3,0,0,0,0)==1.0);
-	CPPUNIT_ASSERT(getRef(slice3,0,0,0,1)==1.3);
-	CPPUNIT_ASSERT(getRef(slice3,0,0,1,0)==1.3);
-	CPPUNIT_ASSERT(getRef(slice3,0,0,1,1)==2.0);
+	if (!slice3.hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(getRef(slice3,0,0,0,0)==1.0);
+	    CPPUNIT_ASSERT(getRef(slice3,0,0,0,1)==1.3);
+	    CPPUNIT_ASSERT(getRef(slice3,0,0,1,0)==1.3);
+	    CPPUNIT_ASSERT(getRef(slice3,0,0,1,1)==2.0);
+	}
    }
 
    // now some extra tests for tagged data (dats[1])
@@ -220,7 +242,7 @@ void DataTestCase::testSlicingWorker(bool delayed)
    //
    // add a value for tag "1"
 
-   DataTypes::ValueType viewData(6);
+   DataTypes::RealVectorType viewData(6);
    for (int i=0;i<viewData.size();i++) {
     viewData[i]=i;
    }
@@ -234,16 +256,21 @@ void DataTestCase::testSlicingWorker(bool delayed)
    region.push_back(DataTypes::RegionType::value_type(0,3));
 
    Data slice4(dats[1]->getSlice(region));
-
+   tdp->addUsedTag(1);
+   vector<int> ts(3,1);
+   tdp->assignTags(ts);
+   
    CPPUNIT_ASSERT(slice4.isTagged());
    CPPUNIT_ASSERT(slice4.getDataPointRank()==2);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,0,0)==0);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,0,1)==2);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,0,2)==4);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,1,0)==1);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,1,1)==3);
-   CPPUNIT_ASSERT(getRef(slice4,0,0,1,2)==5);
-
+    if (!slice4.hasNoSamples())
+    {
+	CPPUNIT_ASSERT(getRef(slice4,0,0,0,0)==0);
+	CPPUNIT_ASSERT(getRef(slice4,0,0,0,1)==2);
+	CPPUNIT_ASSERT(getRef(slice4,0,0,0,2)==4);
+	CPPUNIT_ASSERT(getRef(slice4,0,0,1,0)==1);
+	CPPUNIT_ASSERT(getRef(slice4,0,0,1,1)==3);
+	CPPUNIT_ASSERT(getRef(slice4,0,0,1,2)==5);
+    }
    for (int k=0;k<NUMDATS;++k)
    {
 	delete dats[k];
@@ -261,8 +288,8 @@ void DataTestCase::testSlicingWorker(bool delayed)
   Data* src[NUMDATS];
   for (int k=0;k<NUMDATS;++k)
   {
- 	dats[k]=new Data(1.3, viewShape);
-    	src[k]=new Data(10,DataTypes::scalarShape);
+ 	dats[k]=new Data(1.3, viewShape,fs,false);
+    	src[k]=new Data(10,DataTypes::scalarShape,fs,false);
   }
   dats[1]->tag();
   src[1]->tag();
@@ -286,13 +313,16 @@ void DataTestCase::testSlicingWorker(bool delayed)
   for (int k=0;k<NUMDATS;++k)
   {
 	cout << "\t\tTest set-slicing " << strs[k] << endl;
-	Data target(1.3,viewShape);
+	Data target(1.3,viewShape,fs,false);
 	if (k==2) {target.expand();}
 	DataTypes::RegionType region;
 	region.push_back(DataTypes::RegionType::value_type(1,1));
 	region.push_back(DataTypes::RegionType::value_type(1,1));
 	target.setSlice(*(src[k]),region);
-	CPPUNIT_ASSERT(getRef(target,0,0,1,1)==src[k]->getDataPointRO(0,0));
+	if (!target.hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(getRef(target,0,0,1,1)==src[k]->getDataPointRO(0,0));
+	}
   }
   
   // some extra tests on tagged data
@@ -300,12 +330,12 @@ void DataTestCase::testSlicingWorker(bool delayed)
   //
   // add a value for tag "1" to target
 
-  DataTypes::ValueType viewData(6);
+  DataTypes::RealVectorType viewData(6);
   for (int i=0;i<viewData.size();i++) {
 	viewData[i]=i;
   }
 
-  Data target(1.3,viewShape,FunctionSpace(),false);
+  Data target(1.3,viewShape,fs,false);
   target.tag();
   target.setTaggedValueFromCPP(1, viewShape, viewData);
 
@@ -322,18 +352,20 @@ void DataTestCase::testSlicingWorker(bool delayed)
 
   CPPUNIT_ASSERT(target.isTagged());
   CPPUNIT_ASSERT(target.getDataPointRank()==2);
-  CPPUNIT_ASSERT(getRef(target,0,0,0,0)==0);
-  CPPUNIT_ASSERT(getRef(target,0,0,0,1)==src[1]->getDataPointRO(0,0));
-  CPPUNIT_ASSERT(getRef(target,0,0,0,2)==4);
-  CPPUNIT_ASSERT(getRef(target,0,0,1,0)==1);
-  CPPUNIT_ASSERT(getRef(target,0,0,1,1)==3);
-  CPPUNIT_ASSERT(getRef(target,0,0,1,2)==5);
-
+  if (!target.hasNoSamples())
+  {
+      CPPUNIT_ASSERT(getRef(target,0,0,0,0)==0);
+      CPPUNIT_ASSERT(getRef(target,0,0,0,1)==src[1]->getDataPointRO(0,0));
+      CPPUNIT_ASSERT(getRef(target,0,0,0,2)==4);
+      CPPUNIT_ASSERT(getRef(target,0,0,1,0)==1);
+      CPPUNIT_ASSERT(getRef(target,0,0,1,1)==3);
+      CPPUNIT_ASSERT(getRef(target,0,0,1,2)==5);
+  }
   //
   // add a value for tag "2" to source
 
   DataTypes::ShapeType viewShape2;
-  DataTypes::ValueType viewData2(1);
+  DataTypes::RealVectorType viewData2(1);
   viewData2[0]=6;
   src[1]->setTaggedValueFromCPP(2, viewShape2, viewData2);
 
@@ -348,7 +380,7 @@ void DataTestCase::testSlicingWorker(bool delayed)
 
     // use a non-existent tag so we get a pointer to the default value
     // i.e.: the first element in the data array
-  DataAbstract::ValueType::value_type* targetData=target.getSampleDataByTag(9);
+  DataTypes::real_t* targetData=target.getSampleDataByTag(9);
   for (int i=0; i<target.getLength(); i++) {
       CPPUNIT_ASSERT(targetData[i]>=0);
   }
@@ -403,6 +435,9 @@ void DataTestCase::testSlicing()
 void DataTestCase::testSomeDriver(bool autolazy)
 {
   cout << endl;
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());    
   SAVELAZYSTATE
   if (autolazy)
   {
@@ -413,14 +448,14 @@ void DataTestCase::testSomeDriver(bool autolazy)
 
   DataTypes::ShapeType viewShape;
   viewShape.push_back(3);
-  DataTypes::ValueType viewData(3);
+  DataTypes::RealVectorType viewData(3);
   for (int i=0;i<viewShape[0];++i) {
     viewData[i]=i;
   }
 
   bool expanded=true;
-  Data exData(viewData,viewShape,FunctionSpace(),expanded);
-  Data cData(viewData,viewShape);
+  Data exData(viewData,viewShape, fs,expanded);
+  Data cData(viewData,viewShape, fs,false);
   Data result;
 
   CPPUNIT_ASSERT(exData.isExpanded());
@@ -463,17 +498,20 @@ void DataTestCase::testResolveType()
 {
   cout << endl;
   cout << "\tTesting resolve()\n";
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());    
   DataTypes::ShapeType viewShape;
   viewShape.push_back(2);
   viewShape.push_back(3);
   viewShape.push_back(4);
-  DataTypes::ValueType viewData(2*3*4);
+  DataTypes::RealVectorType viewData(2*3*4);
   for (int i=0;i<DataTypes::noValues(viewShape);++i) {
     viewData[i]=i;
   }
-  Data c1(viewData,viewShape);
-  Data t1(viewData,viewShape);
-  Data e1(viewData,viewShape);
+  Data c1(viewData,viewShape,fs,false);
+  Data t1(viewData,viewShape,fs,false);
+  Data e1(viewData,viewShape,fs,false);
   t1.tag();
   e1.expand();
   c1.delaySelf();
@@ -513,9 +551,9 @@ void DataTestCase::testResolveType()
   CPPUNIT_ASSERT(t1.isTagged());
   CPPUNIT_ASSERT_THROW(e1.tag(), DataException);
   cout << "\tTesting expand()\n";
-  Data c2(viewData,viewShape);
-  Data t2(viewData,viewShape);
-  Data e2(viewData,viewShape);
+  Data c2(viewData,viewShape,fs,false);
+  Data t2(viewData,viewShape,fs,false);
+  Data e2(viewData,viewShape,fs,false);
   t2.tag();
   e2.expand();
   c2.delaySelf();
@@ -538,13 +576,13 @@ void DataTestCase::testDataConstant()
   viewShape.push_back(2);
   viewShape.push_back(3);
   viewShape.push_back(4);
-  DataTypes::ValueType viewData(2*3*4);
+  DataTypes::RealVectorType viewData(2*3*4);
   for (int i=0;i<DataTypes::noValues(viewShape);++i) {
     viewData[i]=i;
   }
 
-  Data left(viewData,viewShape);
-  Data right(viewData,viewShape);
+  Data left(viewData,viewShape,FunctionSpace(),false);
+  Data right(viewData,viewShape,FunctionSpace(),false);
   Data result;
 
   cout << "\tTest some basic operations" << endl;
@@ -577,7 +615,7 @@ void DataTestCase::testDataTagged()
     DataTypes::ShapeType viewShape;
     viewShape.push_back(3);
 
-    DataTypes::ValueType viewData(3);
+    DataTypes::RealVectorType viewData(3);
     for (int i=0;i<viewShape[0];i++) {
       viewData[i]=i;
     }
@@ -589,6 +627,12 @@ void DataTestCase::testDataTagged()
     CPPUNIT_ASSERT(myData.isTagged());
     CPPUNIT_ASSERT(myData.getTagNumber(0)==1);
     CPPUNIT_ASSERT(myData.getDataPointRank()==1);
+    
+
+    cerr << "\n\n\n\n" << myData.getLength() << endl;
+    cout << "\n\n\n\n" << myData.getLength() << endl;
+    cout.flush();
+    
     CPPUNIT_ASSERT(myData.getLength()==3);
     
     CPPUNIT_ASSERT(myData.getNoValues()==3);
@@ -613,7 +657,7 @@ void DataTestCase::testDataTagged()
     cout << "\tTest setting of a tag and associated value." << endl;
 
     // value for tag "1"
-    DataTypes::ValueType eTwoData(viewData);
+    DataTypes::RealVectorType eTwoData(viewData);
  //   DataArrayView eTwoView(eTwoData, viewShape);
     for (int i=0;i<viewShape[0];i++) {
       eTwoData[i]=i+2.0;
@@ -693,35 +737,41 @@ void DataTestCase::testDataTaggedExceptions()
   Data myData;
 
   CPPUNIT_ASSERT_THROW(myData.getSampleDataByTag(0), EsysException);
-  CPPUNIT_ASSERT_THROW(myData.setTaggedValueFromCPP(0,DataTypes::ShapeType(), DataTypes::ValueType()), EsysException);
+  CPPUNIT_ASSERT_THROW(myData.setTaggedValueFromCPP(0,DataTypes::ShapeType(), DataTypes::RealVectorType()), EsysException);
 }
 
 void DataTestCase::testConstructors()
 {
   cout << endl;
-
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());    
   DataTypes::ShapeType viewShape;
   {
     cout << "\tCreate an Empty Data object" << endl;
-    Data temp(1.3,viewShape,FunctionSpace(),false);
+    Data temp(1.3,viewShape,fs,false);
   }
   {
     cout << "\tCreate a rank 2 Data object" << endl;
     viewShape.push_back(2);
     viewShape.push_back(3);
-    Data temp(1.3,viewShape,FunctionSpace(),false);
+    Data temp(1.3,viewShape,fs,false);
   }
 }
 
 void DataTestCase::testMoreOperations()
 {
    cout << endl;
+  TestDomain* tdp=new TestDomain(2,3,2);	// 2 points per sample, 3 samples, 2D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode());    
+   
    DataTypes::ShapeType shape;
    shape.push_back(3);
    shape.push_back(3);
 
   // allocate the data 
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 
   // assign values to the data
   for (int i=0;i<shape[0];i++) {
@@ -732,12 +782,12 @@ void DataTestCase::testMoreOperations()
 
 
 
-  Data dats[]={Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),true),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),true)};
+  Data dats[]={Data(data,shape,fs,false),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,true),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,true)};
   const int NUMDATS=6;
 //  const int LAZY=3;		// where do the lazy objects start?
 
@@ -767,11 +817,14 @@ void DataTestCase::testMoreOperations()
   for (int z=0;z<NUMDATS;++z)
   {
 	tmp=0;
-	for (int i=0;i<shape[0];++i)
-	{
-	   tmp+=getRef(dats[z],i,i);
+	if (!dats[z].hasNoSamples())
+	{	
+	    for (int i=0;i<shape[0];++i)
+	    {
+	      tmp+=getRef(dats[z],i,i);
+	    }
+	    CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - tmp) <= REL_TOL*std::abs(tmp));
 	}
-	CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - tmp) <= REL_TOL*std::abs(tmp));
   }
 
 
@@ -781,14 +834,17 @@ void DataTestCase::testOperations()
 {
 
   cout << endl;
-
+  TestDomain* tdp=new TestDomain(1,1,1);	// 1 point per sample, 1 sample, 1D coords
+  Domain_ptr p(tdp);
+  FunctionSpace fs=FunctionSpace(p, tdp->getContinuousFunctionCode()); 
+  
   // define the shape for the test data
   DataTypes::ShapeType shape;
   shape.push_back(2);
   shape.push_back(3);
 
   // allocate the data 
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 
   // assign values to the data
   for (int i=0;i<shape[0];i++) {
@@ -797,20 +853,25 @@ void DataTestCase::testOperations()
     }
   }
 
+  DataTypes::RealVectorType data2(DataTypes::noValues(shape),0);
+  // assign values to the data
+  for (int i=0;i<shape[0];i++) {
+    for (int j=0;j<shape[1];j++) {
+      data2[getRelIndex(shape,i,j)]=12345678.8976+getRelIndex(shape,i,j);
+    }
+  }
 
 
-  Data dats[]={Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),true),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),false),
-		Data(data,shape,FunctionSpace(),true)};
+  Data dats[]={Data(data,shape,fs,false),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,true),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,false),
+		Data(data,shape,fs,true)
+  };
   const int NUMDATS=6;
   const int LAZY=3;		// where do the lazy objects start?
 
-//   Data baseEx(data,shape,FunctionSpace(),true);
-//   Data baseCon(data,shape,FunctionSpace(),false);
-//   Data baseTag(data,shape,FunctionSpace(),false);
   Data& baseCon=dats[0];
   Data& baseTag=dats[1];
   Data& baseEx=dats[2];
@@ -825,13 +886,10 @@ void DataTestCase::testOperations()
   CPPUNIT_ASSERT(baseTag.isTagged());
 
   Data results[NUMDATS];
-//   Data& resultEx=results[0];
-//   Data& resultCon=results[1];
-//   Data& resultTag=results[2];
 
   // create 0 <= smalldata <= 1 for testing trig functions
 
-  DataTypes::ValueType smalldata(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType smalldata(DataTypes::noValues(shape),0);
 
   // assign values to the data
   for (int i=0;i<shape[0];i++) {
@@ -839,12 +897,12 @@ void DataTestCase::testOperations()
       smalldata[getRelIndex(shape,i,j)]=(i==0 && j==0)?0:1.0/(getRelIndex(shape,i,j)+1);
     }
   }
-  Data sdats[]={Data(smalldata,shape,FunctionSpace(),false),
-		Data(smalldata,shape,FunctionSpace(),false),
-		Data(smalldata,shape,FunctionSpace(),true),
-		Data(smalldata,shape,FunctionSpace(),false),
-		Data(smalldata,shape,FunctionSpace(),false),
-		Data(smalldata,shape,FunctionSpace(),true)};
+  Data sdats[]={Data(smalldata,shape,fs,false),
+		Data(smalldata,shape,fs,false),
+		Data(smalldata,shape,fs,true),
+		Data(smalldata,shape,fs,false),
+		Data(smalldata,shape,fs,false),
+		Data(smalldata,shape,fs,true)};
   sdats[1].tag();
   sdats[4].tag();
   sdats[3].delaySelf();		// 3 is a lazy constant
@@ -857,7 +915,7 @@ void DataTestCase::testOperations()
 
   double tmp;
   cout << "\tTest Data::pow." << endl;
-  Data power(3.0,shape,FunctionSpace(),true);
+  Data power(3.0,shape,fs,true);
   for (int z=0;z<NUMDATS;++z)
   {
     results[z].copy(dats[z].powD(power));
@@ -869,13 +927,18 @@ void DataTestCase::testOperations()
   for (int i=0;i<shape[0];i++) {
     for (int j=0;j<shape[1];j++) {
       tmp=pow((double)data[getRelIndex(shape,i,j)],(double)3.0);
+cerr << tmp << endl;      
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= (REL_TOL*std::abs(tmp)+0.00000001));
+	}
       }
     }
   }
-
+cerr << "Ending pow" << endl;
   cout << "\tTest Data::sin." << endl;
   for (int z=0;z<NUMDATS;++z)
   {
@@ -885,12 +948,17 @@ void DataTestCase::testOperations()
 	CPPUNIT_ASSERT(results[z].isLazy());
     }
   }
+cerr << "Ending sin" << endl;  
   for (int i=0;i<shape[0];i++) {
     for (int j=0;j<shape[1];j++) {
       tmp=sin((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -909,7 +977,11 @@ void DataTestCase::testOperations()
       tmp=cos((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -928,7 +1000,11 @@ void DataTestCase::testOperations()
       tmp=tan((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -947,7 +1023,11 @@ void DataTestCase::testOperations()
       tmp=asin((double)smalldata[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -966,7 +1046,11 @@ void DataTestCase::testOperations()
       tmp=acos((double)smalldata[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -985,7 +1069,11 @@ void DataTestCase::testOperations()
       tmp=atan((double)smalldata[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1004,7 +1092,11 @@ void DataTestCase::testOperations()
       tmp=sinh((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1023,7 +1115,11 @@ void DataTestCase::testOperations()
       tmp=cosh((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1042,7 +1138,11 @@ void DataTestCase::testOperations()
       tmp=tanh((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}    
       }
     }
   }
@@ -1062,7 +1162,11 @@ void DataTestCase::testOperations()
       tmp=data[getRelIndex(shape,i,j)];
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1082,7 +1186,11 @@ void DataTestCase::testOperations()
       tmp=data[getRelIndex(shape,i,j)];
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1101,7 +1209,11 @@ void DataTestCase::testOperations()
       tmp=data[getRelIndex(shape,i,j)];
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1121,7 +1233,11 @@ void DataTestCase::testOperations()
       tmp=log((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1141,7 +1257,11 @@ void DataTestCase::testOperations()
       tmp=log10((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1163,7 +1283,11 @@ void DataTestCase::testOperations()
       tmp=erf((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1184,11 +1308,19 @@ void DataTestCase::testOperations()
       tmp=abs((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
-
+  {
+      Data inp(data2, shape, fs, true);
+      Data res=inp.abs();
+      CPPUNIT_ASSERT(res.inf()>12345678);
+  }
   cout << "\tTest Data::sign (positive)." << endl;
   for (int z=0;z<NUMDATS;++z)
   {
@@ -1203,7 +1335,11 @@ void DataTestCase::testOperations()
       tmp=(i==0 && j==0)?0:1;
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();	
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   } 
@@ -1222,7 +1358,11 @@ void DataTestCase::testOperations()
       tmp=(i==0 && j==0)?0:-1;
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   } 
@@ -1242,7 +1382,11 @@ void DataTestCase::testOperations()
       tmp=exp((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1261,7 +1405,11 @@ void DataTestCase::testOperations()
       tmp=sqrt((double)data[getRelIndex(shape,i,j)]);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1280,7 +1428,11 @@ void DataTestCase::testOperations()
       tmp=-data[getRelIndex(shape,i,j)];
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   }
@@ -1298,7 +1450,11 @@ void DataTestCase::testOperations()
     for (int j=0;j<shape[1];j++) {
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - getRelIndex(shape,i,j)) <= REL_TOL*std::abs(data[getRelIndex(shape,i,j)]));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - getRelIndex(shape,i,j)) <= REL_TOL*std::abs(data[getRelIndex(shape,i,j)]));
+	}
       }
     }
   }
@@ -1332,7 +1488,11 @@ void DataTestCase::testOperations()
   }
   for (int z=0;z<NUMDATS;++z)
   {
-    CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - 0) <= REL_TOL*0); 
+      results[z].resolve();		
+      if (!results[z].hasNoSamples())
+      {    
+	  CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - 0) <= REL_TOL*0); 
+      }
   }
   
 
@@ -1343,7 +1503,11 @@ void DataTestCase::testOperations()
   }
   for (int z=0;z<NUMDATS;++z)
   {
-    CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - 5) <= REL_TOL*5);
+    results[z].resolve();		
+    if (!results[z].hasNoSamples())
+    {    
+	CPPUNIT_ASSERT(std::abs(results[z].getDataAtOffsetRO(0) - 5) <= REL_TOL*5);
+    }
   }
 
   cout << "\tTest Data::whereZero." << endl;
@@ -1356,7 +1520,11 @@ void DataTestCase::testOperations()
       tmp=(getRelIndex(shape,i,j)<=2);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   } 
@@ -1371,7 +1539,11 @@ void DataTestCase::testOperations()
       tmp=!(getRelIndex(shape,i,j)<=2);
       for (int z=0;z<NUMDATS;++z)
       {
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{	
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],i,j) - tmp) <= REL_TOL*std::abs(tmp));
+	}
       }
     }
   } 
@@ -1385,8 +1557,12 @@ void DataTestCase::testOperations()
     for (int j=0;j<shape[1];j++) {
      for (int z=0;z<NUMDATS;++z)
      {
-        tmp=getRef(dats[z],i,j);
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],j,i) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{       
+	    tmp=getRef(dats[z],i,j);
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],j,i) - tmp) <= REL_TOL*std::abs(tmp));
+	}
      }
     }
   } 
@@ -1400,8 +1576,12 @@ void DataTestCase::testOperations()
     for (int j=0;j<shape[1];j++) {
      for (int z=0;z<NUMDATS;++z)
      {
-        tmp=getRef(dats[z],i,j);
-	CPPUNIT_ASSERT(std::abs(getRef(results[z],j,i) - tmp) <= REL_TOL*std::abs(tmp));
+	results[z].resolve();		
+	if (!results[z].hasNoSamples())
+	{       
+	    tmp=getRef(dats[z],i,j);
+	    CPPUNIT_ASSERT(std::abs(getRef(results[z],j,i) - tmp) <= REL_TOL*std::abs(tmp));
+	}
      }
     }
   } 
@@ -1420,7 +1600,7 @@ void DataTestCase::testBinary()
   shape.push_back(3);
 
   // allocate the data 
-  DataTypes::ValueType data(DataTypes::noValues(shape),0);
+  DataTypes::RealVectorType data(DataTypes::noValues(shape),0);
 
   // assign values to the data
   for (int i=0;i<shape[0];i++) {
@@ -1430,8 +1610,8 @@ void DataTestCase::testBinary()
   }
 
 
-  Data one(1.0,DataTypes::scalarShape,FunctionSpace());
-  Data two(2.0,DataTypes::scalarShape,FunctionSpace());
+  Data one(1.0,DataTypes::scalarShape,FunctionSpace(),false);
+  Data two(2.0,DataTypes::scalarShape,FunctionSpace(),false);
   Data dats[]={Data(data,shape,FunctionSpace(),false),
 		Data(data,shape,FunctionSpace(),false),
 		Data(data,shape,FunctionSpace(),true),
@@ -1467,6 +1647,30 @@ void DataTestCase::testBinary()
 }
 
 
+void DataTestCase::testComplexSamples()
+{
+    FunctionSpace fs=getTestDomainFunctionSpace(4,1,1);	// 4 points per sample, one sample, one value per point
+    Data x(5, DataTypes::scalarShape, fs, false);
+    x.complicate();
+    
+    const DataTypes::cplx_t* r=x.getSampleDataRO(0, DataTypes::cplx_t(0));
+    CPPUNIT_ASSERT(r[0]==DataTypes::cplx_t(5,0));
+    
+    RealVectorType v(1);
+    Data t(0,DataTypes::scalarShape, fs, false);
+    t.tag();
+    for (int i=1;i<5;++i)
+    {
+        v[0]=i;
+        t.setTaggedValueFromCPP(i,DataTypes::scalarShape, v);
+    }	
+    t.complicate();
+    for (int i=1;i<5;++i)
+    {
+	CPPUNIT_ASSERT(t.getSampleDataByTag(i,DataTypes::cplx_t(0))[0]==DataTypes::cplx_t(i,0));
+    }
+}
+
 void DataTestCase::testMemAlloc()
 {
   //
@@ -1498,6 +1702,8 @@ TestSuite* DataTestCase::suite()
   // create the suite of tests to perform.
   TestSuite *testSuite = new TestSuite("DataTestCase");
   testSuite->addTest(new TestCaller<DataTestCase>(
+              "testComplexSamples",&DataTestCase::testComplexSamples));
+  testSuite->addTest(new TestCaller<DataTestCase>(
               "testCopying",&DataTestCase::testCopying));
   testSuite->addTest(new TestCaller<DataTestCase>(
               "testSome",&DataTestCase::testSome));
diff --git a/escriptcore/test/DataTestCase.h b/escriptcore/test/DataTestCase.h
index 1af8026..b3ff706 100644
--- a/escriptcore/test/DataTestCase.h
+++ b/escriptcore/test/DataTestCase.h
@@ -39,7 +39,7 @@ public:
   void testCopying();
   void testResolveType();
   void testBinary();
-
+  void testComplexSamples();
   static CppUnit::TestSuite* suite();
 
 private:
diff --git a/escriptcore/test/DataTypesTestCase.cpp b/escriptcore/test/DataTypesTestCase.cpp
index 6e40014..5f97262 100644
--- a/escriptcore/test/DataTypesTestCase.cpp
+++ b/escriptcore/test/DataTypesTestCase.cpp
@@ -14,21 +14,17 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataTypes.h>
 
 #include "DataTypesTestCase.h"
-#include "escript/DataAlgorithm.h"
-#include "escript/DataTypes.h"
-#include "escript/DataVector.h"
-#include "esysUtils/EsysException.h"
+
+#include <escript/DataVector.h>
+#include <escript/EsysException.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
 
 using namespace CppUnit;
-using namespace esysUtils;
 using namespace escript;
 using namespace escript::DataTypes;
 using namespace std;
@@ -88,6 +84,8 @@ void DataTypesTestCase::testShapeFns()
   CPPUNIT_ASSERT(rt==rl);
 
 
+/*  
+  
 #ifdef DOASSERT
 // The errors we are testing for are triggered by ESysAssert which is only defined when DOASSERT is.
 
@@ -105,6 +103,8 @@ void DataTypesTestCase::testShapeFns()
   CPPUNIT_ASSERT_THROW(getRelIndex(s1,10), EsysException);
   CPPUNIT_ASSERT_THROW(getRelIndex(s3,2,4,4), EsysException);
 #endif
+*/
+
 }
 
 void DataTypesTestCase::testResultSliceShape() {
@@ -149,9 +149,9 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //    DataArrayView sourceView(sourceData, sourceShape);
-    DataVector targetData(1, 2.0, 1);
+    RealVectorType targetData(1, 2.0, 1);
 //    DataArrayView targetView(targetData, DataTypes::ShapeType());
 
     // Copy source view to target view.
@@ -177,9 +177,9 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
-    DataVector targetData(1, 2.0, 1);
+    RealVectorType targetData(1, 2.0, 1);
 //     DataArrayView targetView(targetData, DataTypes::ShapeType());
 
     // Copy source view to target view.
@@ -205,14 +205,14 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
     //DataArrayView sourceView(sourceData, sourceShape);
     for (int i=0;i<sourceShape[0];i++) {
       sourceData[i]=i;
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
     //DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -242,7 +242,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
 
     for (int i=0;i<sourceShape[0];i++) {
@@ -250,7 +250,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -282,14 +282,14 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     for (int i=0;i<sourceShape[0];i++) {
       sourceData[i]=i;
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -320,12 +320,12 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     sourceData[0]=5;
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -356,7 +356,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -366,7 +366,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -399,7 +399,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -409,7 +409,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -442,7 +442,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -452,7 +452,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -488,7 +488,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -498,7 +498,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -533,12 +533,12 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     sourceData[0]=5;
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -573,7 +573,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -585,7 +585,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -626,7 +626,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -638,7 +638,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -678,7 +678,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -690,7 +690,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -727,7 +727,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -739,7 +739,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -776,7 +776,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -788,7 +788,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -825,12 +825,12 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     sourceData[0]=5;
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 
     // Copy source view to target view.
 //     targetView.copySliceFrom(sourceView,region);
@@ -868,7 +868,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -882,7 +882,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -928,7 +928,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -942,7 +942,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -985,7 +985,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -999,7 +999,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -1041,7 +1041,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -1055,7 +1055,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -1096,7 +1096,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
     //DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -1110,7 +1110,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -1151,7 +1151,7 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     int val=0;
     for (int i=0;i<sourceShape[0];i++) {
@@ -1165,7 +1165,7 @@ void DataTypesTestCase::testSlicing() {
     }
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
@@ -1206,12 +1206,12 @@ void DataTypesTestCase::testSlicing() {
 
     // Create source and target views.
     int len = DataTypes::noValues(sourceShape);
-    DataVector sourceData(len, 2.0, len);
+    RealVectorType sourceData(len, 2.0, len);
 //     DataArrayView sourceView(sourceData, sourceShape);
     sourceData[0]=5;
 
     len = DataTypes::noValues(targetShape);
-    DataVector targetData(len, 2.0, len);
+    RealVectorType targetData(len, 2.0, len);
 //     DataArrayView targetView(targetData, targetShape);
 
     // Copy source view to target view.
diff --git a/escriptcore/test/DataVectorTestCase.cpp b/escriptcore/test/DataVectorTestCase.cpp
index 976b034..0b6798d 100644
--- a/escriptcore/test/DataVectorTestCase.cpp
+++ b/escriptcore/test/DataVectorTestCase.cpp
@@ -14,14 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataVector.h>
 
 #include "DataVectorTestCase.h"
 
-#include "escript/DataVector.h"
-#include "esysUtils/EsysException.h"
+#include <escript/EsysException.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
@@ -29,7 +26,7 @@
 using namespace std;
 using namespace CppUnit;
 using namespace escript;
-using namespace esysUtils;
+using namespace escript::DataTypes;
 
 
 void DataVectorTestCase::testAll()
@@ -39,14 +36,14 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCreate and check an empty DataVector object." << endl;
 
-    DataVector vec;
+    RealVectorType vec;
     CPPUNIT_ASSERT(vec.size() == 0);
   }
  
   {
     cout << "\tCheck DataVector resize operation." << endl;
 
-    DataVector vec;
+    RealVectorType vec;
     CPPUNIT_ASSERT(vec.size() == 0);
 
     vec.resize(1,0,1);
@@ -62,20 +59,20 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCreate and check DataVector objects of various sizes." << endl;
 
-    DataVector vec1(0,0,1);
+    RealVectorType vec1(0,0,1);
     CPPUNIT_ASSERT(vec1.size() == 0);
 
-    DataVector vec2(1,0,1);
+    RealVectorType vec2(1,0,1);
     CPPUNIT_ASSERT(vec2.size() == 1);
 
-    DataVector vec3(1000,0,1);
+    RealVectorType vec3(1000,0,1);
     CPPUNIT_ASSERT(vec3.size() == 1000);
   }
 
   {
     cout << "\tAssign and check various elements to a DataVector." << endl;
 
-    DataVector vec(1000,0,1);
+    RealVectorType vec(1000,0,1);
 
     for (int i=0; i < 1000; i++) {
       vec[i] = i;
@@ -97,13 +94,13 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCheck DataVector copy constructor." << endl;
 
-    DataVector vec1(1000,0,1);
+    RealVectorType vec1(1000,0,1);
 
     for (int i=0; i < 1000; i++) {
       vec1[i] = i;
     }
 
-    DataVector vec2(vec1);
+    RealVectorType vec2(vec1);
 
     CPPUNIT_ASSERT(vec1.size() == vec2.size());
 
@@ -115,13 +112,13 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCheck DataVector = operator." << endl;
 
-    DataVector vec1(1000,0,1);
+    RealVectorType vec1(1000,0,1);
 
     for (int i=0; i < 1000; i++) {
       vec1[i] = i;
     }
 
-    DataVector vec2;
+    RealVectorType vec2;
 
     vec2 = vec1;
 
@@ -135,13 +132,13 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCheck DataVector == operator." << endl;
 
-    DataVector vec1(1000,0,1);
+    RealVectorType vec1(1000,0,1);
 
     for (int i=0; i < 1000; i++) {
       vec1[i] = i;
     }
 
-    DataVector vec2;
+    RealVectorType vec2;
 
     vec2 = vec1;
 
@@ -151,26 +148,27 @@ void DataVectorTestCase::testAll()
   {
     cout << "\tCheck DataVector != operator." << endl;
 
-    DataVector vec1(1000,0,1);
+    RealVectorType vec1(1000,0,1);
 
     for (int i=0; i < 1000; i++) {
       vec1[i] = i;
     }
 
-    DataVector vec2;
+    RealVectorType vec2;
 
     CPPUNIT_ASSERT(vec1 != vec2);
   }
+/*  
   #if defined DOASSERT
   {
     cout << "\tCheck DataVector index exception." << endl;
 
-    DataVector vec(1000,0,1);
+    RealVectorType vec(1000,0,1);
 
     CPPUNIT_ASSERT_THROW( (void) vec[1001],  EsysException);
   }
   #endif
-
+*/
 }
 
 TestSuite* DataVectorTestCase::suite()
diff --git a/escriptcore/test/EsysExceptionTestCase.cpp b/escriptcore/test/EsysExceptionTestCase.cpp
new file mode 100644
index 0000000..a0c0691
--- /dev/null
+++ b/escriptcore/test/EsysExceptionTestCase.cpp
@@ -0,0 +1,165 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+
+#include "EsysExceptionTestCase.h"
+#include "escript/EsysException.h"
+
+#include <cppunit/TestCaller.h>
+#include <iostream>
+
+using namespace std;
+using namespace CppUnit;
+using namespace escript;
+
+class DerivedEx : public EsysException
+{
+    typedef EsysException Parent;
+public:
+    DerivedEx(const string& str) : Parent(str) {}
+};
+
+void EsysExceptionTestCase::testCase1()
+{
+    string ex1Text("My first funny exception message.");
+    EsysException ex1(ex1Text);
+
+    string ex1String = ex1.what();
+
+    //
+    // exception text should contain entered exception message
+    //
+    CPPUNIT_ASSERT(ex1String.find(ex1Text) != string::npos);
+
+    //
+    // copy constructed exception should match original
+    //
+    EsysException copyEx(ex1);
+    string copyString = copyEx.what();
+    CPPUNIT_ASSERT(ex1String == copyString);
+
+    //
+    // check throw/catch mechanism
+    //
+    string ex2Text("My second funny exception message.");
+    try {
+        EsysException ex2(ex2Text);
+        throw(ex2);
+    } catch (EsysException& e) {
+        //
+        // exception text should contain entered exception message
+        //
+        string eString = e.what();
+        CPPUNIT_ASSERT(eString.find(ex2Text) != string::npos);
+    }
+}
+
+//
+// test derived EsysException
+//
+void EsysExceptionTestCase::testCase2()
+{
+    string ex1Text("asdjhieurncidhfjsnfkjefkjndfjkhsdrdfjksdhfweh");
+    DerivedEx ex1(ex1Text);
+
+    //
+    // exception text should contain entered exception message
+    //
+    string ex1String = ex1.what();
+    CPPUNIT_ASSERT(ex1String.find(ex1Text) != string::npos);
+
+    //
+    // copy constructed exception should match original
+    //
+    DerivedEx copyEx(ex1);
+    string copyString = copyEx.what();
+    CPPUNIT_ASSERT(ex1String == copyString);
+
+    //
+    // check throw/catch mechanism
+    //
+    string ex2Text("pjkkjhdfbnkjerbkjsduflfkjahalkgjlklhjhj");
+    try {
+
+        DerivedEx ex2(ex2Text);
+        throw(ex2);
+    } catch (DerivedEx& e) {
+        //
+        // exception text should contain entered exception message
+        //
+        string eString = e.what();
+        CPPUNIT_ASSERT(eString.find(ex2Text) != string::npos);
+    }
+
+    //
+    // check throw/catch mechanism
+    //
+    string ex3Text("irfjvniouf;iarhglAKSDIghlAKSDghladg");
+    try {
+
+        DerivedEx ex3(ex3Text);
+        throw(ex3);
+    } catch (EsysException& e) {
+        //
+        // exception text should contain entered exception message
+        //
+        std::string eString = e.what();
+        CPPUNIT_ASSERT(eString.find(ex3Text) != string::npos);
+    }
+
+    //
+    // test to see if exception name gets lost on rethrow
+    //
+    try {
+        try {
+            DerivedEx ex4("D ex4 text.");
+            throw ex4;
+        }
+        catch (EsysException& e) {
+            cout << endl << e.what() << endl;
+            throw;
+        }
+    } catch (EsysException& e) {
+        cout << e.what() << endl;
+    }
+
+    cout << "Test EsysException may be caught as a std::exception" << endl;
+    try {
+        DerivedEx ex4("Exception caught as std::exception");
+        throw ex4;
+    } catch (exception& e) {
+        // cout << e.what() << endl;
+        CPPUNIT_ASSERT(e.what() == string("Exception caught as std::exception"));
+    } catch (...) {
+        //
+        // if the exception is caught here there is a problem
+        CPPUNIT_ASSERT(false);
+    }
+}
+
+TestSuite* EsysExceptionTestCase::suite()
+{
+    //
+    // create the suite of tests to perform.
+    TestSuite *testSuite = new TestSuite("EsysExceptionTestCase");
+
+    testSuite->addTest(new TestCaller<EsysExceptionTestCase>(
+                "testCase1",&EsysExceptionTestCase::testCase1));
+    testSuite->addTest(new TestCaller<EsysExceptionTestCase>(
+                "testCase2",&EsysExceptionTestCase::testCase2));
+    return testSuite;
+}
+
diff --git a/esysUtils/test/EsysExceptionTestCase.h b/escriptcore/test/EsysExceptionTestCase.h
similarity index 97%
rename from esysUtils/test/EsysExceptionTestCase.h
rename to escriptcore/test/EsysExceptionTestCase.h
index 0507d1a..cfdb1af 100644
--- a/esysUtils/test/EsysExceptionTestCase.h
+++ b/escriptcore/test/EsysExceptionTestCase.h
@@ -24,7 +24,6 @@
 class EsysExceptionTestCase : public CppUnit::TestFixture
 {
 public:
-   void testCase0();
    void testCase1();
    void testCase2();
 
diff --git a/esysUtils/test/EsysFileWriterTestCase.cpp b/escriptcore/test/FileWriterTestCase.cpp
similarity index 75%
rename from esysUtils/test/EsysFileWriterTestCase.cpp
rename to escriptcore/test/FileWriterTestCase.cpp
index 199745d..84609bc 100644
--- a/esysUtils/test/EsysFileWriterTestCase.cpp
+++ b/escriptcore/test/FileWriterTestCase.cpp
@@ -14,21 +14,21 @@
 *
 *****************************************************************************/
 
+#include <escript/FileWriter.h>
+
+#include "FileWriterTestCase.h"
 
-#include "EsysFileWriterTestCase.h"
-#include "esysUtils/esysFileWriter.h"
 #include <cppunit/TestCaller.h>
 #include <fstream>
 #include <sstream>
 
-#include "esysUtils/Esys_MPI.h"
 
 using namespace CppUnit;
 using namespace std;
 
-using esysUtils::FileWriter;
+using escript::FileWriter;
 
-void EsysFileWriterTestCase::testAll()
+void FileWriterTestCase::testAll()
 {
     const string filename("fwtest_file");
     int mpisize=1, mpirank=0;
@@ -61,12 +61,17 @@ void EsysFileWriterTestCase::testAll()
     CPPUNIT_ASSERT(fileSize(filename) == 100);
 
     CPPUNIT_ASSERT(fw->openFile(filename) == true);
-    oss.write(data, 4);
     cout << "\tTest writeShared." << endl;
-    CPPUNIT_ASSERT(fw->writeShared(oss) == true);
-    CPPUNIT_ASSERT(oss.str().length() == 0);
+    if (mpirank == mpisize-1) {
+        oss.write(data, 4);
+        CPPUNIT_ASSERT(fw->writeShared(oss) == true);
+        CPPUNIT_ASSERT(oss.str().length() == 0);
+    }
     fw->close();
-    CPPUNIT_ASSERT(fileSize(filename) == 4*mpisize);
+#ifdef ESYS_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+    CPPUNIT_ASSERT_EQUAL(fileSize(filename), 4L);
 
     CPPUNIT_ASSERT(fw->openFile(filename) == true);
     oss.write(data, 4);
@@ -74,11 +79,14 @@ void EsysFileWriterTestCase::testAll()
     CPPUNIT_ASSERT(fw->writeAt(oss, 16*(mpirank+1)) == true);
     CPPUNIT_ASSERT(oss.str().length() == 0);
     fw->close();
+#ifdef ESYS_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
     CPPUNIT_ASSERT(fileSize(filename) == 16*mpisize+4);
     delete fw;
 }
 
-long EsysFileWriterTestCase::fileSize(string filename)
+long FileWriterTestCase::fileSize(string filename)
 {
     ifstream f(filename.c_str());
     f.seekg(0, f.end);
@@ -90,11 +98,11 @@ long EsysFileWriterTestCase::fileSize(string filename)
     return pos;
 }
 
-TestSuite* EsysFileWriterTestCase::suite()
+TestSuite* FileWriterTestCase::suite()
 {
-    TestSuite *testSuite = new TestSuite("EsysFileWriterTestCase");
-    testSuite->addTest(new TestCaller<EsysFileWriterTestCase>(
-                "testAll",&EsysFileWriterTestCase::testAll));
+    TestSuite *testSuite = new TestSuite("FileWriterTestCase");
+    testSuite->addTest(new TestCaller<FileWriterTestCase>(
+                "testAll",&FileWriterTestCase::testAll));
     return testSuite;
 }
 
diff --git a/esysUtils/test/EsysFileWriterTestCase.h b/escriptcore/test/FileWriterTestCase.h
similarity index 85%
rename from esysUtils/test/EsysFileWriterTestCase.h
rename to escriptcore/test/FileWriterTestCase.h
index 425dfb8..f9fe8a2 100644
--- a/esysUtils/test/EsysFileWriterTestCase.h
+++ b/escriptcore/test/FileWriterTestCase.h
@@ -15,13 +15,13 @@
 *****************************************************************************/
 
 
-#ifndef __ESYS_FILEWRITERTESTCASE_H__
-#define __ESYS_FILEWRITERTESTCASE_H__
+#ifndef __ESCRIPT_FILEWRITERTESTCASE_H__
+#define __ESCRIPT_FILEWRITERTESTCASE_H__
 
 #include <cppunit/TestFixture.h>
 #include <cppunit/TestSuite.h>
 
-class EsysFileWriterTestCase : public CppUnit::TestFixture
+class FileWriterTestCase : public CppUnit::TestFixture
 {
 public:
     void testAll();
diff --git a/escriptcore/test/FunctionSpaceTestCase.cpp b/escriptcore/test/FunctionSpaceTestCase.cpp
index dd787e9..74e37f1 100644
--- a/escriptcore/test/FunctionSpaceTestCase.cpp
+++ b/escriptcore/test/FunctionSpaceTestCase.cpp
@@ -14,24 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#ifdef BADPYTHONMACROS
-// This hack is required for BSD/OSX builds with python 2.7
-// (and possibly others).  It must be the first include.
-// From bug reports online it seems that python redefines
-// some c macros that are functions in c++.
-// c++ doesn't like that!
-#include <Python.h>
-#undef BADPYTHONMACROS
-#endif
+#include <escript/FunctionSpace.h>
 
 #include "FunctionSpaceTestCase.h"
 
-#include "escript/FunctionSpace.h"
-#include "escript/NullDomain.h"
+#include <escript/NullDomain.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
diff --git a/escriptcore/test/SConscript b/escriptcore/test/SConscript
index c093b14..807f857 100644
--- a/escriptcore/test/SConscript
+++ b/escriptcore/test/SConscript
@@ -20,11 +20,11 @@ local_env = env.Clone()
 
 if local_env['cppunit']:
     # get the test source file names
-    sources = Glob('*.cpp')+Glob('*.c')
-    testname='escript_UnitTest'
+    sources = Glob('*.cpp')
+    testname = 'escript_UnitTest'
 
     # build the executable
-    local_env.Append(LIBS=['escript', 'esysUtils']+env['cppunit_libs'])
+    local_env.AppendUnique(LIBS=env['escript_libs']+env['cppunit_libs'])
     # some of these test files are too large to optimize
     local_env['CCFLAGS'] = re.sub('-O[0-9]', '-g', str(local_env['CCFLAGS']))
     program = local_env.Program(testname, sources)
@@ -37,10 +37,10 @@ if local_env['cppunit']:
     Alias("run_tests", testname+'.passed')
 
     # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/escriptcore/test", ('./'+testname,))
+    from grouptest import GroupTest
+    tgroup = GroupTest("escriptcpp", "$BINRUNNER ", (), "", "$BUILD_DIR/escriptcore/test", ('./'+testname,))
     TestGroups.append(tgroup)
 
 # configure python unit tests
-local_env.SConscript(dirs = ['#/escriptcore/test/python'], variant_dir='python', duplicate=0, exports=['py_wrapper_lib'])
+local_env.SConscript('python/SConscript', duplicate=0)
 
diff --git a/escriptcore/test/SharedDataTestCase.cpp b/escriptcore/test/SharedDataTestCase.cpp
index a701602..1a9bb78 100644
--- a/escriptcore/test/SharedDataTestCase.cpp
+++ b/escriptcore/test/SharedDataTestCase.cpp
@@ -15,13 +15,11 @@
 
 // The purpose of these tests is to check for unwanted sharing between Data objects
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/Data.h>
+#include <escript/TestDomain.h>
 
 #include "SharedDataTestCase.h"
-#include "escript/Data.h"
-#include "escript/EscriptParams.h"
+#include <escript/EscriptParams.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
@@ -31,9 +29,17 @@ using namespace std;
 using namespace CppUnit;
 using namespace escript::DataTypes;
 
+FunctionSpace getSharedFs()
+{
+    static FunctionSpace fs=getTestDomainFunctionSpace(1,50,1);
+    return fs;
+  
+}
+
+
 // Create a data, involve it in a lazy expression. Then modify the original
 // and see if the value of the lazy is affected.
-#define TESTEQOP(OP) { Data d((double)42,DataTypes::scalarShape); Data L=d.delay(); L-=Data((double)42,DataTypes::scalarShape); d OP Data(2,DataTypes::scalarShape); CPPUNIT_ASSERT(L.Lsup()<0.001);} 
+#define TESTEQOP(OP) { Data d((double)42,DataTypes::scalarShape, getSharedFs(),false); Data L=d.delay(); L-=Data((double)42,DataTypes::scalarShape, getSharedFs(),false); d OP Data(2,DataTypes::scalarShape, getSharedFs(),false); CPPUNIT_ASSERT(L.Lsup()<0.001);} 
 
 // Test if the copy constructor shares a DataAbstract with its originator
 void SharedDataTestCase::testEQ()
@@ -46,16 +52,17 @@ void SharedDataTestCase::testEQ()
   TESTEQOP(*=)
   cout << "\tOK" << endl << "Testing /=";
   TESTEQOP(/=)
+  cout << "\tOK" << endl;
 }
 
 // Test for shared data caused by using a copy constructor
 void SharedDataTestCase::testCC()
 {
   cout << endl;
-  Data d(42, DataTypes::scalarShape);
+  Data d(42, DataTypes::scalarShape, getSharedFs(),false);
   Data shared(d);
-  d+=Data(20,DataTypes::scalarShape);
-  shared-=Data(42,DataTypes::scalarShape);
+  d+=Data(20,DataTypes::scalarShape, getSharedFs(),false);
+  shared-=Data(42,DataTypes::scalarShape, getSharedFs(),false);
   CPPUNIT_ASSERT(shared.Lsup()<0.001);
 }
 
@@ -63,28 +70,28 @@ void SharedDataTestCase::testCC()
 void SharedDataTestCase::testAssign()
 {
   cout << endl;
-  Data d(42, DataTypes::scalarShape);
+  Data d(42, DataTypes::scalarShape, getSharedFs(),false);
   Data shared=d;
-  d+=Data(20,DataTypes::scalarShape);
-  shared-=Data(42,DataTypes::scalarShape);
+  d+=Data(20,DataTypes::scalarShape, getSharedFs(),false);
+  shared-=Data(42,DataTypes::scalarShape, getSharedFs(),false);
   CPPUNIT_ASSERT(shared.Lsup()<0.001);
 }
 
 void SharedDataTestCase::testSetToZero()
 {
-  Data d((double)42,DataTypes::scalarShape); 
+  Data d((double)42,DataTypes::scalarShape, getSharedFs(),false); 
   Data L=d.delay(); 
-  L-=Data((double)42,DataTypes::scalarShape);
+  L-=Data((double)42,DataTypes::scalarShape, getSharedFs(),false);
   d.setToZero();
   CPPUNIT_ASSERT(L.Lsup()<0.001);
 }
 
 void SharedDataTestCase::testSetTaggedValueFromCPP()
 {
-  Data d((double)42,DataTypes::scalarShape);
+  Data d((double)42,DataTypes::scalarShape, getSharedFs(),false);
   d.tag(); 
   Data L=d.delay();
-  ValueType v(1,17);
+  RealVectorType v(1,17);
   d.setTaggedValueFromCPP(1,DataTypes::scalarShape,v);
   L.resolve();
   // at this point, d should have a tag and L should not
@@ -95,7 +102,7 @@ void SharedDataTestCase::testSetTaggedValueFromCPP()
 
 void SharedDataTestCase::testGetDataAtOffset()
 {
-  Data d((double)42,DataTypes::scalarShape);
+  Data d((double)42,DataTypes::scalarShape, getSharedFs(),false);
   Data L=d.delay();
   // now change the data directly
   d.requireWrite();
@@ -105,7 +112,7 @@ void SharedDataTestCase::testGetDataAtOffset()
 
 void SharedDataTestCase::testGetDataPoint()
 {
-  Data d((double)42,DataTypes::scalarShape);
+  Data d((double)42,DataTypes::scalarShape, getSharedFs(),false);
   Data L=d.delay();
   // now change the data directly
   d.requireWrite();
@@ -115,10 +122,15 @@ void SharedDataTestCase::testGetDataPoint()
 
 void SharedDataTestCase::testGetSampleRW()
 {
-  Data d((double)42,DataTypes::scalarShape);
+  Data d((double)42,DataTypes::scalarShape, getSharedFs(),false);
   Data L=d.delay();
+  
+  std::cerr << "Please ignore the shared object message. We are testing the error checking.\n";
+#ifdef SLOWSHARECHECK
   // now change the data directly
   CPPUNIT_ASSERT_THROW(*d.getSampleDataRW(0)=17, DataException);
+#endif  
+  std::cerr << "End ignore message\n";
   // Now try again properly 
   d.requireWrite();
   *d.getSampleDataRW(0)=17;
diff --git a/escriptcore/test/TaipanTestCase.cpp b/escriptcore/test/TaipanTestCase.cpp
index 7395ab1..96d64d2 100644
--- a/escriptcore/test/TaipanTestCase.cpp
+++ b/escriptcore/test/TaipanTestCase.cpp
@@ -14,21 +14,17 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataTypes.h>
 
 #include "TaipanTestCase.h"
 
-#include "escript/Taipan.h"
-#include "esysUtils/EsysException.h"
+#include <escript/Taipan.h>
 
 #include <cppunit/TestCaller.h>
 #include <iostream>
 
 using namespace std;
 using namespace escript;
-using namespace esysUtils;
 using namespace CppUnit;
 
 void TaipanTestCase::testN1() {
diff --git a/escriptcore/test/escript_UnitTest.cpp b/escriptcore/test/escript_UnitTest.cpp
index d6163bb..f4e479f 100644
--- a/escriptcore/test/escript_UnitTest.cpp
+++ b/escriptcore/test/escript_UnitTest.cpp
@@ -14,24 +14,27 @@
 *
 *****************************************************************************/
 
+#include <escript/EsysMPI.h>
 
-#include <iostream>
-
-#include "DataEmptyTestCase.h"
+//#include "DataAlgorithmAdapterTestCase.h"
 #include "DataConstantTestCase.h"
-#include "DataTaggedTestCase.h"
+#include "DataEmptyTestCase.h"
 #include "DataExpandedTestCase.h"
 #include "DataFactoryTestCase.h"
-#include "DataBlocks2DTestCase.h"
-#include "DataVectorTestCase.h"
-#include "TaipanTestCase.h"
-#include "DataAlgorithmAdapterTestCase.h"
-#include "FunctionSpaceTestCase.h"
-#include "DataTestCase.h"
+#include "DataLazyTestCase.h"
 #include "DataMathsTestCase.h"
+#include "DataTaggedTestCase.h"
+#include "DataTestCase.h"
 #include "DataTypesTestCase.h"
-#include "DataLazyTestCase.h"
+#include "DataVectorTestCase.h"
+#include "EsysExceptionTestCase.h"
+#include "FileWriterTestCase.h"
+#include "FunctionSpaceTestCase.h"
 #include "SharedDataTestCase.h"
+#include "TaipanTestCase.h"
+#include "DataCombinationsTestCase.h"
+
+#include <iostream>
 
 #include <cppunit/CompilerOutputter.h>
 #include <cppunit/TestResult.h>
@@ -40,8 +43,6 @@
 
 using namespace CppUnit;
 
-#include "esysUtils/Esys_MPI.h"
-
 int main(int argc, char* argv[])
 {
 #ifdef ESYS_MPI
@@ -55,21 +56,22 @@ int main(int argc, char* argv[])
     TestResultCollector result;
     controller.addListener(&result);
 	TestRunner runner;
+	runner.addTest(EsysExceptionTestCase::suite());
 	runner.addTest(SharedDataTestCase::suite());
 	runner.addTest(DataTypesTestCase::suite());
-	runner.addTest(DataMathsTestCase::suite());
+	runner.addTest(DataFactoryTestCase::suite());
 	runner.addTest(DataEmptyTestCase::suite());
 	runner.addTest(DataConstantTestCase::suite());
  	runner.addTest(DataTaggedTestCase::suite());
 	runner.addTest(DataExpandedTestCase::suite());
-	runner.addTest(DataFactoryTestCase::suite());
-	runner.addTest(DataBlocks2DTestCase::suite());
 	runner.addTest(DataVectorTestCase::suite());
+	runner.addTest(DataMathsTestCase::suite());
+	runner.addTest(FileWriterTestCase::suite());
 	runner.addTest(TaipanTestCase::suite());
- 	runner.addTest(DataAlgorithmAdapterTestCase::suite());
 	runner.addTest(FunctionSpaceTestCase::suite());
 	runner.addTest(DataTestCase::suite());
 	runner.addTest(DataLazyTestCase::suite());
+	runner.addTest(DataCombinationsTestCase::suite());
 
 	runner.run(controller);
     CompilerOutputter outputter( &result, std::cerr );
diff --git a/escriptcore/test/multi_arrayTestCase.cpp b/escriptcore/test/multi_arrayTestCase.cpp
index c728ce6..f1bed91 100644
--- a/escriptcore/test/multi_arrayTestCase.cpp
+++ b/escriptcore/test/multi_arrayTestCase.cpp
@@ -14,9 +14,7 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/DataTypes.h>
 
 #include "multi_arrayTestCase.h"
 
diff --git a/escriptcore/test/python/SConscript b/escriptcore/test/python/SConscript
index 288186f..251f8a4 100644
--- a/escriptcore/test/python/SConscript
+++ b/escriptcore/test/python/SConscript
@@ -50,12 +50,12 @@ env.Alias('py_tests', [splitext(x)[0]+'.passed' for x in testruns])
 
 # run all tests
 program = local_env.RunPyUnitTest(alltestruns)
-Depends(program, [py_wrapper_lib, 'install_escript_py', 'build_py_tests'])
+Requires(program, ['install_escript', 'build_py_tests'])
 if env['usempi']:
-    Depends(program, env['prefix']+"/lib/pythonMPI")
+    Requires(program, ['install_pythonMPI'])
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("ESCRIPT_TEST_DATA_ROOT","$BATCH_ROOT/escriptcore/test/python"),('ESCRIPT_WORKDIR','$BUILD_DIR/escriptcore/test/python')),"$BATCH_ROOT/escriptcore/test/python","$BUILD_DIR/escriptcore/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("escript", "$PYTHONRUNNER ", (("ESCRIPT_TEST_DATA_ROOT","$BATCH_ROOT/escriptcore/test/python"),('ESCRIPT_WORKDIR','$BUILD_DIR/escriptcore/test/python')), "$BATCH_ROOT/escriptcore/test/python","$BUILD_DIR/escriptcore/test/python", testruns)
 TestGroups.append(tgroup)
 
diff --git a/escriptcore/test/python/run_symbolic.py b/escriptcore/test/python/run_symbolic.py
index 97ad705..2904657 100644
--- a/escriptcore/test/python/run_symbolic.py
+++ b/escriptcore/test/python/run_symbolic.py
@@ -37,6 +37,7 @@ Test suite for the escript.symbolic module
 __author__="Cihan Altinay"
 
 from esys.escript import *
+from esys.escript.symbolic import *
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 import numpy
@@ -2465,23 +2466,23 @@ class Test_SymbolicTestCase(unittest.TestCase):
         self.assertAlmostEqual(Lsup(res-ref), 0.0, self.TOL_DIGITS, "wrong result")
 
     #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    def test_nonsymmetric_Symbol_rank2(self):
+    def test_antisymmetric_Symbol_rank2(self):
         shape=(4, 4)
         x=Symbol('x', shape)
-        y=nonsymmetric(x)
+        y=antisymmetric(x)
         self.assertTrue(isinstance(y, Symbol), "wrong type of result")
         xx=numpy.array([[-0.66708916764681492, -0.74238465201633974, -0.079353161755557622, 0.30257601442541904], 
 [0.20925829383746208, -0.022009924374370327, 0.10502873236092491, -0.15884250966193902], [0.1797060494083087, 
 0.023139755304512288, 0.45170178441767739, 0.48507784807517917], [0.059968719280828253, -0.41251906740163857, 
 0.2910687977002866, -0.63445501785955627]])
-        ref=nonsymmetric(xx)
+        ref=antisymmetric(xx)
         res=Evaluator(y)(x=xx)
         self.assertAlmostEqual(Lsup(res-ref), 0.0, self.TOL_DIGITS, "wrong result")
     #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    def test_nonsymmetric_Symbol_rank4(self):
+    def test_antisymmetric_Symbol_rank4(self):
         shape=(2, 3, 2, 3)
         x=Symbol('x', shape)
-        y=nonsymmetric(x)
+        y=antisymmetric(x)
         self.assertTrue(isinstance(y, Symbol), "wrong type of result")
         xx=numpy.array([[[[0.55376735565955926, -0.26986726052539045, 0.99540130065296051], [-0.66278909769325844, 
 0.47418600478729656, -0.57224349831177945]], [[0.26869674765376628, 0.87439268666551895, -0.58960755596620462], 
@@ -2491,7 +2492,7 @@ class Test_SymbolicTestCase(unittest.TestCase):
 [[0.38794533456732516, 0.68280080521959974, 0.95500219859151003], [-0.58249140643372388, -0.38374270163626578, 
 0.49303266750299435]], [[-0.34681269192922426, -0.24778438052869323, 0.72024308366404277], [0.768229749916157, 
 -0.79962778314547212, 0.70336464030375567]]]])
-        ref=nonsymmetric(xx)
+        ref=antisymmetric(xx)
         res=Evaluator(y)(x=xx)
         self.assertAlmostEqual(Lsup(res-ref), 0.0, self.TOL_DIGITS, "wrong result")
     #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff --git a/escriptcore/test/python/run_testdomain.py b/escriptcore/test/python/run_testdomain.py
index c5d9f9a..1807e02 100644
--- a/escriptcore/test/python/run_testdomain.py
+++ b/escriptcore/test/python/run_testdomain.py
@@ -23,6 +23,7 @@ __license__="""Licensed under the Apache License, version 2.0
 http://www.apache.org/licenses/LICENSE-2.0"""
 __url__="https://launchpad.net/escript-finley"
 
+import numpy
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 import sys
diff --git a/escriptcore/test/python/test_linearPDEs.py b/escriptcore/test/python/test_linearPDEs.py
index f0f4e35..e9ee508 100644
--- a/escriptcore/test/python/test_linearPDEs.py
+++ b/escriptcore/test/python/test_linearPDEs.py
@@ -32,11 +32,18 @@ Test suite for linearPDEs class
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
 from esys.escript.util import Lsup,kronecker,interpolate,whereZero, outer, swap_axes
-from esys.escript import Function,FunctionOnBoundary,FunctionOnContactZero,Solution,ReducedSolution,Vector,ContinuousFunction,Scalar, ReducedFunction,ReducedFunctionOnBoundary,ReducedFunctionOnContactZero,Data, Tensor4, Tensor, getEscriptParamInt, canInterpolate, getMPISizeWorld
+from esys.escript import Function,FunctionOnBoundary,FunctionOnContactZero,Solution,ReducedSolution,Vector,ContinuousFunction,Scalar, ReducedFunction,ReducedFunctionOnBoundary,ReducedFunctionOnContactZero,Data, Tensor4, Tensor, canInterpolate, getMPISizeWorld, hasFeature
 from esys.escript.linearPDEs import SolverBuddy, LinearPDE,IllegalCoefficientValue,Poisson, IllegalCoefficientFunctionSpace, TransportPDE, IllegalCoefficient, Helmholtz, LameEquation, SolverOptions
 import numpy
 import esys.escriptcore.utestselect as unittest
 
+mpisize = getMPISizeWorld()
+skip_amg = hasFeature("paso") and mpisize > 1
+# Transport problems only work with paso
+no_paso = not hasFeature("paso")
+no_direct = not hasFeature('trilinos') and not hasFeature('PASO_DIRECT') and mpisize == 1
+skip_muelu_long = False #no_paso and hasFeature("longindex")
+
 class Test_linearPDEs(unittest.TestCase):
     TOL=1.e-6
     SOLVER_TOL=1.e-10
@@ -446,7 +453,7 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         sb.setInnerTolerance(0.4)
         self.assertTrue(sb.getInnerTolerance() == 0.4, "InnerTolerance is wrong.")
 
-        self.assertTrue(sb.getDropTolerance() == 0.01, "initial DropTolerance is wrong.")
+        self.assertTrue(sb.getDropTolerance() == 0.0005, "initial DropTolerance is wrong.")
         self.assertRaises(ValueError,sb.setDropTolerance,-1)
         sb.setDropTolerance(0.5)
         self.assertTrue(sb.getDropTolerance() == 0.5, "DropTolerance is wrong.")
@@ -540,10 +547,6 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         self.assertTrue(sb.getPackage() == so.PASO, "PASO is not set.")
         sb.setPackage(so.CUSP)
         self.assertTrue(sb.getPackage() == so.CUSP, "CUSP is not set.")
-        sb.setPackage(so.SUPER_LU)
-        self.assertTrue(sb.getPackage() == so.SUPER_LU, "SUPER_LU is not set.")
-        sb.setPackage(so.PASTIX)
-        self.assertTrue(sb.getPackage() == so.PASTIX, "PASTIX is not set.")
         sb.setPackage(so.MKL)
         self.assertTrue(sb.getPackage() == so.MKL, "MKL is not set.")
         sb.setPackage(so.UMFPACK)
@@ -554,10 +557,10 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         self.assertTrue(sb.getSolverMethod() == so.DEFAULT, "initial SolverMethod is wrong.")
         self.assertRaises(ValueError,sb.setSolverMethod,-1)
 
-        if getMPISizeWorld() == 1 and not getEscriptParamInt('PASO_DIRECT'):
+        if no_direct:
             with self.assertRaises(ValueError) as package:
                 sb.setSolverMethod(so.DIRECT)
-            self.assertTrue('SolverOptionsException' in str(package.exception))
+            self.assertTrue('not compiled' in str(package.exception))
         else:
             sb.setSolverMethod(so.DIRECT)
             self.assertTrue(sb.getSolverMethod() == so.DIRECT, "DIRECT is not set.")
@@ -599,13 +602,11 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         self.assertTrue(sb.getPreconditioner() == so.ILUT, "ILUT is not set.")
         sb.setPreconditioner(so.JACOBI)
         self.assertTrue(sb.getPreconditioner() == so.JACOBI, "JACOBI is not set.")
-        if getEscriptParamInt('DISABLE_AMG', 0):
-            print("AMG test disabled on MPI build")
+        if skip_amg:
+            print("Paso AMG test disabled with more than 1 MPI rank")
         else:
             sb.setPreconditioner(so.AMG)
             self.assertTrue(sb.getPreconditioner() == so.AMG, "AMG is not set.")
-        sb.setPreconditioner(so.REC_ILU)
-        self.assertTrue(sb.getPreconditioner() == so.REC_ILU, "REC_ILU is not set.")
         sb.setPreconditioner(so.GAUSS_SEIDEL)
         self.assertTrue(sb.getPreconditioner() == so.GAUSS_SEIDEL, "GAUSS_SEIDEL is not set.")
         sb.setPreconditioner(so.RILU)
@@ -826,10 +827,12 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
 
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient("d_contact").isEmpty(),"d_contact is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_y_contact_Scalar(self):
-        d=self.domain.getDim()
         if self.domain.supportsContactElements():
+            d=self.domain.getDim()
             mypde=LinearPDE(self.domain,numSolutions=3,debug=self.DEBUG)
             mypde.setValue(y_contact=1.)
             coeff=mypde.getCoefficient("y_contact")
@@ -837,6 +840,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
 
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_A_reduced_Scalar(self):
         d=self.domain.getDim()
@@ -928,6 +933,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
 
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_Scalar(self):
         if self.domain.supportsContactElements():
             d=self.domain.getDim()
@@ -938,6 +946,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
 
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_r_Scalar(self):
         d=self.domain.getDim()
@@ -1110,6 +1120,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),FS(self.domain),1,1))
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient(coeff_name).isEmpty(),"%s is empty after reset of right hand side coefficients"%coeff_name)
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_y_contact_reduced_Scalar_using_y_contact(self):
         if self.domain.supportsContactElements():
@@ -1126,6 +1138,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),FS(self.domain),1))
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient(coeff_name).isEmpty(),"%s is not empty after reset of right hand side coefficients"%coeff_name)
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     #
     #  set coefficients for systems:
@@ -1204,6 +1218,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),FunctionOnContactZero(self.domain),self.N,self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient("d_contact").isEmpty(),"d_contact is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_System(self):
         if self.domain.supportsContactElements():
             d=self.domain.getDim()
@@ -1213,6 +1230,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),FunctionOnContactZero(self.domain),self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_A_reduced_System(self):
         d=self.domain.getDim()
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
@@ -1286,6 +1306,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnContactZero(self.domain),self.N,self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_System(self):
         if self.domain.supportsContactElements():
             d=self.domain.getDim()
@@ -1295,6 +1318,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnContactZero(self.domain),self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_r_System(self):
         d=self.domain.getDim()
         mypde=LinearPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -1465,6 +1491,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(),mypde.getNumEquations()),((self.N,self.N),FS(self.domain),self.N,self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient(coeff_name).isEmpty(),"%s is empty after reset of right hand side coefficients"%coeff_name)
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_y_contact_reduced_System_using_y_contact(self):
         if self.domain.supportsContactElements():
@@ -1481,6 +1509,8 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),FS(self.domain),self.N))
             mypde.resetRightHandSideCoefficients()
             self.assertTrue(mypde.getCoefficient(coeff_name).isEmpty(),"%s is not empty after reset of right hand side coefficients"%coeff_name)
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_resetCoefficient_HomogeneousConstraint(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
@@ -1584,7 +1614,9 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             d_contact=5*numpy.ones((self.N,self.N))
             d_contact[0,1]=0.
             mypde.setValue(d_contact=d_contact)
-            self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+            self.assertTrue(mypde.checkSymmetry(verbose=False),"symmetry detected")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_symmetryCheckFalse_A_reduced_System(self):
         d=self.domain.getDim()
@@ -1622,7 +1654,10 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             d_contact=5*numpy.ones((self.N,self.N))
             d_contact[0,1]=0.
             mypde.setValue(d_contact_reduced=d_contact)
-            self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+            # This should be negated like the other tests?
+            self.assertTrue(mypde.checkSymmetry(verbose=False),"symmetry detected")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_symmetryCheckTrue_Scalar(self):
         d=self.domain.getDim()
@@ -1679,24 +1714,6 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_symmetryOnDirect(self):
-        mypde=LinearPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
-        if getMPISizeWorld() == 1 and not getEscriptParamInt('PASO_DIRECT'):
-            with self.assertRaises(ValueError) as package:
-                mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-            self.assertTrue('SolverOptionsException' in str(package.exception))
-            return
-        else:
-            mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-        mypde.getSolverOptions().setVerbosity(self.VERBOSE)
-        if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as package:
-                u=mypde.getSolution()
-            self.assertTrue('PasoException' in str(package.exception))
-        else:
-            u=mypde.getSolution()
-            self.assertTrue(self.check(u,1.),'solution is wrong.')
     def test_PCG_JACOBI(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
@@ -1710,14 +1727,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(80)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_PCG_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
@@ -1733,37 +1751,36 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_PCG_RILU(self):
+    def test_PCG_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_PCG_REC_ILU(self):
+    def test_PCG_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
     def test_DIRECT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
-        if getMPISizeWorld() == 1 and not getEscriptParamInt('PASO_DIRECT'):
+        if no_direct:
             with self.assertRaises(ValueError) as package:
                 mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-            self.assertTrue('SolverOptionsException' in str(package.exception))
+            self.assertTrue('not compiled' in str(package.exception))
             return
         else:
             mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
-        if getMPISizeWorld() > 1:
+        if hasFeature('paso') and mpisize > 1:
             with self.assertRaises(RuntimeError) as package:
                 u=mypde.getSolution()
-            self.assertTrue('PasoException' in str(package.exception))
         else:
             u=mypde.getSolution()
             self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -1781,14 +1798,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_BICGSTAB_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
             mypde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
@@ -1804,19 +1822,19 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_BICGSTAB_RILU(self):
+    def test_BICGSTAB_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_BICGSTAB_REC_ILU(self):
+    def test_BICGSTAB_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -1833,14 +1851,16 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
+        if not hasFeature('paso'):
+            mypde.getSolverOptions().setNumSweeps(350)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_MINRES_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG',0):
-                print("AMG test disabled on MPI build")
-                return                
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
             mypde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
@@ -1856,19 +1876,19 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_MINRES_RILU(self):
+    def test_MINRES_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_MINRES_REC_ILU(self):
+    def test_MINRES_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -1885,14 +1905,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_TFQMR_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return 
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
             mypde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
@@ -1908,19 +1929,19 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_TFQMR_RILU(self):
+    def test_TFQMR_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_TFQMR_REC_ILU(self):
+    def test_TFQMR_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -1937,14 +1958,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PRES20)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_PRES20_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return 
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.PRES20)
@@ -1960,19 +1982,19 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_PRES20_RILU(self):
+    def test_PRES20_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PRES20)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_PRES20_REC_ILU(self):
+    def test_PRES20_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PRES20)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -1991,14 +2013,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setTruncation(50)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRESnoRestart_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
@@ -2016,20 +2039,20 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setTruncation(50)                         
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRESnoRestart_RILU(self):
+    def test_GMRESnoRestart_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
-        mypde.getSolverOptions().setTruncation(50)
+        mypde.getSolverOptions().setTruncation(50)                         
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRESnoRestart_REC_ILU(self):
+    def test_GMRESnoRestart_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         mypde.getSolverOptions().setTruncation(50)
         u=mypde.getSolution()
@@ -2047,14 +2070,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRES_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
@@ -2070,19 +2094,19 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRES_RILU(self):
+    def test_GMRES_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRES_REC_ILU(self):
+    def test_GMRES_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -2102,15 +2126,16 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setTruncation(10)
         mypde.getSolverOptions().setRestart(20)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRES_truncation_restart_AMG(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             mypde=LinearPDE(self.domain,debug=self.DEBUG)
             mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
             mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
@@ -2130,21 +2155,21 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setRestart(20)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRES_truncation_restart_RILU(self):
+    def test_GMRES_truncation_restart_ILUT(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.ILUT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         mypde.getSolverOptions().setTruncation(10)
         mypde.getSolverOptions().setRestart(20)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_GMRES_truncation_restart_REC_ILU(self):
+    def test_GMRES_truncation_restart_RILU(self):
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=kronecker(self.domain),D=1.,Y=1.)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
-        mypde.getSolverOptions().setPreconditioner(SolverOptions.REC_ILU)
+        mypde.getSolverOptions().setPreconditioner(SolverOptions.RILU)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         mypde.getSolverOptions().setTruncation(10)
         mypde.getSolverOptions().setRestart(20)
@@ -2166,31 +2191,7 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
-    def test_symmetryOnDirect_System(self):
-        A=Tensor4(0.,Function(self.domain))
-        D=Tensor(1.,Function(self.domain))
-        Y=Vector(self.domain.getDim(),Function(self.domain))
-        for i in range(self.domain.getDim()): 
-            A[i,:,i,:]=kronecker(self.domain)
-            D[i,i]+=i
-            Y[i]+=i
-        mypde=LinearPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(A=A,D=D,Y=Y)
-        if getMPISizeWorld() == 1 and not getEscriptParamInt('PASO_DIRECT'):
-            with self.assertRaises(ValueError) as package:
-                mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-            self.assertTrue('SolverOptionsException' in str(package.exception))
-            return
-        else:
-            mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-        mypde.getSolverOptions().setVerbosity(self.VERBOSE)
-        if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as package:
-                u=mypde.getSolution()
-            self.assertTrue('PasoException' in str(package.exception))
-        else:
-            u=mypde.getSolution()
-            self.assertTrue(self.check(u,1.),'solution is wrong.')
+
     def test_PCG_JACOBI_System(self):
         A=Tensor4(0.,Function(self.domain))
         D=Tensor(1.,Function(self.domain))
@@ -2218,14 +2219,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(130)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_PCG_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2265,18 +2267,17 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
             Y[i]+=i
         mypde=LinearPDE(self.domain,debug=self.DEBUG)
         mypde.setValue(A=A,D=D,Y=Y)
-        if getMPISizeWorld() == 1 and not getEscriptParamInt('PASO_DIRECT'):
+        if no_direct:
             with self.assertRaises(ValueError) as package:
                 mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
-            self.assertTrue('SolverOptionsException' in str(package.exception))
+            self.assertTrue('not compiled' in str(package.exception))
             return
         else:
             mypde.getSolverOptions().setSolverMethod(SolverOptions.DIRECT)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
-        if getMPISizeWorld() > 1:
+        if hasFeature('paso') and mpisize > 1:
             with self.assertRaises(RuntimeError) as package:
                 u=mypde.getSolution()
-            self.assertTrue('PasoException' in str(package.exception))
         else:
             u=mypde.getSolution()
             self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -2307,14 +2308,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_BICGSTAB_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2371,14 +2373,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.PRES20)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_PRES20_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2420,7 +2423,6 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        # u=mypde.getSolution(verbose=self.VERBOSE,truncation=5)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
@@ -2436,15 +2438,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        # u=mypde.getSolution(verbose=self.VERBOSE,truncation=5)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRESnoRestart_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG',0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2503,14 +2505,15 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.setValue(A=A,D=D,Y=Y)
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRES_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2570,15 +2573,16 @@ class Test_LinearPDE_noLumping(Test_linearPDEs):
         mypde.getSolverOptions().setSolverMethod(SolverOptions.GMRES)
         mypde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
         mypde.getSolverOptions().setVerbosity(self.VERBOSE)
+        mypde.getSolverOptions().setNumSweeps(30)
         mypde.getSolverOptions().setTruncation(10)
         mypde.getSolverOptions().setRestart(20)
         u=mypde.getSolution()
         self.assertTrue(self.check(u,1.),'solution is wrong.')
+
+    @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
+    @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
     def test_GMRES_truncation_restart_AMG_System(self):
         if self.order!=2:
-            if getEscriptParamInt('DISABLE_AMG', 0):
-                print("AMG test disabled on MPI build")
-                return
             A=Tensor4(0.,Function(self.domain))
             D=Tensor(1.,Function(self.domain))
             Y=Vector(self.domain.getDim(),Function(self.domain))
@@ -2935,6 +2939,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),FunctionOnBoundary(self.domain),1,1))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d").isEmpty(),"d is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_m_Scalar(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -2943,6 +2948,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),FunctionOnBoundary(self.domain),1,1))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("m").isEmpty(),"m is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_Scalar(self):
         if self.domain.supportsContactElements():
             d=self.domain.getDim()
@@ -2952,14 +2958,20 @@ class Test_TransportPDE(Test_linearPDEs):
             self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),FunctionOnContactZero(self.domain),1,1))
             mypde.resetRightHandSideCoefficients()
             self.assertFalse(mypde.getCoefficient("d_contact").isEmpty(),"d_contact is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_Scalar(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact=1.)
-        coeff=mypde.getCoefficient("y_contact")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),FunctionOnContactZero(self.domain),1))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact=1.)
+            coeff=mypde.getCoefficient("y_contact")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),FunctionOnContactZero(self.domain),1))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_setCoefficient_M_reduced_Scalar(self):
         d=self.domain.getDim()
@@ -3041,22 +3053,31 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnBoundary(self.domain),1,1))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d_reduced").isEmpty(),"d_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_reduced_Scalar(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(d_contact_reduced=1.)
-        coeff=mypde.getCoefficient("d_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1,1))
-        mypde.resetRightHandSideCoefficients()
-        self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            mypde.setValue(d_contact_reduced=1.)
+            coeff=mypde.getCoefficient("d_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1,1))
+            mypde.resetRightHandSideCoefficients()
+            self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_Scalar(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact_reduced=1.)
-        coeff=mypde.getCoefficient("y_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact_reduced=1.)
+            coeff=mypde.getCoefficient("y_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_r_Scalar(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -3186,22 +3207,31 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnBoundary(self.domain),1,1))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d_reduced").isEmpty(),"d_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_reduced_Scalar_using_d_contact(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(d_contact=Scalar(1.,ReducedFunctionOnContactZero(self.domain)))
-        coeff=mypde.getCoefficient("d_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1,1))
-        mypde.resetRightHandSideCoefficients()
-        self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"M is empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            mypde.setValue(d_contact=Scalar(1.,ReducedFunctionOnContactZero(self.domain)))
+            coeff=mypde.getCoefficient("d_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1,1))
+            mypde.resetRightHandSideCoefficients()
+            self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"M is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_Scalar_using_y_contact(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact=Scalar(1.,ReducedFunctionOnContactZero(self.domain)))
-        coeff=mypde.getCoefficient("y_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact=Scalar(1.,ReducedFunctionOnContactZero(self.domain)))
+            coeff=mypde.getCoefficient("y_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((),ReducedFunctionOnContactZero(self.domain),1))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     #
     #  set coefficients for systems:
     #
@@ -3213,6 +3243,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),Function(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("M").isEmpty(),"M is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_A_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3221,6 +3252,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N,d),Function(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("A").isEmpty(),"A is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_B_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3229,6 +3261,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N),Function(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("B").isEmpty(),"B is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_C_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3237,6 +3270,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N,d),Function(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("C").isEmpty(),"C is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_D_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3245,6 +3279,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),Function(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("D").isEmpty(),"D is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_X_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3253,6 +3288,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,d),Function(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("X").isEmpty(),"X is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_Y_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3261,6 +3297,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),Function(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("Y").isEmpty(),"Y is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_y_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3269,6 +3306,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),FunctionOnBoundary(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("y").isEmpty(),"y is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_m_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3277,6 +3315,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),FunctionOnBoundary(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("m").isEmpty(),"m is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3285,23 +3324,31 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),FunctionOnBoundary(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d").isEmpty(),"d is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_System(self):
-        
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(d_contact=numpy.ones((self.N,self.N)))
-        coeff=mypde.getCoefficient("d_contact")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),FunctionOnContactZero(self.domain),self.N,self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertFalse(mypde.getCoefficient("d_contact").isEmpty(),"d_contact is empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            mypde.setValue(d_contact=numpy.ones((self.N,self.N)))
+            coeff=mypde.getCoefficient("d_contact")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),FunctionOnContactZero(self.domain),self.N,self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertFalse(mypde.getCoefficient("d_contact").isEmpty(),"d_contact is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_System(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact=numpy.ones((self.N,)))
-        coeff=mypde.getCoefficient("y_contact")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),FunctionOnContactZero(self.domain),self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact=numpy.ones((self.N,)))
+            coeff=mypde.getCoefficient("y_contact")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),FunctionOnContactZero(self.domain),self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact").isEmpty(),"y_contact is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_M_System_reduced(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3310,6 +3357,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("M_reduced").isEmpty(),"M_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_A_reduced_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3318,6 +3366,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N,d),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("A_reduced").isEmpty(),"A_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_B_reduced_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3326,6 +3375,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("B_reduced").isEmpty(),"B_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_C_reduced_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3334,6 +3384,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N,d),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("C_reduced").isEmpty(),"C_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_D_System_reduced(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3342,6 +3393,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("D_reduced").isEmpty(),"D_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_X_System_reduced(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3350,6 +3402,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,d),ReducedFunction(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("X").isEmpty(),"X is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_Y_System_reduced(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3358,6 +3411,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunction(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("Y_reduced").isEmpty(),"Y_reduced is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_y_System_reduced(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3366,6 +3420,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnBoundary(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("y_reduced").isEmpty(),"y_reduced is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_m_reduced_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3374,6 +3429,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnBoundary(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("m_reduced").isEmpty(),"m_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_reduced_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3382,22 +3438,31 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnBoundary(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d_reduced").isEmpty(),"d_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_reduced_System(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(d_contact_reduced=numpy.ones((self.N,self.N)))
-        coeff=mypde.getCoefficient("d_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnContactZero(self.domain),self.N,self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            mypde.setValue(d_contact_reduced=numpy.ones((self.N,self.N)))
+            coeff=mypde.getCoefficient("d_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnContactZero(self.domain),self.N,self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_System(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact_reduced=numpy.ones((self.N,)))
-        coeff=mypde.getCoefficient("y_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnContactZero(self.domain),self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"X is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact_reduced=numpy.ones((self.N,)))
+            coeff=mypde.getCoefficient("y_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnContactZero(self.domain),self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"X is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_r_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -3406,6 +3471,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions()),((self.N,),Solution(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("r").isEmpty(),"r is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_q_System(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -3414,6 +3480,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions()),((self.N,),Solution(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("q").isEmpty(),"q is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_r_System_reducedOn(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -3423,6 +3490,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions()),((self.N,),ReducedSolution(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("r").isEmpty(),"r is no empty after reset of right hand side coefficients")
+
     def test_setCoefficient_q_System_reducedOn(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numEquations=3,debug=self.DEBUG)
@@ -3456,6 +3524,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N,d),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("A_reduced").isEmpty(),"A_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_B_reduced_System_using_B(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3464,6 +3533,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,d,self.N),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("B_reduced").isEmpty(),"B_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_C_reduced_System_using_C(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3472,6 +3542,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N,d),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("C_reduced").isEmpty(),"C_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_D_reduced_System_using_D(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3480,6 +3551,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunction(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("D_reduced").isEmpty(),"D_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_X_reduced_System_using_X(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3488,6 +3560,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,d),ReducedFunction(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("X_reduced").isEmpty(),"X_reduced is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_Y_reduced_System_using_Y(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3496,6 +3569,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunction(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("Y_reduced").isEmpty(),"Y_reduced is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_y_reduced_System_using_y(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
@@ -3504,6 +3578,7 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnBoundary(self.domain),self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertTrue(mypde.getCoefficient("y_reduced").isEmpty(),"y_reduced is not empty after reset of right hand side coefficients")
+
     def test_setCoefficient_m_reduced_System_using_m(self):
         d=self.domain.getDim()
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3527,22 +3602,30 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnBoundary(self.domain),self.N,self.N))
         mypde.resetRightHandSideCoefficients()
         self.assertFalse(mypde.getCoefficient("d_reduced").isEmpty(),"d_reduced is empty after reset of right hand side coefficients")
+
     def test_setCoefficient_d_contact_reduced_System_using_d_contact(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        mypde.setValue(d_contact=Data(numpy.ones((self.N,self.N)),ReducedFunctionOnContactZero(self.domain)))
-        coeff=mypde.getCoefficient("d_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnContactZero(self.domain),self.N,self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            mypde.setValue(d_contact=Data(numpy.ones((self.N,self.N)),ReducedFunctionOnContactZero(self.domain)))
+            coeff=mypde.getCoefficient("d_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumSolutions(), mypde.getNumEquations()),((self.N,self.N),ReducedFunctionOnContactZero(self.domain),self.N,self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertFalse(mypde.getCoefficient("d_contact_reduced").isEmpty(),"d_contact_reduced is empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
+
     def test_setCoefficient_y_contact_reduced_System_using_y_contact(self):
-        d=self.domain.getDim()
-        mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
-        mypde.setValue(y_contact=Data(numpy.ones((self.N,)),ReducedFunctionOnContactZero(self.domain)))
-        coeff=mypde.getCoefficient("y_contact_reduced")
-        self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnContactZero(self.domain),self.N))
-        mypde.resetRightHandSideCoefficients()
-        self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        if self.domain.supportsContactElements():
+            d=self.domain.getDim()
+            mypde=TransportPDE(self.domain,numSolutions=3,debug=self.DEBUG)
+            mypde.setValue(y_contact=Data(numpy.ones((self.N,)),ReducedFunctionOnContactZero(self.domain)))
+            coeff=mypde.getCoefficient("y_contact_reduced")
+            self.assertEqual((coeff.getShape(),coeff.getFunctionSpace(), mypde.getNumEquations()),((self.N,),ReducedFunctionOnContactZero(self.domain),self.N))
+            mypde.resetRightHandSideCoefficients()
+            self.assertTrue(mypde.getCoefficient("y_contact_reduced").isEmpty(),"y_contact_reduced is not empty after reset of right hand side coefficients")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_symmetryCheckTrue_System(self):
         d=self.domain.getDim()
@@ -3555,7 +3638,9 @@ class Test_TransportPDE(Test_linearPDEs):
         d=4*numpy.ones((self.N,self.N))
         m=64*numpy.ones((self.N,self.N))
         d_contact=5*numpy.ones((self.N,self.N))
-        mypde.setValue(M=M,A=A,B=B,C=C,D=D,d=d,d_contact=d_contact,m=m,M_reduced=-M,A_reduced=-A,B_reduced=-B,C_reduced=-C,D_reduced=-D,d_reduced=-d,d_contact_reduced=-d_contact, m_reduced=-m)
+        mypde.setValue(M=M,A=A,B=B,C=C,D=D,d=d,m=m,M_reduced=-M,A_reduced=-A,B_reduced=-B,C_reduced=-C,D_reduced=-D,d_reduced=-d, m_reduced=-m)
+        if self.domain.supportsContactElements():
+            mypde.setValue(d_contact=d_contact,d_contact_reduced=-d_contact)
         self.assertTrue(mypde.checkSymmetry(verbose=False),"symmetry detected")
 
     def test_symmetryCheckFalse_M_System(self):
@@ -3605,11 +3690,14 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
 
     def test_symmetryCheckFalse_d_contact_System(self):
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        d_contact=5*numpy.ones((self.N,self.N))
-        d_contact[0,1]=0.
-        mypde.setValue(d_contact=d_contact)
-        self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+        if self.domain.supportsContactElements():
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            d_contact=5*numpy.ones((self.N,self.N))
+            d_contact[0,1]=0.
+            mypde.setValue(d_contact=d_contact)
+            self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     def test_symmetryCheckFalse_M_reduced_System(self):
         mypde=TransportPDE(self.domain,debug=self.DEBUG)
@@ -3657,11 +3745,14 @@ class Test_TransportPDE(Test_linearPDEs):
         self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
 
     def test_symmetryCheckFalse_d_contact_reduced_System(self):
-        mypde=TransportPDE(self.domain,debug=self.DEBUG)
-        d_contact=5*numpy.ones((self.N,self.N))
-        d_contact[0,1]=0.
-        mypde.setValue(d_contact_reduced=d_contact)
-        self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+        if self.domain.supportsContactElements():
+            mypde=TransportPDE(self.domain,debug=self.DEBUG)
+            d_contact=5*numpy.ones((self.N,self.N))
+            d_contact[0,1]=0.
+            mypde.setValue(d_contact_reduced=d_contact)
+            self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
+        else:
+            return unittest.skip("Domain does not support contact elements")
 
     #==============================================================
     def test_symmetryCheckTrue_Scalar(self):
@@ -3674,8 +3765,10 @@ class Test_TransportPDE(Test_linearPDEs):
         D=3
         m=10
         d=4
-        d_contact=5
-        mypde.setValue(M=M,A=A,B=B,C=C,D=D,d=d,m=m,d_contact=d_contact,M_reduced=-M,A_reduced=-A,B_reduced=-B,C_reduced=-C,D_reduced=-D,d_reduced=-d,d_contact_reduced=-d_contact,m_reduced=-m)
+        mypde.setValue(M=M,A=A,B=B,C=C,D=D,d=d,m=m,M_reduced=-M,A_reduced=-A,B_reduced=-B,C_reduced=-C,D_reduced=-D,d_reduced=-d,m_reduced=-m)
+        if self.domain.supportsContactElements():
+            d_contact=5
+            mypde.setValue(d_contact=d_contact,d_contact_reduced=-d_contact)
         self.assertTrue(mypde.checkSymmetry(verbose=False),"symmetry detected")
 
     def test_symmetryCheckFalse_A_Scalar(self):
@@ -3709,6 +3802,7 @@ class Test_TransportPDE(Test_linearPDEs):
         mypde.setValue(B_reduced=B,C_reduced=C)
         self.assertTrue(not mypde.checkSymmetry(verbose=False),"symmetry detected")
 
+    @unittest.skipIf(no_paso, "Transport PDEs require Paso")
     def test_reducedOn(self):
         dt=0.1
         mypde=TransportPDE(self.domain,numSolutions=1,debug=self.DEBUG)
@@ -3727,3 +3821,4 @@ class Test_TransportPDE(Test_linearPDEs):
         u=mypde.getSolution(0.1)
         self.assertTrue(u.getFunctionSpace() == Solution(self.domain), "wrong function space")
         self.assertTrue(self.check(u,10.+dt),'solution is wrong.')
+
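
The test_linearPDEs.py hunks above drop the in-body getEscriptParamInt('DISABLE_AMG', 0) checks in favour of module-level flags (skip_amg, skip_muelu_long, no_direct, no_paso, mpisize) consumed by unittest.skipIf decorators or simple if-branches. A minimal, self-contained sketch of that skip-flag pattern follows; the flag names mirror the ones used above, but their values and derivations here are placeholders, since the real definitions (built from hasFeature() and getMPISizeWorld()) sit earlier in the test module and are not shown in this diff.

    import unittest

    # Placeholder flag values: the real test module derives these from
    # esys.escript (hasFeature, getMPISizeWorld) before any class is defined.
    mpisize = 1
    no_paso = False
    skip_amg = no_paso or mpisize > 1
    skip_muelu_long = False

    class ExampleSolverTests(unittest.TestCase):
        @unittest.skipIf(skip_amg, "Paso AMG test disabled on more than 1 MPI rank")
        @unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
        def test_amg_variant(self):
            # the real tests assemble a LinearPDE here and verify its solution
            self.assertTrue(True)

    if __name__ == '__main__':
        unittest.main()
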
diff --git a/escriptcore/test/python/test_nonLinearPDE.py b/escriptcore/test/python/test_nonLinearPDE.py
index 391e562..0df291f 100644
--- a/escriptcore/test/python/test_nonLinearPDE.py
+++ b/escriptcore/test/python/test_nonLinearPDE.py
@@ -54,9 +54,8 @@ class Test_nonLinearPDEs(unittest.TestCase):
     VERBOSE=False
     
     
-    
+@unittest.skipIf(not sympyavail, 'sympy not available')
 class Test_nlpde(Test_nonLinearPDEs):
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_run(self):
         #test just to confirm nlpde works   
         u=Symbol('u', dim=self.domain.getDim())
@@ -65,7 +64,7 @@ class Test_nlpde(Test_nonLinearPDEs):
         gammaD=whereZero(x[0])+whereZero(x[1])
         nlpde.setValue(X=grad(u), Y=5*u, q=gammaD, r=1)
         v=nlpde.getSolution(u=1)
-    @unittest.skipIf(not sympyavail, 'sympy not available')
+
     def test_setVals1eq(self):
         #test setting Coefficients with 1 equation
         dim=self.domain.getDim()
@@ -107,7 +106,6 @@ class Test_nlpde(Test_nonLinearPDEs):
             temp=Symbol('temp') 
             self.assertTrue(D-temp.subs(temp,5)==temp.subs(temp,0))
 
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_setVals2eq(self):
         #test setting Coefficients with 2 coeficients
         dim=self.domain.getDim()
@@ -155,7 +153,6 @@ class Test_nlpde(Test_nonLinearPDEs):
             self.assertTrue(numpy.ndarray.__eq__(CTest, C).all())
             self.assertTrue(numpy.ndarray.__eq__(DTest, D).all())
 
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_DimAndShape1eq(self):
         dim=self.domain.getDim()
         if dim==3:
@@ -173,7 +170,6 @@ class Test_nlpde(Test_nonLinearPDEs):
         #args=dict(q=u)
         #self.assertRaises(IllegalCoefficientValue, nlpde.setValue,**args)
     
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_DimAndShape2eq(self):
         dim=self.domain.getDim()
         u = Symbol('u',(2,), dim=dim)
@@ -183,7 +179,6 @@ class Test_nlpde(Test_nonLinearPDEs):
         args=dict(X=grad(u[0]), Y=5*u)
         self.assertRaises(IllegalCoefficientValue, nlpde.setValue,**args)
 
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_setUnknownPeram(self):
         dim=self.domain.getDim()
         u = Symbol('u',(2,), dim=dim)
@@ -191,7 +186,6 @@ class Test_nlpde(Test_nonLinearPDEs):
         args=dict(k=0,f=8)  
         self.assertRaises(IllegalCoefficient,nlpde.setValue,**args)
 
-    @unittest.skipIf(not sympyavail, 'sympy not available')
     def test_yDirection(self):
         dim=self.domain.getDim()
         if dim==3:
@@ -227,3 +221,4 @@ class Test_nlpde(Test_nonLinearPDEs):
         loc=Locator(v.getFunctionSpace(),x)
         valAtX=loc(v)
         self.assertTrue(valAtX[0]>10*valAtX[1])
+
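
In test_nonLinearPDE.py the repeated per-method guard @unittest.skipIf(not sympyavail, 'sympy not available') is hoisted onto the Test_nlpde class, so every method is skipped when sympy is missing without restating the decorator. A small sketch of that idiom; deriving sympyavail from a plain import attempt here is only for illustration, as the real module sets the flag elsewhere.

    import unittest

    try:
        import sympy
        sympyavail = True
    except ImportError:
        sympyavail = False

    @unittest.skipIf(not sympyavail, 'sympy not available')
    class SympyDependentTests(unittest.TestCase):
        def test_uses_sympy(self):
            # skipped as a whole class when sympy is unavailable
            self.assertEqual(sympy.Symbol('u') - sympy.Symbol('u'), 0)

    if __name__ == '__main__':
        unittest.main()
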
diff --git a/escriptcore/test/python/test_objects.py b/escriptcore/test/python/test_objects.py
index 58a30d1..0ea5c99 100644
--- a/escriptcore/test/python/test_objects.py
+++ b/escriptcore/test/python/test_objects.py
@@ -270,7 +270,7 @@ class Test_saveCSV(unittest.TestCase):
             self.assertEqual(len(self.firstline[i]),len(line_got))
             for j in range(len(self.firstline[i])):
                 if self.firstline[i][j] is not None:
-                    self.assertAlmostEquals(self.firstline[i][j],line_got[j])
+                    self.assertAlmostEqual(self.firstline[i][j],line_got[j])
             linecount=1
             while line!='':
                 linecount+=1
@@ -367,7 +367,7 @@ class Test_Domain(unittest.TestCase):
         self.assertTrue(not self.domain.isValidTagName(tag3))
         self.assertTrue(self.domain.getTag(tag1)==1)
         self.assertTrue(self.domain.getTag(tag2)==2)
-        self.assertRaises(RuntimeError,self.domain.getTag,tag3)
+        self.assertRaises(ValueError,self.domain.getTag,tag3)
 
         # set tag:
         s=Scalar(0,Function(self.domain))
@@ -487,25 +487,6 @@ class Test_Domain(unittest.TestCase):
         if self.domain.getDim()>2: self.assertTrue(sup(x[2])<=1.)
    #===========================================================================
 
-class Test_GlobalMinMax(unittest.TestCase):
-   def test_GlobalMinMax(self):
-        myrank=getMPIRankWorld()
-        d=Data(myrank,Function(self.domain))
-        minproc=inf(d)
-        maxproc=sup(d)          #This tells us where to expect values to be
-        if d.getNumberOfDataPoints()>0:
-                d.setValueOfDataPoint(0,myrank-0.001);
-        p,n=d.minGlobalDataPoint()
-        self.assertTrue(p==minproc,"Incorrect process indentified as holding min")
-        self.assertTrue(n==0,"Incorrect position for min")
-        if d.getNumberOfDataPoints()>0:
-                d.setValueOfDataPoint(0,myrank+0.001)
-        p,n=d.maxGlobalDataPoint()
-        self.assertTrue(p==maxproc,"Incorrect process indentified as holding min")
-        self.assertTrue(n==0,"Incorrect position for min")
-
-
-
 class Test_SetDataPointValue(unittest.TestCase):
     args=[9.81,
         numpy.array([3.098, -3.111]),
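
The test_objects.py hunk above and the test_pdetools.py hunks below standardise on the current unittest method names: assertAlmostEqual replaces the deprecated assertAlmostEquals alias, and assertLess(a, b) replaces assertTrue(a < b), which also reports both operands when it fails. An illustrative sketch with made-up values and tolerance:

    import unittest

    class AssertionStyleExample(unittest.TestCase):
        RES_TOL = 1e-8   # made-up tolerance, standing in for the tests' RES_TOL

        def test_almost_equal(self):
            self.assertAlmostEqual(0.1 + 0.2, 0.3)   # preferred spelling

        def test_less(self):
            residual = 1e-12
            self.assertLess(residual, self.RES_TOL, "residual too large")

    if __name__ == '__main__':
        unittest.main()
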
diff --git a/escriptcore/test/python/test_pdetools.py b/escriptcore/test/python/test_pdetools.py
index 796993f..1b3a939 100644
--- a/escriptcore/test/python/test_pdetools.py
+++ b/escriptcore/test/python/test_pdetools.py
@@ -50,6 +50,7 @@ The tests must be linked with a Domain class object in the setUp method:
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
 import esys.escriptcore.utestselect as unittest
+import numpy
 from esys.escript import *
 from esys.escript.pdetools import Locator,Projector,TimeIntegrationManager,NoPDE,PCG, ArithmeticTuple, GMRES, MINRES, TFQMR, HomogeneousSaddlePointProblem
 from esys.escript.pdetools import Defect, NewtonGMRES
@@ -66,7 +67,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
            t+=dt
            tm.checkin(dt,t)
         v_guess=tm.extrapolate(dt)
-        self.assertTrue(abs(v_guess-(tm.getTime()+dt))<self.RES_TOL,"extrapolation is wrong")
+        self.assertLess(abs(v_guess-(tm.getTime()+dt)), self.RES_TOL, "extrapolation is wrong")
 
     def test_TimeIntegrationManager_vector(self):
         t=0.
@@ -77,7 +78,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
            tm.checkin(dt,t,3*t)
         v_guess=tm.extrapolate(dt)
         e=max(abs(v_guess[0]-(tm.getTime()+dt)),abs(v_guess[1]-(tm.getTime()+dt)*3.))
-        self.assertTrue(e<self.RES_TOL,"extrapolation is wrong")
+        self.assertLess(e, self.RES_TOL)
 
     def test_Locator(self):
         x=self.domain.getX()
@@ -105,13 +106,13 @@ class Test_pdetools_noLumping(unittest.TestCase):
         self.assertTrue(Lsup(xx-l(x2))<self.RES_TOL,"location wrong")
         xx=l(x[0]+x[1])
         self.assertTrue(isinstance(xx,float),"wrong scalar type")
-        self.assertTrue(abs(xx-l(x2[0])-l(x2[1]))<self.RES_TOL,"value wrong scalar")
+        self.assertLess(abs(xx-l(x2[0])-l(x2[1])), self.RES_TOL)
 
         l=Locator(self.domain,numpy.ones((self.domain.getDim(),)))
         d=Data(0, ContinuousFunction(self.domain))
         l.setValue(d, 7)
         self.assertTrue(sup(d)>6, "value not set")     # guarantees we have set something
-        self.assertTrue(Lsup(l.getValue(d)-7)<self.RES_TOL, "value not set in the correct place")        
+        self.assertLess(Lsup(l.getValue(d)-7), self.RES_TOL, "value not set in the correct place")        
 
 
     def test_Locator_withList(self):
@@ -152,43 +153,41 @@ class Test_pdetools_noLumping(unittest.TestCase):
         self.assertTrue(isinstance(xx,list),"list expected (3)")
         for i in range(len(xx)):
            self.assertTrue(isinstance(xx[i],float),"wrong scalar type")
-           self.assertTrue(abs(xx[i]-(l(x2[0])[i]+l(x2[1])[i]))<self.RES_TOL,"value wrong scalar")
+           self.assertLess(abs(xx[i]-(l(x2[0])[i]+l(x2[1])[i])), self.RES_TOL)
            
         l=Locator(self.domain,numpy.ones((self.domain.getDim(),)))
         d=Data(0, ContinuousFunction(self.domain))
         l.setValue(d, 7)
         self.assertTrue(sup(d)>6, "value not set")     # guarantees we have set something
-        self.assertTrue(Lsup(l.getValue(d)-7)<self.RES_TOL, "value not set in the correct place")
-           
-         
-      
+        self.assertLess(Lsup(l.getValue(d)-7), self.RES_TOL, "value not set in the correct place")
+
     def testProjector_rank0(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=x[0]
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank1(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=x
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank2(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=[[11.,12.],[21,22.]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank3(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=[[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank4(self):
       x=ContinuousFunction(self.domain).getX()
@@ -196,7 +195,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       td_ref=[[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
 
     def testProjector_rank0_reduced(self):
@@ -204,28 +203,28 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=x[0]
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank1_reduced(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=x
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank2_reduced(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=[[11.,12.],[21,22.]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank3_reduced(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=[[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank4_reduced(self):
       x=ContinuousFunction(self.domain).getX()
@@ -233,35 +232,35 @@ class Test_pdetools_noLumping(unittest.TestCase):
       td_ref=[[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank0_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=x[0]
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank1_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=x
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank2_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=[[11.,12.],[21,22.]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank3_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=False,fast=False)
       td_ref=[[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank4_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -269,7 +268,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       td_ref=[[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
 
     def testProjector_rank0_reduced_with_reduced_input(self):
@@ -277,28 +276,28 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=1.
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank1_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=numpy.array([1.,2.,3.])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank2_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=numpy.array([[11.,12.],[21,22.]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank3_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
       p=Projector(self.domain,reduce=True,fast=False)
       td_ref=numpy.array([[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
     def testProjector_rank4_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -306,7 +305,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       td_ref=numpy.array([[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*self.RES_TOL)
 
 
     def test_NoPDE_scalar_missing_r(self):
@@ -316,7 +315,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=1.,Y=1.,q=msk)
       u=p.getSolution()
       u_ex=(1.-msk)
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_scalar_missing_Y(self):
       p=NoPDE(self.domain)
@@ -325,7 +324,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=1.,q=msk,r=2.)
       u=p.getSolution()
       u_ex=msk*2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_scalar_constant(self):
       p=NoPDE(self.domain)
@@ -334,7 +333,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=1.,Y=1.,q=msk,r=2.)
       u=p.getSolution()
       u_ex=(1.-msk)+msk*2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_scalar_variable(self):
       p=NoPDE(self.domain)
@@ -343,7 +342,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=10,Y=2*10,q=msk,r=2.)
       u=p.getSolution()
       u_ex=2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_vector_missing_Y(self):
       p=NoPDE(self.domain)
@@ -352,7 +351,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=numpy.ones([2]),q=msk,r=2.)
       u=p.getSolution()
       u_ex=msk*2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_vector_missing_r(self):
       p=NoPDE(self.domain)
@@ -361,7 +360,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=numpy.ones([2]),Y=numpy.ones([2]),q=msk)
       u=p.getSolution()
       u_ex=(1.-msk)
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_vector_constant(self):
       p=NoPDE(self.domain)
@@ -370,7 +369,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=numpy.ones([2]),Y=numpy.ones([2]),q=msk,r=2.)
       u=p.getSolution()
       u_ex=(1.-msk)+msk*2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
 
     def test_NoPDE_vector_variable(self):
       p=NoPDE(self.domain)
@@ -379,7 +378,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       p.setValue(D=x[:2]+1,Y=2*(x[:2]+1),q=msk,r=2.)
       u=p.getSolution()
       u_ex=2.
-      self.assertTrue(Lsup(u_ex-u)<Lsup(u_ex)*self.RES_TOL,"value wrong")
+      self.assertLess(Lsup(u_ex-u), Lsup(u_ex)*self.RES_TOL)
     #=====
     def testPCG(self):
       from numpy import array, dot, zeros, size, float64
@@ -453,8 +452,8 @@ class Test_pdetools_noLumping(unittest.TestCase):
 
       tol=1.e-4
       x,r,a_norm=PCG(b*1.,Ap,x_ref*0.,Ms,dot, atol=0, rtol=tol, iter_max=12)
-      self.assertTrue(Lsup(x-x_ref)<=Lsup(x_ref)*tol*10.,"wrong solution")
-      self.assertTrue(Lsup(r-(b-dot(A,x)))<=Lsup(b)*EPSILON*100.,"wrong solution")
+      self.assertLess(Lsup(x-x_ref), Lsup(x_ref)*tol*10.)
+      self.assertLess(Lsup(r-(b-dot(A,x))), Lsup(b)*EPSILON*100.)
 
     def testMINRES(self):
       from numpy import array, dot, zeros, size, float64
@@ -528,7 +527,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
 
       tol=1.e-4
       x=MINRES(b*1.,Ap,x_ref*0,Ms,dot, atol=0, rtol=tol, iter_max=12)
-      self.assertTrue(Lsup(x-x_ref)<=Lsup(x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-x_ref), Lsup(x_ref)*tol*10.)
 
     def testTFQMR(self):
       from numpy import array, dot, zeros, size, float64
@@ -601,7 +600,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       tol=1.e-5
       for i in range(size(b)): b[i]/=A[i,i]
       x=TFQMR(b,Ap,x_ref*0,dot, atol=0, rtol=tol, iter_max=12)
-      self.assertTrue(Lsup(x-x_ref)<=Lsup(x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-x_ref), Lsup(x_ref)*tol*10.)
 
     def testGMRES(self):
       from numpy import array, dot, zeros, size, float64
@@ -674,7 +673,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       tol=1.e-4
       for i in range(size(b)): b[i]/=A[i,i]
       x=GMRES(b,Ap,x_ref*0,dot,atol=0, rtol=tol, iter_max=12)
-      self.assertTrue(Lsup(x-x_ref)<=Lsup(x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-x_ref), Lsup(x_ref)*tol*10.)
 
     def testGMRES_P_R(self):
       from numpy import array,  dot, zeros, size, float64
@@ -748,7 +747,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
        
       tol=1.e-4
       x=GMRES(b,Ap,x_ref*0,dot,atol=0, rtol=tol, iter_max=12,P_R=P_Rp)
-      self.assertTrue(Lsup(x-x_ref)<=Lsup(x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-x_ref), Lsup(x_ref)*tol*10.)
 
     def testNewtonGMRES(self):
       from numpy import array, dot, zeros, size, float64
@@ -825,7 +824,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       tol=1.e-8
       ll=LL()
       x=NewtonGMRES(LL(),ll.x_ref*0., iter_max=100, sub_iter_max=20, atol=0,rtol=tol, verbose=self.VERBOSE)
-      self.assertTrue(Lsup(x-ll.x_ref)<=Lsup(ll.x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-ll.x_ref), Lsup(ll.x_ref)*tol*10.)
 
     def testNewtonGMRES(self):
       from numpy import array,  dot, zeros, size, float64
@@ -902,7 +901,7 @@ class Test_pdetools_noLumping(unittest.TestCase):
       tol=1.e-8
       ll=LL()
       x=NewtonGMRES(LL(),ll.x_ref*0., iter_max=100, sub_iter_max=20, atol=0,rtol=tol, verbose=self.VERBOSE)
-      self.assertTrue(Lsup(x-ll.x_ref)<=Lsup(ll.x_ref)*tol*10.,"wrong solution")
+      self.assertLess(Lsup(x-ll.x_ref), Lsup(ll.x_ref)*tol*10.)
 
     def testHomogeneousSaddlePointProblem_PCG(self):
       from numpy import array,  dot, zeros, size, float64
@@ -1011,8 +1010,8 @@ class Test_pdetools_noLumping(unittest.TestCase):
       ll.setTolerance(tol)
       # ll.setSubToleranceReductionFactor(0.1)
       x,p=ll.solve(ll.x_ref*1.20,ll.p_ref*(-2),max_iter=20, verbose=False, usePCG=True, iter_restart=20,max_correction_steps=10)
-      self.assertTrue(Lsup(x-ll.x_ref)<=Lsup(ll.x_ref)*tol*10.,"wrong x solution")
-      self.assertTrue(Lsup(p-ll.p_ref)<=Lsup(ll.p_ref)*tol*10.,"wrong p solution")
+      self.assertLess(Lsup(x-ll.x_ref), Lsup(ll.x_ref)*tol*10.)
+      self.assertLess(Lsup(p-ll.p_ref), Lsup(ll.p_ref)*tol*10.)
 
     def testHomogeneousSaddlePointProblem_GMRES(self):
       from numpy import array, prod, dot, zeros, size, float64
@@ -1122,27 +1121,27 @@ class Test_pdetools_noLumping(unittest.TestCase):
       # ll.setSubToleranceReductionFactor(0.1)
       x,p=ll.solve(ll.x_ref*1.20,ll.p_ref*(-2),max_iter=20, verbose=False, usePCG=False, 
 iter_restart=20,max_correction_steps=10)
-      self.assertTrue(Lsup(x-ll.x_ref)<=Lsup(ll.x_ref)*tol*10.,"wrong x solution")
-      self.assertTrue(Lsup(p-ll.p_ref)<=Lsup(ll.p_ref)*tol*10.,"wrong p solution")
+      self.assertLess(Lsup(x-ll.x_ref), Lsup(ll.x_ref)*tol*10.)
+      self.assertLess(Lsup(p-ll.p_ref), Lsup(ll.p_ref)*tol*10.)
 
     def testArithmeticTuple(self):
         a=ArithmeticTuple(1.,2.)
-        self.assertTrue(len(a)==2,"wrong length")
-        self.assertTrue(a[0]==1.,"wrong first item")
-        self.assertTrue(a[1]==2.,"wrong second item")
+        self.assertTrue(len(a)==2, "wrong length")
+        self.assertTrue(a[0]==1., "wrong first item")
+        self.assertTrue(a[1]==2., "wrong second item")
         c=a*6.
-        self.assertTrue(isinstance(c,ArithmeticTuple),"c is not an instance of ArithmeticTuple")
-        self.assertTrue(len(c)==2,"c has wrong length")
-        self.assertTrue(c[0]==6.,"c has wrong first item")
-        self.assertTrue(c[1]==12.,"c has wrong second item")
+        self.assertTrue(isinstance(c,ArithmeticTuple), "c is not an instance of ArithmeticTuple")
+        self.assertTrue(len(c)==2, "c has wrong length")
+        self.assertTrue(c[0]==6., "c has wrong first item")
+        self.assertTrue(c[1]==12., "c has wrong second item")
         b=5.*a
         self.assertTrue(isinstance(b,ArithmeticTuple),"b is not an instance of ArithmeticTuple")
-        self.assertTrue(len(b)==2,"b has wrong length")
-        self.assertTrue(b[0]==5.,"b has wrong first item")
-        self.assertTrue(b[1]==10.,"b has wrong second item")
+        self.assertTrue(len(b)==2, "b has wrong length")
+        self.assertTrue(b[0]==5., "b has wrong first item")
+        self.assertTrue(b[1]==10., "b has wrong second item")
         a+=ArithmeticTuple(3.,4.)
-        self.assertTrue(a[0]==4.,"wrong first item of inplace update")
-        self.assertTrue(a[1]==6.,"wrong second item of inplace update")
+        self.assertTrue(a[0]==4., "wrong first item of inplace update")
+        self.assertTrue(a[1]==6., "wrong second item of inplace update")
 
 
 
@@ -1153,7 +1152,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=x[0]
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank1_fast_reduced(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1162,7 +1161,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       td_ref=x
       res=td_ref.interpolate(Function(self.domain))
       td=p(res)
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank2_fast_reduced(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1170,7 +1169,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=[[11.,12.],[21,22.]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank3_fast_reduced(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1178,7 +1177,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=[[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank4_fast_reduced(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1187,7 +1186,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       td_ref=[[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]]*(x[0]+x[1])
       td=p(td_ref.interpolate(Function(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank0_fast_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1195,7 +1194,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=1.
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank1_fast_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1203,7 +1202,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=numpy.array([1.,2.,3.])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank2_fast_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1211,7 +1210,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=numpy.array([[11.,12.],[21,22.]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank3_fast_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1219,7 +1218,7 @@ class Test_pdetools(Test_pdetools_noLumping):
       p=Projector(self.domain,reduce=True,fast=True)
       td_ref=numpy.array([[[111.,112.],[121,122.]],[[211.,212.],[221,222.]]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
     def testProjector_rank4_fast_reduced_with_reduced_input(self):
       x=ContinuousFunction(self.domain).getX()
@@ -1228,5 +1227,5 @@ class Test_pdetools(Test_pdetools_noLumping):
       td_ref=numpy.array([[[[1111.,1112.],[1121,1122.]],[[1211.,1212.],[1221,1222.]]], 
 [[[2111.,2112.],[2121,2122.]],[[2211.,2212.],[2221,2222.]]]])
       td=p(Data(td_ref,ReducedFunction(self.domain)))
-      self.assertTrue(Lsup(td-td_ref)<Lsup(td_ref)*h,"value wrong")
+      self.assertLess(Lsup(td-td_ref), Lsup(td_ref)*h)
 
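Editorial note (not part of the commit): most of the test_pdetools hunks above are a mechanical conversion from assertTrue(a < b, "message") to assertLess(a, b). A minimal standalone sketch, using made-up values, of why that form is preferred: on failure assertLess reports both operands, whereas assertTrue only reports that the expression was false.

    import unittest

    class ToleranceExample(unittest.TestCase):
        RES_TOL = 1.e-7

        def test_tolerance(self):
            error = 3.0e-8                       # below RES_TOL, so both checks pass
            # On failure this would only say: "False is not true : value wrong"
            self.assertTrue(error < self.RES_TOL, "value wrong")
            # On failure this would say e.g.: "3e-08 not less than 1e-07"
            self.assertLess(error, self.RES_TOL)

    if __name__ == "__main__":
        unittest.main()
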
diff --git a/escriptcore/test/python/test_simplesolve.py b/escriptcore/test/python/test_simplesolve.py
new file mode 100644
index 0000000..1b7bba2
--- /dev/null
+++ b/escriptcore/test/python/test_simplesolve.py
@@ -0,0 +1,169 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Generic base class for PDE solving tests
+"""
+
+from esys.escript import Data, Function, Lsup, Solution, Tensor4, Vector, \
+                         grad, inner, kronecker, matrixmult, whereZero
+from esys.escript.linearPDEs import LinearPDE, SolverOptions
+import esys.escriptcore.utestselect as unittest
+import numpy
+
+
+class SimpleSolveTestCase(unittest.TestCase):
+    REL_TOL = 1.e-6
+    SOLVER_VERBOSE = False
+    SOLVER_TOL = 1.e-8
+    FAC_DIAG = 1.
+    FAC_OFFDIAG = -0.4
+    # the following members must be set by the test methods in subclasses
+    domain = None
+    package = None
+    method = None
+    preconditioner = None
+
+    def _getGrad(self, system):
+        """returns exact gradient"""
+        dim = self.domain.getDim()
+        if system:
+            g_ex = Data(0., (dim,dim), Solution(self.domain))
+            if dim == 2:
+                g_ex[0,0] = 2.
+                g_ex[0,1] = 3.
+                g_ex[1,0] = 3.
+                g_ex[1,1] = 2.
+            else:
+                g_ex[0,0] = 2.
+                g_ex[0,1] = 3.
+                g_ex[0,2] = 4.
+                g_ex[1,0] = 4.
+                g_ex[1,1] = 1.
+                g_ex[1,2] = -2.
+                g_ex[2,0] = 8.
+                g_ex[2,1] = 4.
+                g_ex[2,2] = 5.
+        else:
+            g_ex = Data(0., (dim,), Solution(self.domain))
+            if dim == 2:
+                g_ex[0] = 2.
+                g_ex[1] = 3.
+            else:
+                g_ex[0] = 2.
+                g_ex[1] = 3.
+                g_ex[2] = 4.
+        return g_ex
+
+    def _getSolution(self, system):
+        """returns exact solution"""
+        dim = self.domain.getDim()
+        x = Solution(self.domain).getX()
+        if system:
+            u_ex = Vector(0., Solution(self.domain))
+            if dim == 2:
+                u_ex[0] =  1.+2.*x[0]+3.*x[1]
+                u_ex[1] = -1.+3.*x[0]+2.*x[1]
+            else:
+                u_ex[0] =  1.+2.*x[0]+3.*x[1]+4.*x[2]
+                u_ex[1] = -1.+4.*x[0]+1.*x[1]-2.*x[2]
+                u_ex[2] =  5.+8.*x[0]+4.*x[1]+5.*x[2]
+        else:
+            if dim == 2:
+                u_ex = 1.+2.*x[0]+3.*x[1]
+            else:
+                u_ex = 1.+2.*x[0]+3.*x[1]+4.*x[2]
+        return u_ex
+
+    def _setCoefficients(self, pde, system):
+        """sets PDE coefficients"""
+        FAC_DIAG = 1.
+        FAC_OFFDIAG = -0.4
+        x = Solution(self.domain).getX()
+        mask = whereZero(x[0])
+        dim = self.domain.getDim()
+        u_ex = self._getSolution(system)
+        g_ex = self._getGrad(system)
+
+        if system:
+            A = Tensor4(0., Function(self.domain))
+            for i in range(dim):
+                A[i,:,i,:] = kronecker(dim)
+
+            Y = Vector(0., Function(self.domain))
+            if dim == 2:
+                Y[0] = u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
+                Y[1] = u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
+            else:
+                Y[0] = u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
+                Y[1] = u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
+                Y[2] = u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
+            pde.setValue(r=u_ex, q=mask*numpy.ones(dim,),
+                         A=A,
+                         D=kronecker(dim)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((dim,dim))*FAC_OFFDIAG,
+                         Y=Y,
+                         y=matrixmult(g_ex,self.domain.getNormal()))
+        else:
+            pde.setValue(r=u_ex, q=mask, A=kronecker(dim),
+                         y=inner(g_ex, self.domain.getNormal()))
+
+    def _setSolverOptions(self, so):
+        """override this to modify solver options prior to solving"""
+        pass
+
+    def getPDE(self, system):
+        dim = self.domain.getDim()
+        if system:
+            pde=LinearPDE(self.domain, numEquations=dim)
+        else:
+            pde=LinearPDE(self.domain, numEquations=1)
+
+        self._setCoefficients(pde, system)
+        so = pde.getSolverOptions()
+        so.setPackage(self.package)
+        so.setSolverMethod(self.method)
+        so.setPreconditioner(self.preconditioner)
+        so.setTolerance(self.SOLVER_TOL)
+        so.setVerbosity(self.SOLVER_VERBOSE)
+        self._setSolverOptions(so)
+        return pde, self._getSolution(system), self._getGrad(system)
+
+    def test_single(self):
+        pde, u_ex, g_ex = self.getPDE(False)
+        g=grad(u_ex)
+        self.assertLess(Lsup(g_ex-g), self.REL_TOL*Lsup(g_ex))
+        u = pde.getSolution()
+        error = Lsup(u-u_ex)
+        self.assertLess(error, self.REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
+
+    def test_system(self):
+        pde, u_ex, g_ex = self.getPDE(True)
+        g = grad(u_ex)
+        self.assertLess(Lsup(g_ex-g), self.REL_TOL*Lsup(g_ex))
+        u = pde.getSolution()
+        error = Lsup(u-u_ex)
+        self.assertLess(error, self.REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
+
+
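Editorial note (not part of the commit): the new test_simplesolve.py module only defines the generic SimpleSolveTestCase base class; the domain, package, method and preconditioner members are left to subclasses. A hedged sketch of how a concrete test module might plug into it follows -- the ripley Rectangle domain and the PASO/PCG/JACOBI solver constants are illustrative assumptions, not taken from this commit.

    import esys.escriptcore.utestselect as unittest
    from esys.escript.linearPDEs import SolverOptions
    from esys.ripley import Rectangle            # assumed domain module
    from test_simplesolve import SimpleSolveTestCase

    class Test_SimpleSolveRipley_PasoPCG(SimpleSolveTestCase):
        def setUp(self):
            # members required by SimpleSolveTestCase.getPDE()
            self.domain = Rectangle(n0=20, n1=20)
            self.package = SolverOptions.PASO
            self.method = SolverOptions.PCG
            self.preconditioner = SolverOptions.JACOBI

        def tearDown(self):
            del self.domain

    if __name__ == "__main__":
        unittest.main()

With those members set, the inherited test_single and test_system methods build the PDE, solve it, and compare against the exact solution within REL_TOL.
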
diff --git a/escriptcore/test/python/test_splitworld.py b/escriptcore/test/python/test_splitworld.py
index cc2a3ac..c68317c 100644
--- a/escriptcore/test/python/test_splitworld.py
+++ b/escriptcore/test/python/test_splitworld.py
@@ -1,6 +1,6 @@
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -15,7 +15,7 @@
 
 from __future__ import print_function, division
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
@@ -550,7 +550,7 @@ class Test_SplitWorld(unittest.TestCase):
       for x in range(2,getMPISizeWorld()+2):
         sw.addJob(Test_SplitWorld.FactorJob, fact=x)
       sw.runJobs()
-      self.assertEquals(sw.getFloatVariable('boolean'),0)
+      self.assertEqual(sw.getFloatVariable('boolean'),0)
       sw.clearVariable('value')
       sw.clearVariable('boolean')
       sw.addJob(Test_SplitWorld.InjectJob, name='value', val=101)      # Feed it a prime  
@@ -568,7 +568,7 @@ class Test_SplitWorld(unittest.TestCase):
         if 100%x==0:
           m=x
       sw.runJobs()
-      self.assertEquals(sw.getFloatVariable('boolean'),m)      
+      self.assertEqual(sw.getFloatVariable('boolean'),m)      
       
   def test_split_simple_solve(self):
     """
@@ -579,7 +579,7 @@ class Test_SplitWorld(unittest.TestCase):
     sw.addVariable("answer", "float", "SUM")
     sw.addJob(self.eqnJob2)
     sw.runJobs()
-    self.assertEquals(sw.getFloatVariable("answer"),1)
+    self.assertEqual(sw.getFloatVariable("answer"),1)
     
   def test_split_simple_solve_multiple(self):
     """
@@ -596,7 +596,7 @@ class Test_SplitWorld(unittest.TestCase):
         total+=jobid
         jobid+=1
     sw.runJobs()
-    self.assertEquals(sw.getFloatVariable("answer"), total)
+    self.assertEqual(sw.getFloatVariable("answer"), total)
     
   def test_split_simple_and_dummy(self):
     """
@@ -636,7 +636,7 @@ class Test_SplitWorld(unittest.TestCase):
         total=total+(x+1)
     sw.runJobs()
       # expecting this to fail until I work out the answer
-    self.assertEquals(sw.getFloatVariable("answer"),total)    
+    self.assertEqual(sw.getFloatVariable("answer"),total)    
     
     
   def test_split_multiple_batches(self):
@@ -661,7 +661,7 @@ class Test_SplitWorld(unittest.TestCase):
         total=total+(x+1+getMPISizeWorld())
     sw.runJobs()
       # expecting this to fail until I work out the answer
-    self.assertEquals(sw.getFloatVariable("answer"),total)    
+    self.assertEqual(sw.getFloatVariable("answer"),total)    
   
   @unittest.skipIf(getMPISizeWorld()%2!=0, "Test requires even number of processes")
   def test_multiple_equations_size2world(self):
@@ -693,7 +693,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")
@@ -711,7 +711,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")   
@@ -726,7 +726,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)    
+    self.assertEqual(ha, tot)    
 
   @unittest.skipIf(getMPISizeWorld()%4!=0, "Test requires number of processes divisible by 4")
   def test_multiple_equations_size4world(self):
@@ -758,7 +758,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")
@@ -776,7 +776,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")   
@@ -791,7 +791,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=2
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)        
+    self.assertEqual(ha, tot)        
     
     
   def test_multiple_equations_smallworld(self):
@@ -821,7 +821,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")
@@ -839,7 +839,7 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=1      
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)
+    self.assertEqual(ha, tot)
     sw.clearVariable("answer")
     sw.clearVariable("hanswer")
     sw.clearVariable("v")   
@@ -853,4 +853,4 @@ class Test_SplitWorld(unittest.TestCase):
       jobid+=3
     sw.runJobs()
     ha=sw.getFloatVariable("hanswer")
-    self.assertEquals(ha, tot)     
+    self.assertEqual(ha, tot)     
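Editorial note (not part of the commit): the test_splitworld hunks above swap the deprecated assertEquals alias for assertEqual. A small standalone check of the reason: under Python 3 the alias emits a DeprecationWarning, and recent interpreter releases drop it altogether (hence the hasattr guard).

    import unittest
    import warnings

    class AliasCheck(unittest.TestCase):
        def test_assertEquals_is_deprecated(self):
            if not hasattr(self, "assertEquals"):
                self.skipTest("alias already removed in this Python version")
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter("always")
                self.assertEquals(1, 1)          # deprecated spelling
            self.assertTrue(any(issubclass(w.category, DeprecationWarning)
                                for w in caught))

    if __name__ == "__main__":
        unittest.main()
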
diff --git a/escriptcore/test/python/test_symfuncs.py b/escriptcore/test/python/test_symfuncs.py
index 015bca6..a8cac5a 100644
--- a/escriptcore/test/python/test_symfuncs.py
+++ b/escriptcore/test/python/test_symfuncs.py
@@ -39,6 +39,7 @@ from esys.escript import *
 import esys.escriptcore.utestselect as unittest
 
 class Test_symfuncs(unittest.TestCase):
+    RES_TOL = 1.e-7
 
     #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     def test_div_Symbol(self):
diff --git a/escriptcore/test/python/test_util_NaN_funcs.py b/escriptcore/test/python/test_util_NaN_funcs.py
index 017a65b..ebfd549 100644
--- a/escriptcore/test/python/test_util_NaN_funcs.py
+++ b/escriptcore/test/python/test_util_NaN_funcs.py
@@ -32,6 +32,15 @@ class Test_util_NaN_funcs(unittest.TestCase):
         self.assertTrue(sclNaN.hasNaN(),"sclNaN should contain NaN but its doesn't")
         sclNaN.replaceNaN(15.0)
         self.assertEqual(es.Lsup(sclNaN), 15.0)
+        scl=es.Scalar(0,es.ContinuousFunction(dom))
+        scl.expand()
+        scl.promote()
+        if not es.getEscriptParamInt('AUTOLAZY')==1:            
+            sclNaN=scl/0
+            self.assertTrue(sclNaN.hasNaN(),"sclNaN should contain NaN but its doesn't")
+            sclNaN.replaceNaN(3+4j)
+            self.assertEqual(es.Lsup(sclNaN), 5.0)
+
     
     def test_replaceNaNConstant(self):
         dom=self.domain
@@ -40,6 +49,12 @@ class Test_util_NaN_funcs(unittest.TestCase):
         self.assertTrue(dat.hasNaN(),"dat should contain NaN but its doesn't")
         dat.replaceNaN(10)
         self.assertEqual(es.Lsup(dat), 10)
+        dat = es.Data(10,es.ContinuousFunction(dom))
+        dat.promote()
+        dat=(dat*0)/0
+        self.assertTrue(dat.hasNaN(),"dat should contain NaN but its doesn't")
+        dat.replaceNaN(4+3j)
+        self.assertEqual(es.Lsup(dat), 5)
 
     def test_replaceNaNTagged(self):
         dom=self.domain
@@ -48,3 +63,9 @@ class Test_util_NaN_funcs(unittest.TestCase):
         sigma.setTaggedValue(1 , es.Lsup(dat))
         sigma.replaceNaN(10)
         self.assertEqual(es.Lsup(sigma), 10)
+        sigma = es.Scalar(0,es.FunctionOnBoundary(dom))
+        sigma.promote()
+        dat=(sigma*0)/0
+        sigma.setTaggedValue(1 , es.Lsup(dat))
+        sigma.replaceNaN(3+4j)
+        self.assertEqual(es.Lsup(sigma), 5)
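Editorial note (not part of the commit): the complex-valued additions to test_util_NaN_funcs rely on Lsup of complex data being the largest modulus, so filling NaNs with 3+4j (or 4+3j) must give exactly 5. A plain-numpy analogue of that arithmetic -- not escript code, just the identity the new assertions depend on:

    import numpy

    values = numpy.array([numpy.nan + 0j, 1.0 + 1.0j])
    values[numpy.isnan(values)] = 3 + 4j     # stands in for Data.replaceNaN(3+4j)
    lsup = numpy.max(numpy.abs(values))      # stands in for es.Lsup(...)
    assert abs(lsup - 5.0) < 1e-12           # |3+4j| = sqrt(9 + 16) = 5
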
diff --git a/escriptcore/test/python/test_util_binary_no_tagged_data.py b/escriptcore/test/python/test_util_binary_no_tagged_data.py
index f4881f4..ad6915b 100644
--- a/escriptcore/test/python/test_util_binary_no_tagged_data.py
+++ b/escriptcore/test/python/test_util_binary_no_tagged_data.py
@@ -54,6 +54,9 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       ref=-0.42972087524346775
       self.assertTrue(isinstance(res,float),"wrong type of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      res=inner(arg0,arg1*1j)
+      self.assertTrue(isinstance(res,complex),"wrong type of result.")
+      self.assertTrue(Lsup(res-ref*1j)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_float_rank0_array_rank0(self):
       arg0=-0.0120734848986
@@ -72,6 +75,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=Data(-0.028641890754520932j,self.functionspace) 
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_float_rank0_expandedData_rank0(self):
       arg0=-0.702135729267
@@ -83,6 +92,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg1.promote()
+        res=inner(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank0_float_rank0(self):
       arg0=numpy.array(-0.172668075783)
@@ -92,6 +107,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(-0.172668075783j)
+      res=inner(arg0,arg1)
+      ref=ref*1j
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result."+str(type(res)))
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank0_array_rank0(self):
       arg0=numpy.array(0.367217155332)
@@ -101,6 +122,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(0.367217155332j)
+      arg1=numpy.array(0.353427284375j)
+      res=inner(arg0,arg1)
+      ref=numpy.array(-0.12978456198487195)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank1_array_rank1(self):
       arg0=numpy.array([0.17057698496056495, 0.65197508237616231])
@@ -110,6 +138,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array([0.17057698496056495j, 0.65197508237616231j])
+      arg1=numpy.array([0.32087931119793067j, 0.47355654012965243j])
+      res=inner(arg0,arg1)
+      ref=numpy.array(-0.363481689701)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank2_array_rank2(self):
       arg0=numpy.array([[-0.035529960222147716, 0.99948208956536089, 0.29174060409603397, 0.80304088757032743, 
@@ -127,6 +162,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=numpy.array(0.400418822582)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank3_array_rank3(self):
       arg0=numpy.array([[[0.89352907932481784, -0.2813007089590529], [-0.69957301452894849, 0.17341921348802947]], 
@@ -181,6 +223,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank0_constData_rank0(self):
       arg0=numpy.array(-0.810401259044)
@@ -190,6 +239,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(-0.810401259044j)
+      arg1=Data(-0.325113918466j,self.functionspace)
+      res=inner(arg0,arg1)
+      ref=Data(-0.26347272885752965,self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank1_constData_rank1(self):
       arg0=numpy.array([-0.070866698348063917, 0.050603972040196954])
@@ -215,6 +271,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=Data(0.226903257331,self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank3_constData_rank3(self):
       arg0=numpy.array([[[0.10241974677245125, -0.5612793405960681], [0.48956883482708147, 0.21824369883334827]], 
@@ -233,6 +296,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank4_constData_rank4(self):
       arg0=numpy.array([[[[0.016639529837428668, 0.84838979094454325, -0.50467114288267378, -0.54181350086674529], 
@@ -281,6 +351,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(0.685017565314j)
+      if not arg1.isLazy():
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank1_expandedData_rank1(self):
       arg0=numpy.array([-0.33806363855530708, -0.38292988645561055])
@@ -316,6 +394,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=inner(arg0,arg1)
+        ref=-ref
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank3_expandedData_rank3(self):
       arg0=numpy.array([[[0.6807678093224121, -0.57723267525409039], [0.70068737825030447, -0.098432327342198533]], 
@@ -404,6 +490,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      res=res*1j
+      ref=ref*1j
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank0_array_rank0(self):
       arg0=Data(-0.68099713877,self.functionspace)
@@ -413,6 +505,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      res=res*1j
+      ref=ref*1j
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank1_array_rank1(self):
       arg0=Data(numpy.array([0.033542800183077981, 0.33729160037543515]),self.functionspace)
@@ -457,6 +555,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank4_array_rank4(self):
       arg0=Data(numpy.array([[[[0.84900275444177553, -0.246217726373545, -0.73088633570089478, -0.047481367331499991], 
@@ -494,6 +599,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank0_constData_rank0(self):
       arg0=Data(-0.109880112782,self.functionspace)
@@ -503,6 +615,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank1_constData_rank1(self):
       arg0=Data(numpy.array([-0.38855187589347273, 0.14448607308006922]),self.functionspace)
@@ -585,6 +704,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=inner(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank0_expandedData_rank0(self):
       arg0=Data(0.504999017422,self.functionspace)
@@ -732,6 +858,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=inner(arg0,arg1)
+        ref=-ref
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank1_array_rank1(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -857,6 +991,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy() and not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=inner(arg0,arg1)
+        ref=-ref
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank1_constData_rank1(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -869,6 +1011,16 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=msk_ref*(0.114763218027)+(1.-msk_ref)*(0.67942081766)
+        ref=-ref
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank2_constData_rank2(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -918,6 +1070,15 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*(3.91262521622)+(1.-msk_ref)*(-1.17757862828))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank4_constData_rank4(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -972,6 +1133,15 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j      
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*(-0.755579480427)+(1.-msk_ref)*(-0.59812450412))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank0_expandedData_rank0(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -984,6 +1154,15 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j  
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*numpy.array(0.21829611538783306)+(1.-msk_ref)*numpy.array(0.23986351876555856))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank1_expandedData_rank1(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -998,6 +1177,15 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j 
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*(-0.0855848124973)+(1.-msk_ref)*(-0.706977533319))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank2_expandedData_rank2(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -1058,6 +1246,15 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy() and not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j       
+        res=inner(arg0,arg1)
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*(-2.28253572206)+(1.-msk_ref)*(0.205671973309))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank4_expandedData_rank4(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -1133,7 +1330,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       res=outer(arg0,arg1)
       ref=-0.062635374280529704
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result") 
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,complex),"wrong type of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result") 
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_array_rank0(self):
       arg0=0.470792845884
@@ -1143,6 +1346,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=numpy.array(arg1*1j)    
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_constData_rank0(self):
       arg0=0.806587192294
@@ -1152,6 +1362,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j    
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_expandedData_rank0(self):
       arg0=0.62510111232
@@ -1163,6 +1380,16 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j    
+        ref=-ref
+        res=outer(arg0,arg1)        
+        msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
+        ref=-(msk_ref*numpy.array(0.58292971173616104)+(1.-msk_ref)*numpy.array(-0.24958707354962301))
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_array_rank1(self):
       arg0=-0.147579432862
@@ -1172,6 +1399,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j    
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_constData_rank1(self):
       arg0=0.307392721359
@@ -1181,6 +1415,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j    
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_expandedData_rank1(self):
       arg0=-0.533630322003
@@ -1211,6 +1453,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j    
+      ref=-ref
+      res=outer(arg0,arg1)      
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_constData_rank2(self):
       arg0=-0.461803900652
@@ -1255,6 +1504,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j    
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_array_rank3(self):
       arg0=0.329599447984
@@ -1291,6 +1548,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j    
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_expandedData_rank3(self):
       arg0=0.305104655943
@@ -1396,6 +1661,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j    
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_expandedData_rank4(self):
       arg0=0.184226502646
@@ -1474,6 +1746,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank1_float_rank0(self):
       arg0=numpy.array([-0.7077557598550499, 0.34384980137545629])
@@ -1517,6 +1796,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank4_float_rank0(self):
       arg0=numpy.array([[[[-0.87020281833627244, 0.38653786763242937, -0.39861176181522451, -0.65499761474764417], 
@@ -1589,6 +1875,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank3_array_rank0(self):
       arg0=numpy.array([[[0.22890218267521778, 0.14190434160962129], [-0.84482969323319335, 0.91841360418106577]], 
@@ -1643,6 +1936,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(3, 2, 3, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank0_constData_rank0(self):
       arg0=numpy.array(0.887539290361)
@@ -1733,6 +2033,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank0_expandedData_rank0(self):
       arg0=numpy.array(-0.219684990046)
@@ -1779,6 +2086,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=outer(arg0,arg1)
+        ref=-ref      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank3_expandedData_rank0(self):
       arg0=numpy.array([[[0.54236255244194465, 0.75659586809114421], [0.3661458269609339, -0.51975633455419379]], 
@@ -1804,6 +2119,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=outer(arg0,arg1)
+        ref=-ref    
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank4_expandedData_rank0(self):
       arg0=numpy.array([[[[-0.822250636341469, 0.63638809184653833, 0.52959023053761745, -0.0036888432821744477], 
@@ -1897,6 +2220,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref    
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank3_array_rank1(self):
       arg0=numpy.array([[[-0.11650349333382004, -0.93704280994921563], [0.16842803438649012, -0.29159530904824105]], 
@@ -1959,6 +2289,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=outer(arg0,arg1)
+      ref=-ref    
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank3_constData_rank1(self):
       arg0=numpy.array([[[-0.66447070728135871, 0.10916086666511138], [0.26746102869120603, -0.70631701651855239]], 
@@ -1994,6 +2331,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank1_expandedData_rank1(self):
       arg0=numpy.array([-0.6816375542209947, -0.73859164214492656])
@@ -2008,6 +2353,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank2_expandedData_rank1(self):
       arg0=numpy.array([[0.11413458360117934, -0.39371474720074584, -0.52902000724565745, 0.4389579328234896, 
@@ -2209,6 +2562,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")          
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank0_constData_rank2(self):
       arg0=numpy.array(0.372750342838)
@@ -2226,6 +2586,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank1_constData_rank2(self):
       arg0=numpy.array([-0.7248354981517402, -0.71175522139340708])
@@ -2371,6 +2739,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank1_expandedData_rank2(self):
       arg0=numpy.array([0.45174073833470918, -0.32999422808025969])
@@ -2589,6 +2965,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank0_array_rank3(self):
       arg0=numpy.array(-0.409786441515)
@@ -2883,6 +3267,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_constData_rank0_float_rank0(self):
       arg0=Data(0.855868237407,self.functionspace)
@@ -3228,6 +3620,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1) 
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_constData_rank4_expandedData_rank0(self):
       arg0=Data(numpy.array([[[[-0.83504325403320423, 0.72986446282641149, 0.86742233864227591, -0.0059036297732719678], 
@@ -4611,6 +5011,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=numpy.array(arg1*1j)
+        ref=-ref
+        res=outer(arg0, arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_expandedData_rank0_constData_rank0(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -5489,6 +5897,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_expandedData_rank1_constData_rank2(self):
       msk_arg0=whereNegative(self.functionspace.getX()[0]-0.5)
@@ -6331,6 +6747,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_maximum_float_rank0_float_rank0(self):
       arg0=0.190833616561
@@ -14268,6 +14692,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank2_array_rank2(self):
       arg0=numpy.array([[-1.0, 5.0, -3.0], [5.0, 7.0, 0.0]])
@@ -14286,6 +14717,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank2_constData_rank2(self):
       arg0=numpy.array([[-5.0, 0.0, -6.0], [-6.0, 2.0, 4.0]])
@@ -14295,6 +14733,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank2_expandedData_rank1(self):
       arg0=numpy.array([[3.0, 5.0, -3.0], [4.0, -5.0, -2.0]])
@@ -14306,6 +14751,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank2_expandedData_rank2(self):
       arg0=numpy.array([[6.0, 7.0, 4.0], [-1.0, -4.0, 4.0]])
@@ -14334,6 +14787,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)        
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_array_rank3(self):
       arg0=numpy.array([[[[6.0, 6.0, 2.0], [0.0, 5.0, 0.0]], [[-6.0, -2.0, -4.0], [-2.0, 4.0, -3.0]], [[-5.0, -6.0, 0.0], [5.0, 
@@ -14426,6 +14886,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)    
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_constData_rank4(self):
       arg0=numpy.array([[[[3.0, -7.0, -1.0], [2.0, 7.0, 0.0]], [[-3.0, 7.0, -6.0], [5.0, -5.0, 5.0]], [[5.0, 7.0, 3.0], [3.0, 
@@ -14489,6 +14956,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)        
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_expandedData_rank3(self):
       arg0=numpy.array([[[[-3.0, 1.0, -6.0], [-6.0, 7.0, -4.0]], [[5.0, 2.0, 5.0], [7.0, -3.0, -2.0]], [[-7.0, 6.0, 5.0], [0.0, 
@@ -14512,6 +14987,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)   
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_expandedData_rank4(self):
       arg0=numpy.array([[[[-5.0, -7.0, 4.0], [-7.0, 3.0, -7.0]], [[3.0, -2.0, 7.0], [-6.0, 2.0, 6.0]], [[-1.0, 6.0, -4.0], 
@@ -14620,6 +15103,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_constData_rank2_expandedData_rank1(self):
       arg0=Data(numpy.array([[6.0, 6.0, -5.0], [5.0, -6.0, -6.0]]),self.functionspace)
@@ -14659,6 +15149,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_constData_rank4_array_rank3(self):
       arg0=Data(numpy.array([[[[-5.0, -4.0, 7.0], [0.0, 5.0, -2.0]], [[1.0, 2.0, -7.0], [6.0, 0.0, 5.0]], [[-2.0, 5.0, 2.0], 
@@ -14838,6 +15335,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")                
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_constData_rank4_expandedData_rank4(self):
       arg0=Data(numpy.array([[[[-2.0, 0.0, -7.0], [-6.0, 0.0, 3.0]], [[-7.0, 0.0, -4.0], [-6.0, 5.0, 3.0]], [[6.0, 5.0, -3.0], 
@@ -15136,6 +15641,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_expandedData_rank4_constData_rank3(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -15384,6 +15897,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       ref=42.0
       self.assertTrue(isinstance(res,float),"wrong type of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,complex),"wrong type of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_float_rank0_array_rank0_offset0(self):
       arg0=0.0
@@ -15413,6 +15932,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_float_rank0_array_rank3_offset0(self):
       arg0=-6.0
@@ -15496,6 +16022,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_float_rank0_expandedData_rank0_offset0(self):
       arg0=7.0
@@ -15623,6 +16156,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank4_array_rank4_offset4(self):
       arg0=numpy.array([[[[0.0, 5.0], [-6.0, -7.0], [-5.0, 1.0]], [[1.0, 3.0], [-6.0, 1.0], [-3.0, -1.0]]], [[[-4.0, 0.0], 
@@ -15654,6 +16194,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank2_array_rank3_offset2(self):
       arg0=numpy.array([[4.0, 2.0, 5.0], [-5.0, 6.0, -6.0]])
@@ -15663,6 +16210,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_array_rank4_offset3(self):
       arg0=numpy.array([[[-5.0, -4.0, 7.0], [-6.0, -3.0, 1.0], [4.0, -7.0, 1.0], [7.0, 5.0, 2.0]], [[2.0, 0.0, 0.0], [5.0, 
@@ -15806,6 +16360,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=4)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank0_constData_rank1_offset0(self):
       arg0=numpy.array(-5.0)
@@ -16123,6 +16685,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank1_expandedData_rank4_offset1(self):
       arg0=numpy.array([-5.0, -6.0, -3.0])
@@ -16145,6 +16715,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=numpy.array(arg0*1j)
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank0_expandedData_rank4_offset0(self):
       arg0=numpy.array(-1.0)
@@ -16355,6 +16933,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank4_constData_rank3_offset3(self):
       arg0=numpy.array([[[[-3.0, 3.0, 5.0], [-4.0, -2.0, 7.0], [-1.0, -1.0, -1.0], [-6.0, -3.0, 6.0]], [[-7.0, -7.0, 1.0], 
@@ -16647,6 +17232,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank1_expandedData_rank3_offset0(self):
       arg0=numpy.array([-6.0, 6.0])
@@ -17250,6 +17843,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_expandedData_rank3_offset1(self):
       arg0=numpy.array([[[-4.0, 7.0, -5.0], [-6.0, -3.0, 0.0], [1.0, -1.0, -5.0], [-4.0, 5.0, 7.0], [5.0, 7.0, -7.0]], [[1.0, 
@@ -17386,6 +17987,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_float_rank0_offset0(self):
       arg0=numpy.array([[[-4.0, -1.0], [-5.0, 6.0]], [[6.0, 4.0], [-3.0, -2.0]], [[4.0, -2.0], [-2.0, -6.0]], [[-5.0, -6.0], 
@@ -17517,6 +18126,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank4_expandedData_rank1_offset1(self):
       arg0=numpy.array([[[[-5.0, 0.0, 6.0], [-2.0, 7.0, -1.0]], [[-3.0, -3.0, -1.0], [4.0, 6.0, 0.0]]], [[[-6.0, 3.0, 7.0], 
@@ -17720,6 +18337,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=1j*arg0
+      arg1=1j*arg1
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank2_array_rank3_offset2(self):
       arg0=Data(numpy.array([[-2.0, 4.0, 5.0], [0.0, -3.0, 0.0]]),self.functionspace)
@@ -17913,6 +18537,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=generalTensorProduct(arg0,arg1,axis_offset=3)
+      ref=-ref
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank0_constData_rank2_offset0(self):
       arg0=Data(-1.0,self.functionspace)
@@ -17978,6 +18609,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank0_constData_rank4_offset0(self):
       arg0=Data(-5.0,self.functionspace)
@@ -18096,6 +18734,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=1j*arg0
+        arg1=1j*arg1
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank3_expandedData_rank4_offset3(self):
       arg0=Data(numpy.array([[[-1.0, 4.0, 3.0], [4.0, 0.0, 3.0], [3.0, -4.0, -6.0], [7.0, 5.0, -4.0]], [[1.0, 4.0, -1.0], [5.0, 
@@ -18237,6 +18883,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank1_float_rank0_offset0(self):
       arg0=Data(numpy.array([3.0, -7.0]),self.functionspace)
@@ -18303,6 +18957,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank3_array_rank3_offset2(self):
       arg0=Data(numpy.array([[[-7.0, -7.0, 0.0], [6.0, 5.0, 7.0]], [[2.0, -7.0, 3.0], [2.0, 3.0, -2.0]]]),self.functionspace)
@@ -18921,6 +19582,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank4_array_rank4_offset2(self):
       arg0=Data(numpy.array([[[[-6.0, 7.0, 1.0], [0.0, 0.0, -5.0]], [[-4.0, 7.0, -5.0], [2.0, 0.0, -7.0]], [[3.0, -1.0, 4.0], 
@@ -19458,6 +20126,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank3_float_rank0_offset0(self):
       arg0=Data(numpy.array([[[4.0, 2.0], [-1.0, 6.0]], [[3.0, 3.0], [7.0, 2.0]], [[-2.0, -4.0], [1.0, -6.0]], [[-6.0, -5.0], 
@@ -19832,6 +20508,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank3_array_rank4_offset3(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -20307,6 +20991,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank0_expandedData_rank2_offset0(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -20369,6 +21061,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank0_expandedData_rank3_offset0(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -21434,6 +22134,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank3_constData_rank1_offset1(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -22194,6 +22902,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank4_constData_rank1_offset1(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -22462,6 +23178,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_array_rank2_array_rank2(self):
       arg0=numpy.array([[6.0, 3.0, 3.0], [7.0, 3.0, 6.0]])
@@ -22512,6 +23235,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        res=matrix_mult(arg0,arg1)
+        ref=-ref
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_constData_rank2_array_rank1(self):
       arg0=Data(numpy.array([[-7.0, -3.0, -3.0], [-1.0, 7.0, -7.0]]),self.functionspace)
@@ -22595,6 +23326,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_expandedData_rank2_constData_rank1(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -22632,6 +23371,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_expandedData_rank2_expandedData_rank2(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -22705,6 +23452,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_constData_rank2_array_rank1(self):
       arg0=Data(numpy.array([[-7.0, 1.0], [5.0, 0.0], [3.0, -3.0]]),self.functionspace)
@@ -22723,6 +23478,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=transposed_matrix_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_constData_rank2_constData_rank1(self):
       arg0=Data(numpy.array([[-7.0, 5.0], [1.0, -7.0], [2.0, 1.0]]),self.functionspace)
@@ -22752,6 +23514,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_constData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[-3.0, 1.0], [0.0, 0.0], [-2.0, 4.0]]),self.functionspace)
@@ -22825,6 +23595,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_expandedData_rank2_expandedData_rank2(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -22898,6 +23676,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_array_rank4_array_rank2(self):
       arg0=numpy.array([[[[-1.0, -6.0, 4.0, 4.0, -2.0], [5.0, 0.0, -2.0, -2.0, -4.0], [-1.0, -2.0, -7.0, -6.0, 3.0], [3.0, 
@@ -22984,6 +23770,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=transposed_tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_array_rank4_constData_rank3(self):
       arg0=numpy.array([[[[-1.0, 3.0, 4.0, -3.0, 0.0], [7.0, 2.0, 6.0, -6.0, -4.0], [-5.0, -2.0, 4.0, 5.0, -7.0], [0.0, -4.0, 
@@ -23155,6 +23948,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_constData_rank2_array_rank1(self):
       arg0=Data(numpy.array([[3.0, -4.0], [-2.0, 5.0], [0.0, -7.0]]),self.functionspace)
@@ -23164,6 +23965,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=transposed_tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")    
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_constData_rank2_array_rank2(self):
       arg0=Data(numpy.array([[3.0, -3.0], [0.0, -6.0], [-2.0, 2.0]]),self.functionspace)
@@ -23202,6 +24010,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_constData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[7.0, 6.0], [4.0, -5.0], [7.0, -4.0]]),self.functionspace)
@@ -23493,6 +24309,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_expandedData_rank2_array_rank2(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -23607,6 +24431,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_expandedData_rank4_array_rank4(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -23937,6 +24769,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_float_rank0_float_rank0_offset0(self):
       arg0=-3.0
@@ -23945,6 +24785,12 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       ref=-3.0
       self.assertTrue(isinstance(res,float),"wrong type of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,complex),"wrong type of result. Got "+str(type(res)))
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_float_rank0_array_rank0_offset0(self):
       arg0=-3.0
@@ -23963,6 +24809,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_float_rank0_array_rank2_offset0(self):
       arg0=-6.0
@@ -24030,6 +24883,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_float_rank0_constData_rank3_offset0(self):
       arg0=2.0
@@ -24317,6 +25177,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      ref=-ref
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(3, 2, 3, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank0_constData_rank0_offset0(self):
       arg0=numpy.array(4.0)
@@ -25209,6 +26076,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank1_expandedData_rank3_offset0(self):
       arg0=numpy.array([0.0, -3.0])
@@ -25603,6 +26478,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank4_constData_rank4_offset2(self):
       arg0=numpy.array([[[[5.0, -6.0, -4.0, 6.0, 2.0], [4.0, -3.0, -3.0, 1.0, 3.0], [2.0, 7.0, 3.0, 7.0, 3.0], [6.0, 7.0, -1.0, 
@@ -25869,6 +26751,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank4_expandedData_rank4_offset2(self):
       arg0=numpy.array([[[[-6.0, -1.0, -3.0, -2.0, -7.0], [-1.0, -3.0, -6.0, 1.0, 3.0], [0.0, -1.0, -2.0, -5.0, 3.0], [-4.0, 
@@ -26058,6 +26948,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank3_expandedData_rank0_offset0(self):
       arg0=numpy.array([[[0.0, -3.0], [1.0, 4.0]], [[-7.0, -2.0], [2.0, 1.0]], [[0.0, 0.0], [3.0, 5.0]], [[2.0, 1.0], [-4.0, 
@@ -26228,6 +27125,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_constData_rank2_array_rank2_offset2(self):
       arg0=Data(numpy.array([[-7.0, -2.0, 0.0], [-5.0, 0.0, -3.0]]),self.functionspace)
@@ -26645,6 +27549,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_constData_rank2_expandedData_rank3_offset2(self):
       arg0=Data(numpy.array([[2.0, 7.0, 0.0], [4.0, 5.0, 0.0]]),self.functionspace)
@@ -27165,6 +28077,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_constData_rank1_expandedData_rank1_offset0(self):
       arg0=Data(numpy.array([1.0, 3.0]),self.functionspace)
@@ -27682,6 +28602,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_constData_rank4_constData_rank4_offset2(self):
       arg0=Data(numpy.array([[[[-2.0, 3.0, -3.0, 5.0, -2.0], [7.0, 5.0, 1.0, 6.0, -6.0], [2.0, -7.0, 7.0, -6.0, 7.0], [7.0, 
@@ -31011,6 +31938,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_transposed_mult_array_rank2_array_rank2(self):
       arg0=numpy.array([[5.0, -3.0, -5.0], [3.0, 1.0, 2.0]])
@@ -31020,6 +31955,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_transposed_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_transposed_mult_array_rank2_constData_rank2(self):
       arg0=numpy.array([[4.0, 1.0, -7.0], [0.0, 2.0, 6.0]])
@@ -31041,6 +31983,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=matrix_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_transposed_mult_constData_rank2_array_rank2(self):
       arg0=Data(numpy.array([[3.0, 1.0, -6.0], [5.0, -6.0, -3.0]]),self.functionspace)
@@ -31059,6 +32009,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_transposed_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_transposed_mult_constData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[0.0, -1.0, 1.0], [-5.0, 2.0, 0.0]]),self.functionspace)
@@ -31118,6 +32075,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_transposed_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_array_rank2_array_rank2(self):
       arg0=numpy.array([[1.0, -6.0, -3.0], [-5.0, 0.0, -1.0]])
@@ -31156,6 +32120,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_array_rank2_expandedData_rank2(self):
       arg0=numpy.array([[0.0, 4.0, 6.0], [-4.0, -5.0, -5.0]])
@@ -31320,6 +32292,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_transposed_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_array_rank4_expandedData_rank2(self):
       arg0=numpy.array([[[[-7.0, -7.0, 0.0], [1.0, 7.0, -7.0]], [[-1.0, -5.0, 6.0], [-3.0, -1.0, 6.0]], [[4.0, 3.0, -3.0], 
@@ -31763,6 +32742,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_expandedData_rank2_array_rank1(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -32245,9 +33232,16 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       arg1=numpy.array(-7.0)
       res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
       ref=numpy.array(-0.0)
-      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result."+str(type(res)))
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=numpy.array(arg1*1j)
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+      ref=-ref
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result."+str(type(res)))
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_float_rank0_array_rank1_offset0(self):
       arg0=4.0
@@ -32324,6 +33318,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_float_rank0_constData_rank3_offset0(self):
       arg0=2.0
@@ -32491,6 +33492,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=4)
+      self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
+      self.assertEqual(res.shape,(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank0_array_rank1_offset0(self):
       arg0=numpy.array(4.0)
@@ -32628,6 +33636,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank2_constData_rank2_offset2(self):
       arg0=numpy.array([[-1.0, 3.0, 5.0], [5.0, -6.0, 2.0]])
@@ -32725,6 +33740,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank2_constData_rank4_offset2(self):
       arg0=numpy.array([[-5.0, 5.0, 3.0], [-5.0, -4.0, -3.0]])
@@ -32848,6 +33870,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=4)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank0_expandedData_rank1_offset0(self):
       arg0=numpy.array(-2.0)
@@ -34053,6 +35083,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank2_expandedData_rank2_offset0(self):
       arg0=numpy.array([[-5.0, 0.0, -4.0, 1.0, -2.0], [-1.0, 2.0, 5.0, -3.0, -3.0], [-1.0, 7.0, 6.0, 2.0, -2.0], [-3.0, 3.0, 
@@ -34695,6 +35733,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank2_constData_rank2_offset2(self):
       arg0=Data(numpy.array([[0.0, -6.0, 3.0], [0.0, 0.0, 2.0]]),self.functionspace)
@@ -34898,6 +35943,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank4_expandedData_rank4_offset4(self):
       arg0=Data(numpy.array([[[[0.0, 1.0], [0.0, 1.0], [-6.0, 6.0]], [[1.0, -3.0], [3.0, -1.0], [6.0, -5.0]]], [[[4.0, 6.0], 
@@ -35309,6 +36362,13 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank3_constData_rank3_offset2(self):
       arg0=Data(numpy.array([[[2.0, 0.0, 2.0], [0.0, 0.0, 5.0]], [[1.0, 4.0, 0.0], [-7.0, 2.0, 3.0]]]),self.functionspace)
@@ -35544,6 +36604,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg1.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank3_expandedData_rank4_offset2(self):
       arg0=Data(numpy.array([[[-5.0, 6.0, 1.0], [-5.0, 6.0, 5.0]], [[-4.0, 5.0, 3.0], [4.0, -6.0, 0.0]]]),self.functionspace)
@@ -37554,6 +38622,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_expandedData_rank1_constData_rank1_offset0(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
@@ -39181,6 +40257,14 @@ class Test_util_binary_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not arg0.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_expandedData_rank4_expandedData_rank2_offset1(self):
       msk_arg0=1-whereZero(self.functionspace.getX()[0],1.e-8)
diff --git a/escriptcore/test/python/test_util_binary_with_tagged_data.py b/escriptcore/test/python/test_util_binary_with_tagged_data.py
index 995c389..74dfc7b 100644
--- a/escriptcore/test/python/test_util_binary_with_tagged_data.py
+++ b/escriptcore/test/python/test_util_binary_with_tagged_data.py
@@ -57,6 +57,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank0_taggedData_rank0(self):
       arg0=numpy.array(0.0765648367337)
@@ -68,6 +75,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)     
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank1_taggedData_rank1(self):
       arg0=numpy.array([-0.86109887228266446, 0.80664771143714975])
@@ -79,6 +93,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank2_taggedData_rank2(self):
       arg0=numpy.array([[0.28546214116183566, -0.25897102118715942, 0.22146243895717355, -0.2341748097231191, 
@@ -102,6 +123,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank3_taggedData_rank3(self):
       arg0=numpy.array([[[-0.36965911393496653, 0.98091128998261667], [-0.77339818424681805, -0.12759468235415383]], 
@@ -127,6 +155,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_array_rank4_taggedData_rank4(self):
       arg0=numpy.array([[[[0.7604938557994263, -0.2064171490048079, 0.0082224477660097772, 0.47661474451358177], 
@@ -180,6 +215,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank0_taggedData_rank0(self):
       arg0=Data(0.124403467791,self.functionspace)
@@ -191,6 +233,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_constData_rank1_taggedData_rank1(self):
       arg0=Data(numpy.array([-0.87777216173835626, 0.84139785859967287]),self.functionspace)
@@ -385,6 +434,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=inner(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_taggedData_rank4_array_rank4(self):
       arg0=Data(numpy.array([[[[-0.31697817736254796, -0.78512493043425224, 0.86570946770719948, -0.30099839115644333], 
@@ -871,6 +927,15 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank0_taggedData_rank0(self):
       arg0=Data(0.947623884385,self.functionspace)
@@ -884,6 +949,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank1_taggedData_rank1(self):
       arg0=Data(numpy.array([-0.28803269141787369, 0.8093167451154808]),self.functionspace)
@@ -897,6 +970,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank2_taggedData_rank2(self):
       arg0=Data(numpy.array([[0.12356938074285129, -0.01920853546063106, 0.99797588924346758, 0.1814521358364265, 
@@ -926,6 +1007,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank3_taggedData_rank3(self):
       arg0=Data(numpy.array([[[-0.72975553555244721, -0.30546473305749067], [-0.56080228686254463, 0.19196525884555093]], 
@@ -959,6 +1048,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=inner(arg0,arg1)      
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inner_expandedData_rank4_taggedData_rank4(self):
       arg0=Data(numpy.array([[[[-0.81947004030336634, 0.67058302145475457, -0.7459191324332497, -0.56685991265962676], 
@@ -1050,6 +1147,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_taggedData_rank2(self):
       arg0=-0.2887604825
@@ -1077,6 +1181,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_taggedData_rank3(self):
       arg0=-0.776324255672
@@ -1108,6 +1219,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_float_rank0_taggedData_rank4(self):
       arg0=-0.833021296853
@@ -1245,6 +1363,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1) 
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank4_taggedData_rank0(self):
       arg0=numpy.array([[[[-0.56243952285768684, -0.38055747729341571, 0.26506266794294953, 0.28759030643565464], 
@@ -1309,6 +1434,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)     
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank1_taggedData_rank1(self):
       arg0=numpy.array([0.92947119363174102, 0.19269891600491529])
@@ -1322,6 +1454,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)   
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank2_taggedData_rank1(self):
       arg0=numpy.array([[-0.75993582440916163, 0.027687316844169141, -0.66568939935561344, 0.1141542489676417, 
@@ -1448,6 +1587,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=numpy.array(arg0*1j)
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1) 
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank2_taggedData_rank2(self):
       arg0=numpy.array([[-0.26931146648060356, 0.91506700842357525, -0.73706998264364509, -0.28720133727824182, 
@@ -1631,6 +1777,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_array_rank0_taggedData_rank3(self):
       arg0=numpy.array(0.704027105071)
@@ -1790,6 +1943,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_constData_rank2_taggedData_rank0(self):
       arg0=Data(numpy.array([[0.4990759241891225, -0.71562567842653735, -0.71491441859956928, -0.16977617098779474, 
@@ -1947,6 +2107,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_constData_rank3_taggedData_rank1(self):
       arg0=Data(numpy.array([[[0.90458365695583587, -0.95884760839529637], [-0.28861525659205856, 0.71780550056875736]], 
@@ -2532,6 +2699,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank2_array_rank0(self):
       arg0=Data(numpy.array([[0.38496421777624312, -0.024425396136277033, -0.63820129158073424, -0.28050797131720473, 
@@ -2969,6 +3143,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank1_expandedData_rank0(self):
       arg0=Data(numpy.array([-0.093748755478083812, 0.46462167631749418]),self.functionspace)
@@ -3045,6 +3227,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank4_expandedData_rank0(self):
       arg0=Data(numpy.array([[[[0.18797918717000339, 0.73794688990673518, 0.23017202170024698, -0.96422361533901357], 
@@ -3213,6 +3403,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank0_constData_rank1(self):
       arg0=Data(-0.204230581137,self.functionspace)
@@ -3413,6 +3610,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank0_expandedData_rank1(self):
       arg0=Data(-0.775151292954,self.functionspace)
@@ -3427,6 +3631,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank1_expandedData_rank1(self):
       arg0=Data(numpy.array([0.0074249411735971549, -0.011510527886748667]),self.functionspace)
@@ -3442,6 +3654,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank2_expandedData_rank1(self):
       arg0=Data(numpy.array([[0.57462282234158835, -0.57043863758709579, 0.65033759305793537, -0.5361596538855089, 
@@ -3521,6 +3741,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6,2,2,2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank0_array_rank2(self):
       arg0=Data(-0.422379565542,self.functionspace)
@@ -4523,6 +4751,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=outer(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6,2,2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank1_array_rank3(self):
       arg0=Data(numpy.array([-0.57447826335126084, -0.54768328937179001]),self.functionspace)
@@ -4721,6 +4956,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(6,2,2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_taggedData_rank1_expandedData_rank3(self):
       arg0=Data(numpy.array([-0.33790913306894765, -0.8715614151541422]),self.functionspace)
@@ -5006,6 +5249,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3,2,3,4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_expandedData_rank0_taggedData_rank0(self):
       arg0=Data(0.321320071636,self.functionspace)
@@ -5603,6 +5854,15 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,6,2,2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
+      
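The shape assertions in the outer tests follow from outer concatenating the shapes of its two arguments, e.g. the (2,) x (6, 2, 2) case checked just above. numpy.multiply.outer behaves the same way on plain arrays (illustrative data only):

    import numpy

    a = numpy.random.rand(2)
    b = numpy.random.rand(6, 2, 2)

    # The result shape is the two argument shapes concatenated.
    assert numpy.multiply.outer(a, b).shape == (2, 6, 2, 2)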
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_outer_expandedData_rank0_taggedData_rank4(self):
       arg0=Data(-0.7253342588,self.functionspace)
@@ -5672,6 +5932,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=outer(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3,2,3,4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_maximum_float_rank0_taggedData_rank0(self):
       arg0=0.749376761194
@@ -12543,6 +12811,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")            
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank2_taggedData_rank2(self):
       arg0=numpy.array([[2.0, 4.0, 0.0], [-2.0, 4.0, 1.0]])
@@ -12554,6 +12829,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_taggedData_rank2(self):
       arg0=numpy.array([[[[6.0, -3.0, -4.0], [0.0, 0.0, 0.0]], [[6.0, 6.0, -3.0], [5.0, -4.0, 1.0]], [[0.0, 0.0, 5.0], [1.0, 
@@ -12573,6 +12855,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4,5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
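For tensor_mult the number of contracted axes follows the rank of the first argument: the (2, 2) result from the (2, 3) first argument above is an ordinary matrix product (one contracted axis), while the (4, 5) result from the rank-4 x rank-2 case corresponds to contracting two axes. Under that convention numpy.tensordot reproduces the asserted shapes, and the 1j scaling again negates the result (shapes are illustrative, not taken from the test data):

    import numpy

    A2 = numpy.random.rand(2, 3)
    B2 = numpy.random.rand(3, 2)
    # rank-2 x rank-2: one contracted axis, i.e. a matrix product.
    assert numpy.tensordot(A2, B2, axes=1).shape == (2, 2)

    A4 = numpy.random.rand(4, 5, 2, 3)
    B4 = numpy.random.rand(2, 3)
    # rank-4 x rank-2: two contracted axes.
    assert numpy.tensordot(A4, B4, axes=2).shape == (4, 5)

    # Multiplying both operands by 1j flips the sign of the product.
    assert numpy.allclose(numpy.tensordot(1j * A2, 1j * B2, axes=1),
                          -numpy.tensordot(A2, B2, axes=1))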
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_array_rank4_taggedData_rank3(self):
       arg0=numpy.array([[[[3.0, 6.0, 6.0], [-7.0, 0.0, -5.0]], [[1.0, -4.0, 5.0], [7.0, -5.0, 2.0]], [[0.0, -3.0, -1.0], [4.0, 
@@ -12667,6 +12956,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4,5,4,5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_constData_rank2_taggedData_rank1(self):
       arg0=Data(numpy.array([[4.0, 5.0, -3.0], [0.0, -2.0, -4.0]]),self.functionspace)
@@ -12730,6 +13026,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4,5,2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_constData_rank4_taggedData_rank4(self):
       arg0=Data(numpy.array([[[[-4.0, 0.0, 5.0], [0.0, -4.0, 3.0]], [[7.0, -1.0, 4.0], [-3.0, -2.0, -1.0]], [[2.0, 1.0, 7.0], 
@@ -12813,6 +13116,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_taggedData_rank2_array_rank2(self):
       arg0=Data(numpy.array([[-6.0, 0.0, -2.0], [4.0, -6.0, 0.0]]),self.functionspace)
@@ -12883,6 +13193,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_taggedData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[-6.0, 4.0, 4.0], [6.0, -4.0, -3.0]]),self.functionspace)
@@ -13020,6 +13338,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4,5,4,5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_taggedData_rank4_constData_rank2(self):
       arg0=Data(numpy.array([[[[-1.0, -2.0, 2.0], [2.0, -4.0, -5.0]], [[2.0, 2.0, 2.0], [4.0, -3.0, -5.0]], [[0.0, -6.0, -1.0], 
@@ -13416,6 +13741,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4,5,4,5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_expandedData_rank2_taggedData_rank1(self):
       arg0=Data(numpy.array([[-6.0, -3.0, -3.0], [5.0, 7.0, 0.0]]),self.functionspace)
@@ -13429,6 +13762,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_expandedData_rank2_taggedData_rank2(self):
       arg0=Data(numpy.array([[0.0, -4.0, 0.0], [-3.0, -7.0, -3.0]]),self.functionspace)
@@ -13469,6 +13810,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4,5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_mult_expandedData_rank4_taggedData_rank3(self):
       arg0=Data(numpy.array([[[[0.0, -6.0, 5.0], [-3.0, 6.0, 2.0]], [[-7.0, -3.0, 2.0], [0.0, 0.0, 7.0]], [[-4.0, 0.0, -5.0], 
@@ -13592,6 +13941,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_float_rank0_taggedData_rank1_offset0(self):
       arg0=-4.0
@@ -13603,6 +13959,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_float_rank0_taggedData_rank2_offset0(self):
       arg0=7.0
@@ -13659,6 +14022,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(3,2,3,4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank0_taggedData_rank0_offset0(self):
       arg0=numpy.array(-7.0)
@@ -13692,6 +14062,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_taggedData_rank3_offset3(self):
       arg0=numpy.array([[[7.0, -3.0, -5.0], [0.0, -5.0, 5.0], [0.0, 2.0, -6.0], [-7.0, 3.0, 0.0]], [[-3.0, 4.0, 5.0], [5.0, 
@@ -13734,6 +14111,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank1_taggedData_rank2_offset1(self):
       arg0=numpy.array([-4.0, -3.0, 3.0])
@@ -13757,6 +14141,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_taggedData_rank4_offset3(self):
       arg0=numpy.array([[[-1.0, -2.0, 6.0], [-4.0, -2.0, 0.0], [-4.0, 1.0, 6.0], [6.0, -1.0, -6.0]], [[3.0, 0.0, 5.0], [4.0, 
@@ -13788,6 +14179,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4,5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank1_taggedData_rank3_offset1(self):
       arg0=numpy.array([2.0, 1.0, -3.0])
@@ -13847,6 +14245,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6,2,2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")           
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank1_taggedData_rank4_offset1(self):
       arg0=numpy.array([0.0, 2.0, -3.0])
@@ -13915,6 +14320,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_taggedData_rank2_offset2(self):
       arg0=numpy.array([[[7.0, 1.0, -2.0], [3.0, 4.0, -1.0]], [[-4.0, 0.0, 3.0], [-5.0, -7.0, 5.0]]])
@@ -13963,6 +14375,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank3_taggedData_rank3_offset2(self):
       arg0=numpy.array([[[-4.0, 4.0, 1.0], [-7.0, 7.0, -2.0]], [[0.0, 4.0, -2.0], [4.0, 2.0, -7.0]]])
@@ -14009,6 +14428,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank2_taggedData_rank3_offset1(self):
       arg0=numpy.array([[6.0, -5.0, 1.0], [-4.0, -6.0, 0.0]])
@@ -14205,6 +14631,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_array_rank2_taggedData_rank2_offset0(self):
       arg0=numpy.array([[-5.0, -2.0, 2.0, -1.0, 1.0], [3.0, -4.0, 7.0, 0.0, -1.0], [-3.0, -1.0, -4.0, -4.0, 3.0], [1.0, -6.0, 
@@ -14515,6 +14948,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank3_taggedData_rank3_offset3(self):
       arg0=Data(numpy.array([[[5.0, -3.0, 3.0], [-5.0, 7.0, 1.0], [3.0, 6.0, 0.0], [7.0, 7.0, 3.0]], [[-3.0, 3.0, -4.0], [-5.0, 
@@ -14599,6 +15039,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank0_taggedData_rank2_offset0(self):
       arg0=Data(1.0,self.functionspace)
@@ -14729,6 +15176,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank2_taggedData_rank1_offset1(self):
       arg0=Data(numpy.array([[7.0, 4.0, -5.0], [3.0, -4.0, -1.0]]),self.functionspace)
@@ -14883,6 +15337,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank1_taggedData_rank3_offset0(self):
       arg0=Data(numpy.array([-6.0, 2.0]),self.functionspace)
@@ -15087,6 +15548,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
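generalTensorProduct makes the number of contracted axes explicit through axis_offset: with axis_offset=0 nothing is contracted and the result shape is the two argument shapes concatenated (the (4, 5, 4, 5) assertion just above), while an offset equal to the common rank contracts everything down to a scalar (the earlier () assertions with axis_offset=2). Assuming the usual tensordot convention of pairing the last axis_offset axes of the first argument with the first axis_offset axes of the second, a numpy sketch with assumed (4, 5) inputs:

    import numpy

    a = numpy.random.rand(4, 5)
    b = numpy.random.rand(4, 5)

    # axis_offset=0: no contraction, shape is the concatenation (4, 5, 4, 5).
    assert numpy.tensordot(a, b, axes=0).shape == (4, 5, 4, 5)
    # axis_offset=2: every axis contracted, scalar result.
    assert numpy.tensordot(a, b, axes=2).shape == ()
    # The 1j identity holds for any offset.
    assert numpy.allclose(numpy.tensordot(1j * a, 1j * b, axes=2),
                          -numpy.tensordot(a, b, axes=2))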
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_constData_rank3_taggedData_rank3_offset1(self):
       arg0=Data(numpy.array([[[-2.0, -2.0, -1.0], [0.0, -7.0, -7.0], [7.0, 2.0, 5.0], [6.0, 2.0, 3.0], [-7.0, 6.0, 0.0]], 
@@ -15323,6 +15791,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank0_array_rank0_offset0(self):
       arg0=Data(-4.0,self.functionspace)
@@ -15484,6 +15959,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank0_array_rank3_offset0(self):
       arg0=Data(3.0,self.functionspace)
@@ -15584,6 +16066,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank4_constData_rank4_offset4(self):
       arg0=Data(numpy.array([[[[-1.0, -7.0], [3.0, -5.0], [4.0, 0.0]], [[5.0, -6.0], [6.0, -5.0], [-2.0, 0.0]]], [[[1.0, 0.0], 
@@ -16033,6 +16522,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank2_expandedData_rank2_offset2(self):
       arg0=Data(numpy.array([[4.0, 4.0, 2.0], [-3.0, -4.0, -1.0]]),self.functionspace)
@@ -16087,6 +16584,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=4)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank0_expandedData_rank1_offset0(self):
       arg0=Data(5.0,self.functionspace)
@@ -16184,6 +16689,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank2_expandedData_rank4_offset2(self):
       arg0=Data(numpy.array([[0.0, 0.0, 0.0], [2.0, -7.0, 7.0]]),self.functionspace)
@@ -16543,6 +17056,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank2_constData_rank2_offset1(self):
       arg0=Data(numpy.array([[5.0, -4.0, -1.0], [-1.0, 3.0, 7.0]]),self.functionspace)
@@ -16928,6 +17448,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank3_expandedData_rank2_offset2(self):
       arg0=Data(numpy.array([[[-4.0, 0.0, 3.0], [5.0, -4.0, -3.0]], [[1.0, 4.0, 7.0], [-5.0, -1.0, -1.0]]]),self.functionspace)
@@ -17395,6 +17923,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_taggedData_rank4_array_rank4_offset2(self):
       arg0=Data(numpy.array([[[[6.0, -7.0, 2.0], [0.0, 0.0, -7.0]], [[-7.0, -6.0, 0.0], [5.0, 6.0, 7.0]], [[6.0, 0.0, 3.0], 
@@ -18975,6 +19510,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorProduct(arg0,arg1,axis_offset=2)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorProduct_expandedData_rank3_taggedData_rank3_offset3(self):
       arg0=Data(numpy.array([[[-3.0, 6.0, 0.0], [5.0, 4.0, 6.0], [6.0, -2.0, 4.0], [0.0, -7.0, -7.0]], [[2.0, 1.0, 5.0], [-1.0, 
@@ -19918,6 +20461,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_array_rank2_taggedData_rank2(self):
       arg0=numpy.array([[1.0, 2.0, -1.0], [3.0, -5.0, -1.0]])
@@ -19951,6 +20501,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_mult_taggedData_rank2_array_rank1(self):
       arg0=Data(numpy.array([[-4.0, -6.0, -6.0], [6.0, 0.0, -2.0]]),self.functionspace)
@@ -20071,6 +20628,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_array_rank2_taggedData_rank1(self):
       arg0=numpy.array([[-3.0, 4.0], [-7.0, 2.0], [5.0, 7.0]])
@@ -20196,6 +20761,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_taggedData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[4.0, -3.0], [3.0, -6.0], [-4.0, 0.0]]),self.functionspace)
@@ -20222,6 +20795,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_matrix_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_matrix_mult_expandedData_rank2_taggedData_rank2(self):
       arg0=Data(numpy.array([[0.0, 7.0], [-4.0, 1.0], [3.0, 3.0]]),self.functionspace)
@@ -20257,6 +20838,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=transposed_tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_array_rank4_taggedData_rank2(self):
       arg0=numpy.array([[[[-2.0, 4.0, 7.0, 7.0, 4.0], [4.0, 7.0, -7.0, 4.0, 4.0], [5.0, -3.0, 4.0, 3.0, 0.0], [-4.0, 3.0, 0.0, 
@@ -20769,6 +21357,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=transposed_tensor_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_taggedData_rank4_constData_rank4(self):
       arg0=Data(numpy.array([[[[0.0, -6.0, -6.0, 2.0, -7.0], [5.0, 1.0, 2.0, 6.0, 4.0], [0.0, 6.0, -3.0, -1.0, -1.0], [-1.0, 
@@ -21166,6 +21761,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not res.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=transposed_tensor_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_transposed_tensor_mult_expandedData_rank4_taggedData_rank3(self):
       arg0=Data(numpy.array([[[[4.0, 0.0, 4.0, 0.0, 7.0], [2.0, 0.0, -5.0, 7.0, 6.0], [-1.0, 7.0, 6.0, 5.0, -6.0], [0.0, -7.0, 
@@ -21287,6 +21890,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_float_rank0_taggedData_rank1_offset0(self):
       arg0=4.0
@@ -21375,6 +21985,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank2_taggedData_rank2_offset2(self):
       arg0=numpy.array([[2.0, -5.0, 2.0], [0.0, 0.0, 3.0]])
@@ -21400,6 +22017,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank4_taggedData_rank4_offset4(self):
       arg0=numpy.array([[[[3.0, -6.0], [-6.0, -6.0], [-7.0, -5.0]], [[0.0, -7.0], [-5.0, 3.0], [-5.0, 5.0]]], [[[-7.0, 4.0], 
@@ -22155,6 +22779,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_array_rank4_taggedData_rank0_offset0(self):
       arg0=numpy.array([[[[0.0, -4.0, -4.0, 2.0], [-4.0, 4.0, 1.0, 2.0], [4.0, 1.0, 0.0, 0.0]], [[3.0, -5.0, -3.0, -5.0], 
@@ -23310,6 +23941,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank1_constData_rank2_offset1(self):
       arg0=Data(numpy.array([-1.0, -2.0, -5.0]),self.functionspace)
@@ -23379,6 +24017,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank2_constData_rank4_offset2(self):
       arg0=Data(numpy.array([[4.0, -2.0, -1.0], [4.0, -1.0, -5.0]]),self.functionspace)
@@ -23501,6 +24146,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank4_taggedData_rank4_offset4(self):
       arg0=Data(numpy.array([[[[-1.0, -7.0], [5.0, 1.0], [-5.0, 4.0]], [[-6.0, 7.0], [4.0, 3.0], [3.0, 0.0]]], [[[7.0, -2.0], 
@@ -23730,6 +24382,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not res.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank2_expandedData_rank2_offset2(self):
       arg0=Data(numpy.array([[7.0, 2.0, 3.0], [0.0, 2.0, 5.0]]),self.functionspace)
@@ -23760,6 +24420,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not res.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")         
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank4_expandedData_rank4_offset4(self):
       arg0=Data(numpy.array([[[[-7.0, 1.0], [0.0, -5.0], [-6.0, 1.0]], [[4.0, 3.0], [0.0, 1.0], [-6.0, -7.0]]], [[[-3.0, 2.0], 
@@ -24921,6 +25589,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTransposedTensorProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank2_array_rank1_offset0(self):
       arg0=Data(numpy.array([[1.0, -7.0, -7.0, 5.0, 2.0], [0.0, -3.0, 5.0, 6.0, 7.0], [-4.0, -1.0, -1.0, 3.0, 3.0], [-1.0, 
@@ -25907,6 +26582,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_taggedData_rank3_expandedData_rank2_offset1(self):
       arg0=Data(numpy.array([[[0.0, -1.0, 6.0, -7.0, -6.0], [7.0, 0.0, 2.0, 0.0, -7.0], [-1.0, -2.0, -5.0, -1.0, 7.0], [-5.0, 
@@ -26644,6 +27327,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTransposedTensorProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(3, 2, 3, 4),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")       
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTransposedTensorProduct_expandedData_rank0_taggedData_rank0_offset0(self):
       arg0=Data(-2.0,self.functionspace)
@@ -27628,6 +28319,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=matrix_transposed_mult(arg0,arg1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_matrix_transposed_mult_constData_rank2_taggedData_rank2(self):
       arg0=Data(numpy.array([[5.0, 2.0, 3.0], [7.0, -7.0, 7.0]]),self.functionspace)
@@ -27699,6 +28397,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=matrix_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_array_rank2_taggedData_rank1(self):
       arg0=numpy.array([[0.0, 6.0, 2.0], [0.0, 7.0, 0.0]])
@@ -28050,6 +28756,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_taggedData_rank2_expandedData_rank2(self):
       arg0=Data(numpy.array([[7.0, 0.0, -1.0], [4.0, 3.0, 7.0]]),self.functionspace)
@@ -28474,6 +29188,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=tensor_transposed_mult(arg0,arg1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_tensor_transposed_mult_taggedData_rank4_expandedData_rank3(self):
       arg0=Data(numpy.array([[[[5.0, 0.0, 5.0], [2.0, -4.0, 0.0]], [[7.0, -1.0, -6.0], [-3.0, 0.0, -3.0]], [[7.0, 2.0, 2.0], 
@@ -28801,6 +29523,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_float_rank0_taggedData_rank4_offset0(self):
       arg0=6.0
@@ -28848,6 +29577,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank2_taggedData_rank2_offset2(self):
       arg0=numpy.array([[0.0, 0.0, -1.0], [-2.0, 2.0, -5.0]])
@@ -29140,6 +29876,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=2)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_array_rank4_taggedData_rank4_offset3(self):
       arg0=numpy.array([[[[4.0, -3.0, -4.0], [-5.0, -3.0, -6.0], [-1.0, -6.0, -4.0], [1.0, -7.0, -4.0]], [[4.0, -5.0, 1.0], 
@@ -29672,6 +30415,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")     
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank2_taggedData_rank2_offset2(self):
       arg0=Data(numpy.array([[3.0, -7.0, -1.0], [-4.0, 2.0, 3.0]]),self.functionspace)
@@ -29716,6 +30466,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=4)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_constData_rank0_taggedData_rank1_offset0(self):
       arg0=Data(4.0,self.functionspace)
@@ -31012,6 +31769,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank2_taggedData_rank3_offset2(self):
       arg0=Data(numpy.array([[-1.0, -5.0, -5.0], [6.0, -5.0, -2.0]]),self.functionspace)
@@ -31260,6 +32024,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not res.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank1_expandedData_rank2_offset1(self):
       arg0=Data(numpy.array([0.0, 2.0, 4.0]),self.functionspace)
@@ -31741,6 +32513,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=3)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(2, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank1_constData_rank2_offset0(self):
       arg0=Data(numpy.array([6.0, 0.0]),self.functionspace)
@@ -32295,6 +33074,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2, 6, 2, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not res.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2, 6, 2, 2),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")        
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank2_float_rank0_offset0(self):
       arg0=Data(numpy.array([[0.0, -4.0, 2.0, 2.0, -1.0], [6.0, -6.0, 4.0, -2.0, 0.0], [-5.0, -2.0, 0.0, -1.0, -2.0], [6.0, 
@@ -32699,6 +33486,13 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg0=arg0*1j
+      arg1=arg1*1j
+      ref=-ref 
+      res=generalTensorTransposedProduct(arg0,arg1,axis_offset=0)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(4, 5, 2),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank3_constData_rank2_offset1(self):
       arg0=Data(numpy.array([[[-7.0, -4.0, 7.0], [3.0, 6.0, 0.0], [-1.0, 3.0, 2.0], [4.0, 4.0, 2.0], [0.0, 1.0, 6.0]], [[-6.0, 
@@ -33307,6 +34101,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=1)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_taggedData_rank4_expandedData_rank2_offset2(self):
       arg0=Data(numpy.array([[[[-2.0, -5.0, 2.0], [4.0, -4.0, 2.0]], [[-2.0, 4.0, 7.0], [0.0, 6.0, -4.0]], [[-1.0, -2.0, 4.0], 
@@ -34417,6 +35219,14 @@ class Test_util_binary_with_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      if not ref.isLazy():
+        arg0=arg0*1j
+        arg1=arg1*1j
+        ref=-ref
+        res=generalTensorTransposedProduct(arg0,arg1,axis_offset=3)
+        self.assertTrue(isinstance(res,Data),"wrong type of result.")
+        self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
+        self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_generalTensorTransposedProduct_expandedData_rank1_taggedData_rank1_offset0(self):
       arg0=Data(numpy.array([3.0, 0.0]),self.functionspace)
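
The complex-valued checks added throughout the hunks above all follow one pattern: both arguments are multiplied by 1j and the reference result is negated, because for every bilinear product tested here (matrix_mult, tensor_mult, the generalTensorProduct/generalTransposedTensorProduct variants) we have (1j*A)*(1j*B) = (1j)^2 * (A*B) = -(A*B). A minimal numpy-only sketch of that identity follows; it is illustrative only and does not use the escript Data class or the escript product functions themselves.

    import numpy

    # Identity the added complex test cases rely on:
    # (1j*A) @ (1j*B) == -(A @ B) for real A, B of compatible shape.
    A = numpy.array([[1.0, 2.0, -1.0], [3.0, -5.0, -1.0]])
    B = numpy.array([4.0, -2.0, 3.0])

    ref = A @ B                   # real-valued reference product
    res = (1j * A) @ (1j * B)     # same product with both factors rotated by 1j

    assert numpy.allclose(res, -ref)  # product picks up a factor of i*i = -1
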
diff --git a/escriptcore/test/python/test_util_slicing_no_tagged_data.py b/escriptcore/test/python/test_util_slicing_no_tagged_data.py
index 4d976f3..e7619fa 100644
--- a/escriptcore/test/python/test_util_slicing_no_tagged_data.py
+++ b/escriptcore/test/python/test_util_slicing_no_tagged_data.py
@@ -54,6 +54,12 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(1,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([0.0j, 1.0j, 2.0j]),self.functionspace)
+      res=arg[2:]
+      ref=Data(numpy.array([2.0j]),self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(1,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_getslice_constData_rank1_R(self):
       arg=Data(numpy.array([0.0, 1.0, 2.0]),self.functionspace)
@@ -78,6 +84,12 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3,),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([0.0j, 1.0j, 2.0j]),self.functionspace)
+      res=arg[:]
+      ref=Data(numpy.array([0.0j, 1.0j, 2.0j]),self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(3,),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_getslice_constData_rank1_P(self):
       arg=Data(numpy.array([0.0, 1.0, 2.0]),self.functionspace)
@@ -214,6 +226,12 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(1, 4),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0j, 7.0], [8.0, 9.0, 10.0, 11.0]]),self.functionspace)
+      res=arg[1:2]
+      ref=Data(numpy.array([[4.0, 5.0, 6.0j, 7.0]]),self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(1, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_getslice_constData_rank2_CN(self):
       arg=Data(numpy.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0]]),self.functionspace)
@@ -337,6 +355,12 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0j]]),self.functionspace)
+      res=arg[1:2]
+      ref=Data(numpy.array([[4.0, 5.0, 6.0, 7.0]]),self.functionspace)
+      self.assertTrue(isinstance(res,Data),"wrong type of result.")
+      self.assertEqual(res.getShape(),(1, 4),"wrong shape of result.")
+      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_getslice_constData_rank3_CRN(self):
       arg=Data(numpy.array([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0], [9.0, 10.0, 11.0]], [[12.0, 13.0, 14.0], [15.0, 
@@ -18662,6 +18686,11 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       arg[2:]=val
       ref=Data(numpy.array([6.0, 2.0, 7.0]),self.functionspace)
       self.assertTrue(Lsup(arg-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([6.0j, 2.0, 7.0]),self.functionspace)
+      val=numpy.array([7.0])
+      arg[2:]=val
+      ref=Data(numpy.array([6.0j, 2.0, 7.0]),self.functionspace)
+      self.assertTrue(Lsup(arg-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_setslice_constData_rank1_constData_L(self):
       arg=Data(numpy.array([2.0, 8.0, 8.0]),self.functionspace)
@@ -18714,6 +18743,11 @@ class Test_util_slicing_no_tagged_data(Test_util_base):
       arg[:]=val
       ref=Data(numpy.array([2.0, 1.0, 2.0]),self.functionspace)
       self.assertTrue(Lsup(arg-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      arg=Data(numpy.array([8.0j, 2.0, 5.0j]),self.functionspace)
+      val=Data(numpy.array([2.0, 1.0, 2.0j]),self.functionspace)
+      arg[:]=val
+      ref=Data(numpy.array([2.0, 1.0, 2.0j]),self.functionspace)
+      self.assertTrue(Lsup(arg-ref)<=self.RES_TOL*Lsup(ref),"wrong result")      
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_setslice_constData_rank1_expandedData_C(self):
       arg=Data(numpy.array([4.0, 2.0, 5.0]),self.functionspace)
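
The slicing additions use the same scheme: each real-valued case gains a complex-valued twin, and slice reads and writes are expected to behave exactly as in the real case (same shape, imaginary parts preserved, untouched entries left alone). A small numpy sketch of the behaviour being asserted, assuming escript's Data slicing matches numpy semantics here:

    import numpy

    # slice assignment leaves the entries outside the slice alone
    arg = numpy.array([6.0j, 2.0, 7.0])
    arg[2:] = numpy.array([7.0])
    assert numpy.allclose(arg, numpy.array([6.0j, 2.0, 7.0]))

    # slicing preserves complex values and the expected shape
    res = numpy.array([0.0j, 1.0j, 2.0j])[2:]
    assert res.shape == (1,) and numpy.allclose(res, numpy.array([2.0j]))
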
diff --git a/escriptcore/test/python/test_util_spatial_functions1.py b/escriptcore/test/python/test_util_spatial_functions1.py
index 5943037..15a55db 100644
--- a/escriptcore/test/python/test_util_spatial_functions1.py
+++ b/escriptcore/test/python/test_util_spatial_functions1.py
@@ -42,7 +42,7 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 import esys.escriptcore.utestselect as unittest
 from esys.escript import *
 from numpy import array
-
+import numpy
 
 class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
    RES_TOL=1.e-8
@@ -65,10 +65,10 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       """
       test the approximation order 
       """
-      self.assertTrue(self.order == Solution(self.domain).getApproximationOrder(), "wrong order (Solution)")
-      self.assertTrue(self.order == ContinuousFunction(self.domain).getApproximationOrder(), "wrong order (continuous function)")
-      self.assertTrue(1 == ReducedSolution(self.domain).getApproximationOrder(), "wrong order (ReducedSolution)")
-      self.assertTrue(1 == ReducedContinuousFunction(self.domain).getApproximationOrder(), "wrong order (Reduced continuous function)")
+      self.assertEqual(self.order, Solution(self.domain).getApproximationOrder(), "wrong order (Solution)")
+      self.assertEqual(self.order, ContinuousFunction(self.domain).getApproximationOrder(), "wrong order (continuous function)")
+      self.assertEqual(1, ReducedSolution(self.domain).getApproximationOrder(), "wrong order (ReducedSolution)")
+      self.assertEqual(1, ReducedContinuousFunction(self.domain).getApproximationOrder(), "wrong order (Reduced continuous function)")
       for i in range(self.domain.getDim()):
          for k in range(Function(self.domain).getApproximationOrder()+1):
              self.assertAlmostEqual(integrate(Function(self.domain).getX()[i]**k),1./(k+1),8,"wrong integral (i=%s, order = %s)"%(i,k))
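
The change from assertTrue(a == b) and assertTrue(x <= tol) to assertEqual and assertLess in this file mainly improves failure diagnostics: the specialised assertions report both operands when they fail, whereas assertTrue only reports "False is not true". A short, self-contained sketch (illustrative only, using hypothetical values rather than the escript domains):

    import unittest

    class _Demo(unittest.TestCase):
        def test_equal_and_less(self):
            order = 2
            # assertEqual/assertLess include the compared values in the
            # failure message, unlike assertTrue(order == 2).
            self.assertEqual(order, 2, "wrong order")
            self.assertLess(0.5, 1.0, "wrong result")

    if __name__ == "__main__":
        unittest.main()
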
@@ -102,7 +102,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
      res=f.getNormal()
      self.assertEqual(res.getShape(),(dim,),"wrong shape of result.")
      self.assertEqual(res.getFunctionSpace(),f,"wrong functionspace of result.")
-     self.assertTrue(Lsup(ref-res)<=self.RES_TOL,"wrong result")
+     self.assertLess(Lsup(ref-res), self.RES_TOL, "wrong result")
 
    def test_normal_ReducedFunctionOnBoundary(self):
      """
@@ -130,7 +130,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
      res=f.getNormal()
      self.assertEqual(res.getShape(),(dim,),"wrong shape of result.")
      self.assertEqual(res.getFunctionSpace(),f,"wrong functionspace of result.")
-     self.assertTrue(Lsup(ref-res)<=self.RES_TOL,"wrong result")
+     self.assertLess(Lsup(ref-res), self.RES_TOL, "wrong result")
 
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ContinuousFunction_rank0(self):
@@ -154,7 +154,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.924736057229)/(o+1.)+(0.203305293867)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref), "wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -183,7 +183,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref), "wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -284,7 +284,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -401,7 +401,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ContinuousFunction_rank4(self):
       """
@@ -902,7 +902,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref), "wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Solution_rank0(self):
       """
@@ -925,7 +925,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.506913280568)/(o+1.)+(0.914528341811)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref), "wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Solution_rank1(self):
       """
@@ -954,7 +954,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref), "wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Solution_rank2(self):
       """
@@ -1055,7 +1055,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Solution_rank3(self):
       """
@@ -1172,7 +1172,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Solution_rank4(self):
       """
@@ -1673,7 +1673,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -1696,7 +1696,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.577517867896)/(o+1.)+(-0.311056057931)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -1725,7 +1725,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -1826,7 +1826,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -1943,7 +1943,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedSolution_rank4(self):
       """
@@ -2444,7 +2444,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -2467,7 +2467,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.577517867896)/(o+1.)+(-0.311056057931)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -2496,7 +2496,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -2597,7 +2597,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -2714,7 +2714,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -3215,7 +3215,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Function_rank0(self):
       """
@@ -3238,7 +3238,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.660452513249)/(o+1.)+(0.0485806965265)
       res=integrate(arg)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Function_rank1(self):
       """
@@ -3267,7 +3267,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Function_rank2(self):
       """
@@ -3368,7 +3368,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Function_rank3(self):
       """
@@ -3485,7 +3485,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunction_fromData_Function_rank4(self):
       """
@@ -3986,7 +3986,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -4009,7 +4009,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.924736057229)/(o+1.)+(0.203305293867)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -4038,7 +4038,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -4139,7 +4139,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -4256,7 +4256,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ContinuousFunction_rank4(self):
       """
@@ -4757,7 +4757,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_Solution_rank0(self):
       """
@@ -4780,7 +4780,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.506913280568)/(o+1.)+(0.914528341811)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_Solution_rank1(self):
       """
@@ -4809,7 +4809,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_Solution_rank2(self):
       """
@@ -4910,7 +4910,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_Solution_rank3(self):
       """
@@ -5027,7 +5027,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_Solution_rank4(self):
       """
@@ -5528,7 +5528,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -5551,7 +5551,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.577517867896)/(o+1.)+(-0.311056057931)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -5580,7 +5580,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -5681,7 +5681,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -5798,7 +5798,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedSolution_rank4(self):
       """
@@ -6299,7 +6299,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -6322,7 +6322,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.577517867896)/(o+1.)+(-0.311056057931)
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -6351,7 +6351,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -6452,7 +6452,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -6569,7 +6569,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -7070,7 +7070,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedFunction_rank0(self):
       """
@@ -7093,7 +7093,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.660452513249)/(o+1.)+(0.0485806965265)
       res=integrate(arg)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedFunction_rank1(self):
       """
@@ -7122,7 +7122,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedFunction_rank2(self):
       """
@@ -7223,7 +7223,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedFunction_rank3(self):
       """
@@ -7340,7 +7340,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunction_fromData_ReducedFunction_rank4(self):
       """
@@ -7841,7 +7841,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ContinuousFunction_rank0(self):
       """
@@ -7864,7 +7864,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.135634896213)*(1+2.*(dim-1.)/(o+1.))+(1.14751193474)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ContinuousFunction_rank1(self):
       """
@@ -7893,7 +7893,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ContinuousFunction_rank2(self):
       """
@@ -7994,7 +7994,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ContinuousFunction_rank3(self):
       """
@@ -8111,7 +8111,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ContinuousFunction_rank4(self):
       """
@@ -8612,7 +8612,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_Solution_rank0(self):
       """
@@ -8635,7 +8635,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.671549206889)*(1+2.*(dim-1.)/(o+1.))+(0.590841678335)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_Solution_rank1(self):
       """
@@ -8664,7 +8664,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_Solution_rank2(self):
       """
@@ -8765,7 +8765,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_Solution_rank3(self):
       """
@@ -8882,7 +8882,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_Solution_rank4(self):
       """
@@ -9383,7 +9383,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedSolution_rank0(self):
       """
@@ -9406,7 +9406,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.0121419382123)*(1+2.*(dim-1.)/(o+1.))+(1.51340172611)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedSolution_rank1(self):
       """
@@ -9435,7 +9435,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedSolution_rank2(self):
       """
@@ -9536,7 +9536,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedSolution_rank3(self):
       """
@@ -9653,7 +9653,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedSolution_rank4(self):
       """
@@ -10154,7 +10154,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -10177,7 +10177,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.0121419382123)*(1+2.*(dim-1.)/(o+1.))+(1.51340172611)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -10206,7 +10206,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -10307,7 +10307,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -10424,7 +10424,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -10925,7 +10925,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank0(self):
       """
@@ -10948,7 +10948,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.109474972552)*(1+2.*(dim-1.)/(o+1.))+(-0.989858320706)*dim
       res=integrate(arg)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank1(self):
       """
@@ -10977,7 +10977,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank2(self):
       """
@@ -11078,7 +11078,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank3(self):
       """
@@ -11195,7 +11195,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank4(self):
       """
@@ -11696,7 +11696,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank0(self):
       """
@@ -11719,7 +11719,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.135634896213)*(1+2.*(dim-1.)/(o+1.))+(1.14751193474)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank1(self):
       """
@@ -11748,7 +11748,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank2(self):
       """
@@ -11849,7 +11849,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank3(self):
       """
@@ -11966,7 +11966,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank4(self):
       """
@@ -12467,7 +12467,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_Solution_rank0(self):
       """
@@ -12490,7 +12490,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(-0.671549206889)*(1+2.*(dim-1.)/(o+1.))+(0.590841678335)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_Solution_rank1(self):
       """
@@ -12519,7 +12519,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_Solution_rank2(self):
       """
@@ -12620,7 +12620,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_Solution_rank3(self):
       """
@@ -12737,7 +12737,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_Solution_rank4(self):
       """
@@ -13238,7 +13238,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank0(self):
       """
@@ -13261,7 +13261,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.0121419382123)*(1+2.*(dim-1.)/(o+1.))+(1.51340172611)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank1(self):
       """
@@ -13290,7 +13290,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank2(self):
       """
@@ -13391,7 +13391,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank3(self):
       """
@@ -13508,7 +13508,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank4(self):
       """
@@ -14009,7 +14009,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -14032,7 +14032,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.0121419382123)*(1+2.*(dim-1.)/(o+1.))+(1.51340172611)*dim
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -14061,7 +14061,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -14162,7 +14162,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -14279,7 +14279,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -14780,7 +14780,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg,where=w_ref)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank0(self):
       """
@@ -14803,7 +14803,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
         ref=(0.109474972552)*(1+2.*(dim-1.)/(o+1.))+(-0.989858320706)*dim
       res=integrate(arg)
       self.assertTrue(isinstance(res,float),"wrong type of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank1(self):
       """
@@ -14832,7 +14832,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank2(self):
       """
@@ -14933,7 +14933,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank3(self):
       """
@@ -15050,7 +15050,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_integrate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank4(self):
       """
@@ -15551,7 +15551,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=integrate(arg)
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -15580,7 +15580,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -15616,7 +15616,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -15778,7 +15778,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -15968,7 +15968,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_Solution_rank0(self):
       """
@@ -15997,7 +15997,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_Solution_rank1(self):
       """
@@ -16033,7 +16033,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_Solution_rank2(self):
       """
@@ -16195,7 +16195,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_Solution_rank3(self):
       """
@@ -16385,7 +16385,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -16414,7 +16414,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -16450,7 +16450,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -16612,7 +16612,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -16802,7 +16802,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -16831,7 +16831,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -16867,7 +16867,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -17029,7 +17029,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -17219,7 +17219,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -17248,7 +17248,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -17284,7 +17284,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -17446,7 +17446,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -17636,7 +17636,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_Solution_rank0(self):
       """
@@ -17665,7 +17665,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_Solution_rank1(self):
       """
@@ -17701,7 +17701,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_Solution_rank2(self):
       """
@@ -17863,7 +17863,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_Solution_rank3(self):
       """
@@ -18053,7 +18053,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -18082,7 +18082,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -18118,7 +18118,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -18280,7 +18280,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -18470,7 +18470,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedContinuousFunction__rank0(self):
       """
@@ -18499,7 +18499,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),()+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedContinuousFunction__rank1(self):
       """
@@ -18535,7 +18535,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(2,)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedContinuousFunction__rank2(self):
       """
@@ -18697,7 +18697,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(4, 5)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_grad_onReducedFunction_fromData_ReducedContinuousFunction__rank3(self):
       """
@@ -18887,7 +18887,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       res=grad(arg,where=w_ref)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(6, 2, 2)+(dim,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -18913,7 +18913,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -18943,7 +18943,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -19045,7 +19045,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -19163,7 +19163,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ContinuousFunction_rank4(self):
       """
@@ -19665,7 +19665,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Solution_rank0(self):
       """
@@ -19691,7 +19691,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Solution_rank1(self):
       """
@@ -19721,7 +19721,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Solution_rank2(self):
       """
@@ -19823,7 +19823,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Solution_rank3(self):
       """
@@ -19941,7 +19941,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Solution_rank4(self):
       """
@@ -20443,7 +20443,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -20469,7 +20469,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -20499,7 +20499,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -20601,7 +20601,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -20719,7 +20719,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedSolution_rank4(self):
       """
@@ -21221,7 +21221,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -21247,7 +21247,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -21277,7 +21277,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -21379,7 +21379,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -21497,7 +21497,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -21999,7 +21999,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Function_rank0(self):
       """
@@ -22025,7 +22025,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Function_rank1(self):
       """
@@ -22055,7 +22055,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Function_rank2(self):
       """
@@ -22157,7 +22157,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Function_rank3(self):
       """
@@ -22275,7 +22275,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunction_fromData_Function_rank4(self):
       """
@@ -22777,7 +22777,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ContinuousFunction_rank0(self):
       """
@@ -22803,7 +22803,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -22829,7 +22829,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -22859,7 +22859,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -22961,7 +22961,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -23079,7 +23079,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ContinuousFunction_rank4(self):
       """
@@ -23581,7 +23581,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_Solution_rank0(self):
       """
@@ -23607,7 +23607,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_Solution_rank1(self):
       """
@@ -23637,7 +23637,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_Solution_rank2(self):
       """
@@ -23739,7 +23739,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_Solution_rank3(self):
       """
@@ -23857,7 +23857,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_Solution_rank4(self):
       """
@@ -24359,7 +24359,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -24385,7 +24385,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -24415,7 +24415,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -24517,7 +24517,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -24635,7 +24635,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedSolution_rank4(self):
       """
@@ -25137,7 +25137,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -25163,7 +25163,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -25193,7 +25193,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -25295,7 +25295,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -25413,7 +25413,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -25915,7 +25915,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedFunction_rank0(self):
       """
@@ -25941,7 +25941,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedFunction_rank1(self):
       """
@@ -25971,7 +25971,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedFunction_rank2(self):
       """
@@ -26073,7 +26073,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedFunction_rank3(self):
       """
@@ -26191,7 +26191,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunction_fromData_ReducedFunction_rank4(self):
       """
@@ -26693,7 +26693,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank0(self):
       """
@@ -26719,7 +26719,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ContinuousFunction_rank1(self):
       """
@@ -26749,7 +26749,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ContinuousFunction_rank2(self):
       """
@@ -26851,7 +26851,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ContinuousFunction_rank3(self):
       """
@@ -26969,7 +26969,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ContinuousFunction_rank4(self):
       """
@@ -27471,7 +27471,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_Solution_rank0(self):
       """
@@ -27497,7 +27497,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_Solution_rank1(self):
       """
@@ -27527,7 +27527,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_Solution_rank2(self):
       """
@@ -27629,7 +27629,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_Solution_rank3(self):
       """
@@ -27747,7 +27747,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_Solution_rank4(self):
       """
@@ -28249,7 +28249,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedSolution_rank0(self):
       """
@@ -28275,7 +28275,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedSolution_rank1(self):
       """
@@ -28305,7 +28305,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedSolution_rank2(self):
       """
@@ -28407,7 +28407,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedSolution_rank3(self):
       """
@@ -28525,7 +28525,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedSolution_rank4(self):
       """
@@ -29027,7 +29027,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -29053,7 +29053,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -29083,7 +29083,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -29185,7 +29185,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -29303,7 +29303,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -29805,7 +29805,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank0(self):
       """
@@ -29831,7 +29831,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank1(self):
       """
@@ -29861,7 +29861,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank2(self):
       """
@@ -29963,7 +29963,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank3(self):
       """
@@ -30081,7 +30081,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onFunctionOnBoundary_fromData_FunctionOnBoundary_rank4(self):
       """
@@ -30583,7 +30583,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank1(self):
       """
@@ -30613,7 +30613,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank2(self):
       """
@@ -30715,7 +30715,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank3(self):
       """
@@ -30833,7 +30833,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ContinuousFunction_rank4(self):
       """
@@ -31335,7 +31335,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_Solution_rank0(self):
       """
@@ -31361,7 +31361,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_Solution_rank1(self):
       """
@@ -31391,7 +31391,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_Solution_rank2(self):
       """
@@ -31493,7 +31493,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_Solution_rank3(self):
       """
@@ -31611,7 +31611,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_Solution_rank4(self):
       """
@@ -32113,7 +32113,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank0(self):
       """
@@ -32139,7 +32139,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank1(self):
       """
@@ -32169,7 +32169,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank2(self):
       """
@@ -32271,7 +32271,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank3(self):
       """
@@ -32389,7 +32389,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedSolution_rank4(self):
       """
@@ -32891,7 +32891,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -32917,7 +32917,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -32947,7 +32947,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -33049,7 +33049,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -33167,7 +33167,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -33669,7 +33669,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank0(self):
       """
@@ -33695,7 +33695,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank1(self):
       """
@@ -33725,7 +33725,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank2(self):
       """
@@ -33827,7 +33827,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank3(self):
       """
@@ -33945,7 +33945,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedFunctionOnBoundary_fromData_ReducedFunctionOnBoundary_rank4(self):
       """
@@ -34447,7 +34447,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_ContinuousFunction_rank0(self):
       """
@@ -34473,7 +34473,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_ContinuousFunction_rank1(self):
       """
@@ -34503,7 +34503,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_ContinuousFunction_rank2(self):
       """
@@ -34605,7 +34605,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_ContinuousFunction_rank3(self):
       """
@@ -34723,7 +34723,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_ContinuousFunction_rank4(self):
       """
@@ -35225,7 +35225,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_Solution_rank0(self):
       """
@@ -35251,7 +35251,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_Solution_rank1(self):
       """
@@ -35281,7 +35281,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_Solution_rank2(self):
       """
@@ -35383,7 +35383,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_Solution_rank3(self):
       """
@@ -35501,7 +35501,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onSolution_fromData_Solution_rank4(self):
       """
@@ -36003,7 +36003,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ContinuousFunction_rank0(self):
       """
@@ -36029,7 +36029,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ContinuousFunction_rank1(self):
       """
@@ -36059,7 +36059,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ContinuousFunction_rank2(self):
       """
@@ -36161,7 +36161,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ContinuousFunction_rank3(self):
       """
@@ -36279,7 +36279,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ContinuousFunction_rank4(self):
       """
@@ -36781,7 +36781,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_Solution_rank0(self):
       """
@@ -36807,7 +36807,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_Solution_rank1(self):
       """
@@ -36837,7 +36837,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_Solution_rank2(self):
       """
@@ -36939,7 +36939,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_Solution_rank3(self):
       """
@@ -37057,7 +37057,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_Solution_rank4(self):
       """
@@ -37559,7 +37559,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedSolution_rank0(self):
       """
@@ -37585,7 +37585,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedSolution_rank1(self):
       """
@@ -37615,7 +37615,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedSolution_rank2(self):
       """
@@ -37717,7 +37717,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedSolution_rank3(self):
       """
@@ -37835,7 +37835,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedSolution_rank4(self):
       """
@@ -38337,7 +38337,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -38363,7 +38363,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -38393,7 +38393,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -38495,7 +38495,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -38613,7 +38613,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedSolution_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -39115,7 +39115,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ContinuousFunction_rank0(self):
       """
@@ -39141,7 +39141,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ContinuousFunction_rank1(self):
       """
@@ -39171,7 +39171,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ContinuousFunction_rank2(self):
       """
@@ -39273,7 +39273,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ContinuousFunction_rank3(self):
       """
@@ -39391,7 +39391,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ContinuousFunction_rank4(self):
       """
@@ -39893,7 +39893,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_Solution_rank0(self):
       """
@@ -39919,7 +39919,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_Solution_rank1(self):
       """
@@ -39949,7 +39949,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_Solution_rank2(self):
       """
@@ -40051,7 +40051,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_Solution_rank3(self):
       """
@@ -40169,7 +40169,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_Solution_rank4(self):
       """
@@ -40671,7 +40671,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedSolution_rank0(self):
       """
@@ -40697,7 +40697,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedSolution_rank1(self):
       """
@@ -40727,7 +40727,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedSolution_rank2(self):
       """
@@ -40829,7 +40829,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedSolution_rank3(self):
       """
@@ -40947,7 +40947,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedSolution_rank4(self):
       """
@@ -41449,7 +41449,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedContinuousFunction_rank0(self):
       """
@@ -41475,7 +41475,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedContinuousFunction_rank1(self):
       """
@@ -41505,7 +41505,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(2,),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedContinuousFunction_rank2(self):
       """
@@ -41607,7 +41607,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedContinuousFunction_rank3(self):
       """
@@ -41725,7 +41725,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(6, 2, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_interpolate_onReducedContinuousFunction_fromData_ReducedContinuousFunction_rank4(self):
       """
@@ -42227,7 +42227,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong functionspace of result.")
       self.assertEqual(res.getShape(),(4, 5, 3, 2),"wrong shape of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onFunction_fromData_ContinuousFunction(self):
       """
@@ -42256,7 +42256,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onFunction_fromData_Solution(self):
       """
@@ -42285,7 +42285,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onFunction_fromData_ReducedSolution(self):
       """
@@ -42314,7 +42314,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onFunction_fromData_ReducedContinuousFunction(self):
       """
@@ -42343,7 +42343,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onReducedFunction_fromData_ContinuousFunction(self):
       """
@@ -42372,7 +42372,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onReducedFunction_fromData_Solution(self):
       """
@@ -42401,7 +42401,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onReducedFunction_fromData_ReducedSolution(self):
       """
@@ -42430,7 +42430,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_div_onReducedFunction_fromData_ReducedContinuousFunction(self):
       """
@@ -42459,7 +42459,7 @@ class Test_Util_SpatialFunctions_noGradOnBoundary_noContact(unittest.TestCase):
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(),"wrong shape of result.")
       self.assertEqual(res.getFunctionSpace(),w_ref,"wrong function space of result.")
-      self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+      self.assertLess(Lsup(res-ref), self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_L2_onFunction_fromData_rank0(self):
       """
diff --git a/escriptcore/test/python/test_util_spatial_functions2.py b/escriptcore/test/python/test_util_spatial_functions2.py
index 5eea53c..e0067d4 100644
--- a/escriptcore/test/python/test_util_spatial_functions2.py
+++ b/escriptcore/test/python/test_util_spatial_functions2.py
@@ -42,6 +42,7 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 import esys.escriptcore.utestselect as unittest
 from esys.escript import *
 from numpy import array
+import numpy
 
 from test_util_spatial_functions1 import Test_Util_SpatialFunctions_noGradOnBoundary_noContact
 
diff --git a/escriptcore/test/python/test_util_spatial_functions3.py b/escriptcore/test/python/test_util_spatial_functions3.py
index a9b24e6..a5f8138 100644
--- a/escriptcore/test/python/test_util_spatial_functions3.py
+++ b/escriptcore/test/python/test_util_spatial_functions3.py
@@ -42,7 +42,7 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 import esys.escriptcore.utestselect as unittest
 from esys.escript import *
 from numpy import array
-
+import numpy
 
 from test_util_spatial_functions2 import Test_Util_SpatialFunctions_noGradOnBoundary
 
diff --git a/escriptcore/test/python/test_util_unary_no_tagged_data.py b/escriptcore/test/python/test_util_unary_no_tagged_data.py
index f3d4b0b..2840413 100644
--- a/escriptcore/test/python/test_util_unary_no_tagged_data.py
+++ b/escriptcore/test/python/test_util_unary_no_tagged_data.py
@@ -39,13 +39,47 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 
 import esys.escriptcore.utestselect as unittest
 import numpy
+import math
+import cmath
 from esys.escript import *
 from test_util_base import Test_util_base
 
+haveLapack = hasFeature('lapack')
+
 class Test_util_unary_no_tagged_data(Test_util_base):
    """
    test for unary operations. No tagged data are tested.
    """
+   def iterateops(self, ops, vals):
+      for p in ops:
+          o,c,z=p
+          for v in vals:
+            res=o(v)
+            if isinstance(v,complex):
+               ref=z(v)
+            else:
+               ref=c(v)
+            self.assertTrue(isinstance(res,type(ref)),"wrong type of result.")
+            self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
+            d=Data(v)
+            res=o(d)
+            self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result for data")
+
+   #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   def test_complex_nonzero(self):
+      #Compare results of unary ops provided by util and their python equivalents
+      ops=[(sin,math.sin,cmath.sin), (cos,math.cos,cmath.cos), (tan,math.tan,cmath.tan), (log,math.log,cmath.log), (log10, math.log10, cmath.log10), (Abs, abs, abs),
+(acos,math.acos,cmath.acos), (acosh,math.acosh,cmath.acosh), (asin,math.asin,cmath.asin), (asinh, math.asinh,cmath.asinh),
+(cosh, math.cosh, cmath.cosh), (exp, math.exp, cmath.exp), (sinh, math.sinh, cmath.sinh), (sqrt, math.sqrt, cmath.sqrt)]
+      vals=[1+0j,-1+0j,1j, -1j, math.pi*1j,3+4j]
+      self.iterateops(ops,vals)
+      ops=[(atan,math.atan,cmath.atan)]
+      vals=[1+0j,-1+0j, math.pi*1j,3+4j]
+      self.iterateops(ops,vals)
+      ops=[(atanh,math.atanh,cmath.atanh)]
+      vals=[1j, -1j, math.pi*1j,3+4j]
+      self.iterateops(ops,vals)
+
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_log10_float_rank0(self):
       arg=52.2519689858
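
The hunk above adds iterateops(), which walks a list of (escript op, math reference, cmath reference) triples and picks the reference implementation according to whether the input value is complex. Below is a standalone sketch of that dispatch pattern, written without esys.escript: the cmath functions stand in for the ops under test, and check()/tol are illustrative names only.

    # Standalone sketch of the (op, real_ref, complex_ref) dispatch used by
    # iterateops() above; plain callables stand in for the escript ops.
    import math, cmath

    def check(ops, vals, tol=1e-12):
        for op, real_ref, complex_ref in ops:
            for v in vals:
                # pick the reference implementation based on the argument type
                ref = complex_ref(v) if isinstance(v, complex) else real_ref(v)
                res = op(v)
                assert abs(res - ref) <= tol * max(abs(ref), 1.0), (op, v, res, ref)

    # here the "op" column is itself cmath, pretending to be the library under test
    ops = [(cmath.sin, math.sin, cmath.sin),
           (cmath.exp, math.exp, cmath.exp)]
    vals = [0.5, -1.25, 1j, 3 + 4j, math.pi * 1j]
    check(ops, vals)
    print("all unary checks passed")
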
@@ -8406,7 +8440,6 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertTrue(Lsup(matrix_mult(res,arg)-kronecker(3))<=self.RES_TOL,"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_constData_dim4(self):
-      haveLapack=(getEscriptParamInt('LAPACK_SUPPORT')>0)
       arg=Data(numpy.array([[2.7386874118759605, 0.24320192932450824, -0.98551433885644957,1], [-0.47237322912611623, 
 2.0770157618999936, -0.70532850079694409,2], [-0.55077067435537064, -0.72886734893974148, 
 1.8532894898622749,3],[0,1,0,1]]),self.functionspace)
@@ -8443,7 +8476,7 @@ class Test_util_unary_no_tagged_data(Test_util_base):
         else:
           self.fail('Singular matrix (3x3) did not fail to invert.')
         #Unsupported matrix sizes are checked in the _dim4 tests so I won't check it here
-        if getEscriptParamInt('LAPACK_SUPPORT')>0:
+        if haveLapack:
             arg=Data([[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]], self.functionspace)
             try:
                 inverse(arg)
@@ -8483,7 +8516,6 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertTrue(Lsup(matrix_mult(res,arg)-kronecker(3))<=self.RES_TOL,"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_expandedData_dim4(self):
-      haveLapack=(getEscriptParamInt('LAPACK_SUPPORT')>0)
       msk_arg=whereNegative(self.functionspace.getX()[0]-0.5)
       arg=msk_arg*numpy.array([[2.2418112382624198, 0.81650662903581805, -0.094756021110331989,0], [-0.79442369492917742, 
 1.9995946283413137, 0.99436683143019566,1], [0.20497870840231047, 0.56858349778979589, 
@@ -8501,21 +8533,21 @@ class Test_util_unary_no_tagged_data(Test_util_base):
         self.assertTrue(Lsup(matrix_mult(res,arg)-kronecker(4))<=self.RES_TOL,"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_expandedData_singular(self):
-        arg=Data([[0]],self.functionspace, expand=True)
+        arg=Data([[0]],self.functionspace, True)
         try:
            inverse(arg)
         except RuntimeError:
            pass
         else:
            self.fail('Singular matrix (1x1) did not fail to invert.')
-        arg=Data([[0,0],[0,1]],self.functionspace, expand=True)
+        arg=Data([[0,0],[0,1]],self.functionspace, True)
         try:
           inverse(arg)
         except RuntimeError:
           pass
         else:
           self.fail('Singular matrix (2x2) did not fail to invert.')
-        arg=Data([[0,0,0],[0,1,0],[1,1,1]],self.functionspace, expand=True)
+        arg=Data([[0,0,0],[0,1,0],[1,1,1]],self.functionspace, True)
         try:
           inverse(arg)
         except RuntimeError:
@@ -8523,8 +8555,8 @@ class Test_util_unary_no_tagged_data(Test_util_base):
         else:
           self.fail('Singular matrix (3x3) did not fail to invert.')
         #Unsupported matrix sizes are checked in the _dim4 tests so I won't check it here
-        if getEscriptParamInt('LAPACK_SUPPORT')>0:
-            arg=Data([[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]], self.functionspace, expand=True)
+        if haveLapack:
+            arg=Data([[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]], self.functionspace, True)
             try:
                 inverse(arg)
             except RuntimeError:
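
The LAPACK-related hunks replace the per-test getEscriptParamInt('LAPACK_SUPPORT')>0 lookup with the module-level flag haveLapack = hasFeature('lapack') added near the imports. A small sketch of that hoisting pattern, under the assumption that escript is not importable here: has_feature() is a hypothetical stand-in for hasFeature(), LapackGuardSketch is an illustrative name, and the skipUnless variant is an alternative shown for comparison, not what the diff itself does.

    import unittest

    def has_feature(name):
        # hypothetical stand-in for esys.escript's hasFeature()
        return name == 'lapack'

    # evaluated once at import time instead of inside every test method
    haveLapack = has_feature('lapack')

    class LapackGuardSketch(unittest.TestCase):
        def test_inline_guard(self):
            # the diff's pattern: run the LAPACK-only branch only when available
            if haveLapack:
                pass  # the 4x4 singular-matrix inversion check would go here

        @unittest.skipUnless(haveLapack, "no LAPACK support")
        def test_skip_decorator(self):
            # alternative: skip the whole test when the feature is missing
            pass

    if __name__ == "__main__":
        unittest.main()
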
@@ -9895,19 +9927,19 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertEqual(res.getShape(),(2, 3, 2, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_array_rank2(self):
+   def test_antisymmetric_array_rank2(self):
       arg=numpy.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=numpy.array([[0.0, -1.0, -2.0], [1.0, 0.0, -1.0], [2.0, 1.0, 0.0]])
       self.assertTrue(isinstance(res,numpy.ndarray),"wrong type of result.")
       self.assertEqual(res.shape,(3, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_array_rank4(self):
+   def test_antisymmetric_array_rank4(self):
       arg=numpy.array([[[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], [[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]], [[12.0, 13.0, 14.0], [15.0, 
 16.0, 17.0]]], [[[18.0, 19.0, 20.0], [21.0, 22.0, 23.0]], [[24.0, 25.0, 26.0], [27.0, 28.0, 29.0]], [[30.0, 31.0, 32.0], [33.0, 
 34.0, 35.0]]]])
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=numpy.array([[[[0.0, -2.5, -5.0], [-7.5, -10.0, -12.5]], [[2.5, 0.0, -2.5], [-5.0, -7.5, -10.0]], [[5.0, 2.5, 0.0], 
 [-2.5, -5.0, -7.5]]], [[[7.5, 5.0, 2.5], [0.0, -2.5, -5.0]], [[10.0, 7.5, 5.0], [2.5, 0.0, -2.5]], [[12.5, 10.0, 7.5], [5.0, 
 2.5, 0.0]]]])
@@ -9915,19 +9947,19 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertEqual(res.shape,(2, 3, 2, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_constData_rank2(self):
+   def test_antisymmetric_constData_rank2(self):
       arg=Data(numpy.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]]),self.functionspace)
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=Data(numpy.array([[0.0, -1.0, -2.0], [1.0, 0.0, -1.0], [2.0, 1.0, 0.0]]),self.functionspace)
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_constData_rank4(self):
+   def test_antisymmetric_constData_rank4(self):
       arg=Data(numpy.array([[[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], [[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]], [[12.0, 13.0, 14.0], 
 [15.0, 16.0, 17.0]]], [[[18.0, 19.0, 20.0], [21.0, 22.0, 23.0]], [[24.0, 25.0, 26.0], [27.0, 28.0, 29.0]], [[30.0, 31.0, 32.0], 
 [33.0, 34.0, 35.0]]]]),self.functionspace)
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=Data(numpy.array([[[[0.0, -2.5, -5.0], [-7.5, -10.0, -12.5]], [[2.5, 0.0, -2.5], [-5.0, -7.5, -10.0]], [[5.0, 2.5, 
 0.0], [-2.5, -5.0, -7.5]]], [[[7.5, 5.0, 2.5], [0.0, -2.5, -5.0]], [[10.0, 7.5, 5.0], [2.5, 0.0, -2.5]], [[12.5, 10.0, 7.5], 
 [5.0, 2.5, 0.0]]]]),self.functionspace)
@@ -9935,11 +9967,11 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertEqual(res.getShape(),(2, 3, 2, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_expandedData_rank2(self):
+   def test_antisymmetric_expandedData_rank2(self):
       msk_arg=whereNegative(self.functionspace.getX()[0]-0.5)
       arg=msk_arg*numpy.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])+(1.-msk_arg)*numpy.array([[-0.0, -1.0, 
 -2.0], [-3.0, -4.0, -5.0], [-6.0, -7.0, -8.0]])
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
       ref=msk_ref*numpy.array([[0.0, -1.0, -2.0], [1.0, 0.0, -1.0], [2.0, 1.0, 0.0]])+(1.-msk_ref)*numpy.array([[0.0, 1.0, 
 2.0], [-1.0, 0.0, 1.0], [-2.0, -1.0, 0.0]])
@@ -9947,14 +9979,14 @@ class Test_util_unary_no_tagged_data(Test_util_base):
       self.assertEqual(res.getShape(),(3, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_expandedData_rank4(self):
+   def test_antisymmetric_expandedData_rank4(self):
       msk_arg=whereNegative(self.functionspace.getX()[0]-0.5)
       arg=msk_arg*numpy.array([[[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], [[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]], [[12.0, 13.0, 14.0], 
 [15.0, 16.0, 17.0]]], [[[18.0, 19.0, 20.0], [21.0, 22.0, 23.0]], [[24.0, 25.0, 26.0], [27.0, 28.0, 29.0]], [[30.0, 31.0, 32.0], 
 [33.0, 34.0, 35.0]]]])+(1.-msk_arg)*numpy.array([[[[-0.0, -1.0, -2.0], [-3.0, -4.0, -5.0]], [[-6.0, -7.0, -8.0], [-9.0, -10.0, 
 -11.0]], [[-12.0, -13.0, -14.0], [-15.0, -16.0, -17.0]]], [[[-18.0, -19.0, -20.0], [-21.0, -22.0, -23.0]], [[-24.0, -25.0, 
 -26.0], [-27.0, -28.0, -29.0]], [[-30.0, -31.0, -32.0], [-33.0, -34.0, -35.0]]]])
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       msk_ref=whereNegative(self.functionspace.getX()[0]-0.5)
       ref=msk_ref*numpy.array([[[[0.0, -2.5, -5.0], [-7.5, -10.0, -12.5]], [[2.5, 0.0, -2.5], [-5.0, -7.5, -10.0]], [[5.0, 2.5, 
 0.0], [-2.5, -5.0, -7.5]]], [[[7.5, 5.0, 2.5], [0.0, -2.5, -5.0]], [[10.0, 7.5, 5.0], [2.5, 0.0, -2.5]], [[12.5, 10.0, 7.5], 
diff --git a/escriptcore/test/python/test_util_unary_with_tagged_data.py b/escriptcore/test/python/test_util_unary_with_tagged_data.py
index a6db0f7..d81b856 100644
--- a/escriptcore/test/python/test_util_unary_with_tagged_data.py
+++ b/escriptcore/test/python/test_util_unary_with_tagged_data.py
@@ -42,6 +42,8 @@ import numpy
 from esys.escript import *
 from test_util_base import Test_util_base
 
+haveLapack = hasFeature('lapack')
+
 class Test_util_unary_with_tagged_data(Test_util_base):
    """
    test for unary operations. only tagged data are tested.
@@ -3632,7 +3634,6 @@ class Test_util_unary_with_tagged_data(Test_util_base):
       self.assertTrue(Lsup(matrix_mult(res,arg)-kronecker(3))<=self.RES_TOL,"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_taggedData_dim4(self):
-      haveLapack=(getEscriptParamInt('LAPACK_SUPPORT')>0)
       arg=Data(numpy.array([[1.0566731035132446, -0.23529223422203982, -0.73657527200271922,0], [-0.90461086237095145, 
 2.3942152365412581, -0.0078023115760492701,0], [-0.32951652966235834, 0.5634604257647613, 
 1.716379935670141,0],[0,0,0,1]]),self.functionspace)
@@ -3650,7 +3651,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_taggedData_singularDefaultTag(self):
         #In this test the other tag is definitely invertible the error is in the default tag
-        arg=Data([[0]],self.functionspace, expand=True)
+        arg=Data([[0]],self.functionspace, True)
         arg.setTaggedValue(1,[[1]])
         try:
            inverse(arg)
@@ -3658,7 +3659,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
            pass
         else:
            self.fail('Singular matrix (1x1) did not fail to invert.')
-        arg=Data([[0,0],[0,1]],self.functionspace, expand=True)
+        arg=Data([[0,0],[0,1]],self.functionspace, True)
         arg.setTaggedValue(1,[[1,0],[0,1]])
         try:
           inverse(arg)
@@ -3666,7 +3667,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
           pass
         else:
           self.fail('Singular matrix (2x2) did not fail to invert.')
-        arg=Data([[0,0,0],[0,1,0],[1,1,1]],self.functionspace, expand=True)
+        arg=Data([[0,0,0],[0,1,0],[1,1,1]],self.functionspace, True)
         arg.setTaggedValue(1,[[1,0,0],[0,1,0],[0,0,1]])
         try:
           inverse(arg)
@@ -3675,8 +3676,8 @@ class Test_util_unary_with_tagged_data(Test_util_base):
         else:
           self.fail('Singular matrix (3x3) did not fail to invert.')
         #Unsupported matrix sizes are checked in the _dim4 tests so I won't check it here
-        if getEscriptParamInt('LAPACK_SUPPORT')>0:
-            arg=Data([[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]], self.functionspace, expand=True)
+        if haveLapack:
+            arg=Data([[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]], self.functionspace, True)
             arg.setTaggedValue(1, [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]])
             try:
                 inverse(arg)
@@ -3687,7 +3688,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    def test_inverse_taggedData_singularNonDefaultTag(self):
         #In this test the default tag is definitely invertible the error is in the other tag
-        arg=Data([[1]],self.functionspace, expand=True)
+        arg=Data([[1]],self.functionspace, True)
         arg.setTaggedValue(1,[[0]])
         try:
            inverse(arg)
@@ -3695,7 +3696,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
            pass
         else:
            self.fail('Singular matrix (1x1) did not fail to invert.')
-        arg=Data([[1,0],[0,1]],self.functionspace, expand=True)
+        arg=Data([[1,0],[0,1]],self.functionspace, True)
         arg.setTaggedValue(1,[[0,0],[0,1]])
         try:
           inverse(arg)
@@ -3703,7 +3704,7 @@ class Test_util_unary_with_tagged_data(Test_util_base):
           pass
         else:
           self.fail('Singular matrix (2x2) did not fail to invert.')
-        arg=Data([[1,0,0],[0,1,0],[0,0,1]],self.functionspace, expand=True)
+        arg=Data([[1,0,0],[0,1,0],[0,0,1]],self.functionspace, True)
         arg.setTaggedValue(1,[[0,0,0],[0,1,0],[1,1,1]])
         try:
           inverse(arg)
@@ -3712,8 +3713,8 @@ class Test_util_unary_with_tagged_data(Test_util_base):
         else:
           self.fail('Singular matrix (3x3) did not fail to invert.')
         #Unsupported matrix sizes are checked in the _dim4 tests so I won't check it here
-        if getEscriptParamInt('LAPACK_SUPPORT')>0:
-            arg=Data([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], self.functionspace, expand=True)
+        if haveLapack:
+            arg=Data([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], self.functionspace, True)
             arg.setTaggedValue(1,[[0,0,0,0],[1,4,5,8],[1.0007, 4.00005, 19.00001, 34.000],[-1,1,-243,0]] )
             try:
                 inverse(arg)
@@ -4342,24 +4343,24 @@ class Test_util_unary_with_tagged_data(Test_util_base):
       self.assertEqual(res.getShape(),(2, 3, 2, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_taggedData_rank2(self):
+   def test_antisymmetric_taggedData_rank2(self):
       arg=Data(numpy.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]]),self.functionspace)
       arg.setTaggedValue(1,numpy.array([[-0.0, -1.0, -2.0], [-3.0, -4.0, -5.0], [-6.0, -7.0, -8.0]]))
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=Data(numpy.array([[0.0, -1.0, -2.0], [1.0, 0.0, -1.0], [2.0, 1.0, 0.0]]),self.functionspace)
       ref.setTaggedValue(1,numpy.array([[0.0, 1.0, 2.0], [-1.0, 0.0, 1.0], [-2.0, -1.0, 0.0]]))
       self.assertTrue(isinstance(res,Data),"wrong type of result.")
       self.assertEqual(res.getShape(),(3, 3),"wrong shape of result.")
       self.assertTrue(Lsup(res-ref)<=self.RES_TOL*Lsup(ref),"wrong result")
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-   def test_nonsymmetric_taggedData_rank4(self):
+   def test_antisymmetric_taggedData_rank4(self):
       arg=Data(numpy.array([[[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], [[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]], [[12.0, 13.0, 14.0], 
 [15.0, 16.0, 17.0]]], [[[18.0, 19.0, 20.0], [21.0, 22.0, 23.0]], [[24.0, 25.0, 26.0], [27.0, 28.0, 29.0]], [[30.0, 31.0, 32.0], 
 [33.0, 34.0, 35.0]]]]),self.functionspace)
       arg.setTaggedValue(1,numpy.array([[[[-0.0, -1.0, -2.0], [-3.0, -4.0, -5.0]], [[-6.0, -7.0, -8.0], [-9.0, -10.0, -11.0]], 
 [[-12.0, -13.0, -14.0], [-15.0, -16.0, -17.0]]], [[[-18.0, -19.0, -20.0], [-21.0, -22.0, -23.0]], [[-24.0, -25.0, -26.0], 
 [-27.0, -28.0, -29.0]], [[-30.0, -31.0, -32.0], [-33.0, -34.0, -35.0]]]]))
-      res=nonsymmetric(arg)
+      res=antisymmetric(arg)
       ref=Data(numpy.array([[[[0.0, -2.5, -5.0], [-7.5, -10.0, -12.5]], [[2.5, 0.0, -2.5], [-5.0, -7.5, -10.0]], [[5.0, 2.5, 
 0.0], [-2.5, -5.0, -7.5]]], [[[7.5, 5.0, 2.5], [0.0, -2.5, -5.0]], [[10.0, 7.5, 5.0], [2.5, 0.0, -2.5]], [[12.5, 10.0, 7.5], 
 [5.0, 2.5, 0.0]]]]),self.functionspace)
diff --git a/esysUtils/src/EsysAssert.h b/esysUtils/src/EsysAssert.h
deleted file mode 100644
index 877bd40..0000000
--- a/esysUtils/src/EsysAssert.h
+++ /dev/null
@@ -1,100 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_EsysAssert_20040330_H
-#define escript_EsysAssert_20040330_H
-#include "system_dep.h"
-/**
-   \brief
-   EsysAssert is a MACRO that will throw an exception if the boolean
-   condition specified is false.
-
-   Description:
-   EsysAssert is conditionally compiled into code only when DOASSERT is
-   defined.  When DOASSERT is not defined, the EsysAssert statement is
-   entirely removed from code.
-*/
-
-//
-// Note that the ANSI C Standard requires all headers to be idempotent except
-// <assert.h> which is explicitly required not to be idempotent (section 4.1.2).
-// This version of EsysAssert follows this requirement, consequently this
-// part of the header is intentionally outside the single pass guard.
-//
-
-#undef EsysAssert
-
-#if defined DOASSERT
-
-//
-// DOASSERT is defined, replace EsysAssert with Exception throw
-//
-
-#include "EsysAssertException.h"
-#include <sstream>
-
-namespace esysUtils {
-
-  class ErrStream
-  {
-    public:
-    template <typename Tmpl>
-    ErrStream& operator<<(Tmpl t)
-    {
-      std::stringstream str;
-      str << t;
-      m_msg += str.str();
-      
-      return *this;
-    }
-    
-    inline
-    const std::string &toString() const
-    {
-      return m_msg;
-    }
-
-    private:
-      std::string m_msg;
-  };
-
-  inline
-  std::ostream& operator<<(std::ostream& oStream,
-                                  const ErrStream& errStream)
-  {
-    oStream << errStream.toString();
-    return oStream;
-  }
-
-}
-
-#define EsysAssert(AssertTest,AssertMessage) \
-   (void)((AssertTest) || \
-           ((esysUtils::EsysAssertException::assertFailure(#AssertTest, __DATE__, __FILE__, __LINE__, \
-             (esysUtils::ErrStream()<<AssertMessage).toString())),0),0)
-
-#else
-
-//
-// DOASSERT is not defined, replace EsysAssert with "NO-OP"
-//
-
-#define EsysAssert(AssertTest,AssertMessage) ((void)0)
-
-#endif
-
-#endif
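
For context on the macro removed above: EsysAssert only expands to a real check when
DOASSERT is defined at build time; otherwise it compiles to ((void)0). A minimal usage
sketch follows; the function name and message are invented for illustration and are not
taken from escript:

    // Hypothetical caller of the removed EsysAssert macro.  The message part is
    // streamed into esysUtils::ErrStream, so operator<< chaining works.
    #include "EsysAssert.h"

    void resizeBuffer(int newSize)
    {
        EsysAssert(newSize >= 0,
                   "resizeBuffer: expected a non-negative size but got " << newSize);
        // ... actual resize work ...
    }
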
diff --git a/esysUtils/src/EsysAssertException.cpp b/esysUtils/src/EsysAssertException.cpp
deleted file mode 100644
index 578a65d..0000000
--- a/esysUtils/src/EsysAssertException.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "EsysAssertException.h"
-#include <sstream>
-
-
-using namespace esysUtils;
-
-const std::string 
-EsysAssertException::exceptionNameValue("EsysAssertException");
-
-
-const std::string &
-EsysAssertException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
-
-void 
-EsysAssertException::assertFailure (const std::string& assertion,
-                           const std::string& date, const std::string& file,
-                           int line, const std::string& errDesc)
-{
-  std::stringstream message;
- 
-  message << std::endl
-          << "EsysAssert(" << assertion << ") failed with message - " 
-          << std::endl
-          << "\"" << errDesc << "\"" << std::endl
-          << "Assertion is located in File : " << file
-          << " at Line: " << line << std::endl
-          << "File Compilation Date: " << date << std::endl;
- 
-  throw EsysAssertException(message.str());
-}
diff --git a/esysUtils/src/EsysAssertException.h b/esysUtils/src/EsysAssertException.h
deleted file mode 100644
index dbe7dbe..0000000
--- a/esysUtils/src/EsysAssertException.h
+++ /dev/null
@@ -1,118 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined escript_EsysAssertException_20040330_H
-#define escript_EsysAssertException_20040330_H
-#include "system_dep.h"
-
-#include "EsysException.h"
-
-
-namespace esysUtils {
-
-  /**
-  \brief
-  EsysAssertException exception class.
-
-  Description:
-  EsysAssertException exception class.
-  The class provides a public function returning the exception name.
-  */
-  class EsysAssertException : public EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    ESYSUTILS_DLL_API
-    inline
-    EsysAssertException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESYSUTILS_DLL_API
-    inline
-    EsysAssertException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    ESYSUTILS_DLL_API
-    inline
-    EsysAssertException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    ESYSUTILS_DLL_API
-    inline
-    EsysAssertException(const EsysAssertException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    ESYSUTILS_DLL_API
-    inline 
-    EsysAssertException &
-    operator=(const EsysAssertException &other) THROW(NO_ARG)
-        {
-           Parent::operator=(other);
-           updateMessage();   
-           return *this;
-        }
-
-
-    /// Destructor
-    ESYSUTILS_DLL_API
-    virtual ~EsysAssertException() THROW(NO_ARG) {}
-
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    ESYSUTILS_DLL_API
-    virtual const std::string & exceptionName() const;
-
-    /**
-    \brief
-    Builds a formatted message and throws an EsysAssertException.
-    */
-    ESYSUTILS_DLL_API
-    static void assertFailure (const std::string& assertion,
-                               const std::string& date,
-                               const std::string& file,
-                               int line, const std::string& errDesc);
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note: you need one of these too,
-    // and an overloaded exceptionName() in your .cpp implementation file.
-    static const std::string exceptionNameValue;
-  };
-
-} // end of namespace
- 
-#endif
diff --git a/esysUtils/src/EsysException.cpp b/esysUtils/src/EsysException.cpp
deleted file mode 100644
index c9f44fe..0000000
--- a/esysUtils/src/EsysException.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "EsysException.h"
-
-using namespace esysUtils;
-
-const std::string EsysException::exceptionNameValue("GeneralEsysException");
-
-std::ostream &operator<<(std::ostream &output, EsysException &inException){
-  output << inException.toString();
-  return output;
-}
-
-EsysException::EsysException():
-Parent(),
-m_reason()
-{
-  updateMessage();   
-}
-
-EsysException::EsysException(const std::string &exceptionReason):
-Parent(),
-m_reason(exceptionReason)
-{
-  updateMessage();   
-}
-
-// Copy Constructor.
-// Do not call the parent copy constructor as it has
-// undefined effects. In particular, it may call what(),
-// which will result in the parent storing a pointer to
-// m_exceptionMessage's storage, especially on Windows.
-EsysException::EsysException(const EsysException &other):
-Parent(),
-m_reason(other.m_reason)
-{
-  updateMessage();   
-}
-
-EsysException &
-EsysException::operator=(const EsysException &other) THROW(NO_ARG) 
-{
-  m_reason = other.m_reason;
-  updateMessage();   
-  return *this;
-}
-
-EsysException::EsysException( const char *cStr ):
-Parent(),
-m_reason(cStr) 
-{
-  updateMessage();   
-}
-
-EsysException::~EsysException() THROW(NO_ARG)
-{}
-
-const std::string & EsysException::exceptionName() const 
-{
-  return exceptionNameValue;
-}
-
diff --git a/esysUtils/src/EsysException.h b/esysUtils/src/EsysException.h
deleted file mode 100644
index e6821f2..0000000
--- a/esysUtils/src/EsysException.h
+++ /dev/null
@@ -1,209 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef ESYSEXCEPTION_H
-#define ESYSEXCEPTION_H
-#include "system_dep.h"
-
-#include <string>
-#include <exception>
-#include <iostream>
-
-namespace esysUtils
-{
-  /**
-  \brief A base class for exception classes used within Esys system.
-  */
-  class EsysException : public std::exception
-  {
-
-  protected:
-
-     typedef std::exception Parent;
-
-
-  public:
-    /**
-    \brief
-    Default Constructor. Creates an exception with no message.
-    */
-    ESYSUTILS_DLL_API
-    EsysException();
-
-    /**
-    * \brief
-    Constructor which creates a EsysException with the given message
-     
-    @param exceptionReason Input - Exception message.
-    */
-    ESYSUTILS_DLL_API
-    EsysException(const std::string &exceptionReason);
-
-    /**
-    * \brief
-    Constructor which creates a EsysException with the given message
-
-    @param cStr - Exception message.
-    */
-    ESYSUTILS_DLL_API
-    EsysException( const char *cStr );
-
-    /**
-    * \brief
-    Copy constructor   
-
-    @param other Input - EsysException
-    */
-    ESYSUTILS_DLL_API
-    EsysException(const EsysException &other);
-
-    /// Destructor
-    ESYSUTILS_DLL_API
-    virtual ~EsysException() THROW(NO_ARG);
-
-    /**
-    \brief
-    Assignment needed to override any automatic assignment
-    of std::exception, which can potentially copy around char *'s,
-    causing trouble in some implementations of STL.
-    It will only copy the reason string, and update the message.
-
-    @return re-assigned exception.
-    */
-    ESYSUTILS_DLL_API
-    EsysException &
-    operator=(const EsysException &other) THROW(NO_ARG);
-
-    /**
-    \brief
-    Return the exception message in the form
-    <Exception Name>: <Exception Message>
-
-    @return the exception message.
-    */
-    inline
-    const std::string & toString() const;
-
-    /**
-    \brief
-    Return the name of the exception. This is expected to be overloaded
-    in derived classes with the derived class name.
-
-    @return the name of the exception.
-    */
-    ESYSUTILS_DLL_API
-    virtual const std::string & exceptionName() const;
-
-    /**
-    \brief
-    Return a reference to the string that contains the exception reason.
-     
-    @return the string for the exception reason.
-    */
-    inline
-    const std::string& reason() const;
-
-    /**
-    \brief
-    set the string for the reason for the exception.
-    This allows outsiders to modify m_reason, but the practice is discouraged.
-    If string insertions are required, use string methods.
-    */
-    inline
-    void setReason(const std::string &new_reason);
-
-    /**
-    \brief
-    Return a description of the exception in the same format as the toString
-    method.
-
-    @return a description of the exception.
-    */
-    ESYSUTILS_DLL_API
-    inline
-    virtual const char* what() const THROW(NO_ARG);
-
-
-    /**
-    \brief
-    update m_exceptionMessage after a reason update.
-    **/
-    inline
-    void updateMessage();
-
-
-  private:
-    //
-    // the exception reason
-    std::string m_reason;
-
-    //
-    // the full exception message 
-    std::string m_exceptionMessage;
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note: you need one of these too,
-    // and an overloaded exceptionName() in your .cpp implementation file.
-    static const std::string exceptionNameValue;
-
-  };
-
-  /**
-  \brief
-  Stream insertion (print) operator for EsysExceptions
-
-  @param output Input - Output stream.
-  @param inException Input - The exception to be inserted into the output 
-  stream.
-  */ 
-  ESYSUTILS_DLL_API
-  std::ostream &operator<<(std::ostream &output, EsysException &inException);
-
-
-  ////////////////////////////////////////////////////////////////////
-
-  const std::string & EsysException::reason() const
-  {
-    return m_reason;
-  }
-  
-  // return the message as a std::string
-  const std::string & EsysException::toString() const
-  {
-    return m_exceptionMessage;
-  }
-
-  void EsysException::setReason(const std::string &new_reason)
-  {
-    m_reason = new_reason;
-    updateMessage();
-  }
-
-  const char*  EsysException::what() const THROW(NO_ARG)
-  {
-    return m_exceptionMessage.c_str();
-  }
-
-  void EsysException::updateMessage()
-  {
-    m_exceptionMessage = exceptionName() + ": " + m_reason;
-  }
-
-}
-
-#endif
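
The "Inheritor note" in the class above spells out the pattern subclasses of the removed
EsysException were expected to follow: provide a class-wide name string and override
exceptionName() so that what() reports "<Exception Name>: <Exception Message>". A minimal
sketch of such a subclass (the class name is invented; a function-local static stands in
for the static data member to keep the sketch short):

    #include <string>
    #include "EsysException.h"

    class DataException : public esysUtils::EsysException
    {
    public:
        explicit DataException(const std::string& reason)
            : esysUtils::EsysException(reason)
        {
            // Rebuild the message so the derived exceptionName() is picked up.
            updateMessage();
        }

        virtual const std::string& exceptionName() const
        {
            static const std::string name("DataException");
            return name;
        }
    };
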
diff --git a/esysUtils/src/EsysRandom.cpp b/esysUtils/src/EsysRandom.cpp
deleted file mode 100644
index c3d798a..0000000
--- a/esysUtils/src/EsysRandom.cpp
+++ /dev/null
@@ -1,241 +0,0 @@
-/*****************************************************************************
-*
-* Copyright (c) 2013-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include <vector>
-#include <boost/random/mersenne_twister.hpp>
-#include <cstring>
-#include "Esys_MPI.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-using namespace std;
-
-namespace {
-    
-boost::mt19937 base;		// used to seed all the other generators  
-vector<boost::mt19937*> gens;
-vector<boost::uint32_t> seeds;
-
-void seedGens(long seed)
-{
-#ifdef _OPENMP
-    int numthreads=omp_get_max_threads();
-#else
-    int numthreads=1;
-#endif
-    if (gens.size()==0)		// we haven't instantiated the generators yet  
-    {
-        gens.resize(numthreads);	
-        seeds.resize(numthreads);
-    }  
-    if (seed!=0)
-    {
-       int i;
-       base.seed((boost::uint32_t)seed);	// without this cast, icc gets confused
-       for (int i=0;i<numthreads;++i)
-       {
-	    boost::uint32_t b=base();
-            seeds[i]=b;	// initialise each generator with successive random values      
-       }
-       #pragma omp parallel for private(i)
-       for (i=0;i<numthreads;++i) 
-       {
-	   gens[i]=new boost::mt19937(seeds[i]);
-       }
-    }
-}
-  
-  
-}
-
-namespace esysUtils
-{
-
-// Put n random values from the interval [0,1] into array
-// Idea here is to create an array of seeds by feeding the original seed into the random generator
-// The code at the beginning of the function that computes a seed when none is given is
-// just supposed to introduce some variety (and ensure that multiple ranks don't get the same seed).
-// I make no claim about how well these initial seeds are distributed
-// uses openmp
-// don't forget to call CHECK_FOR_EX_WRITE if using this on Data
-void randomFillArray(long seed, double* array, size_t n)
-{
-    static unsigned prevseed=0;	// So if we create a bunch of objects we don't get the same start seed 
-    if (seed==0)		// for each one
-    {
-	if (prevseed==0) 
-	{
-	    time_t s=time(0);
-	    seed=s;
-	}
-	else
-	{
-	    seed=prevseed+419;	// these numbers are arbitrary
-	    if (seed>3040101)		// I want to avoid overflow on 32bit systems
-	    {
-		seed=((int)(seed)%0xABCD)+1;
-	    }
-	}
-    }  
-    // now we need to consider MPI since we don't want each rank to start with the same seed. Rank in COMM_WORLD will do
-#ifdef ESYS_MPI
-    Esys_MPI_rank rank;
-    int mperr=MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    if (mperr!=MPI_SUCCESS) {
-        rank=0;
-    }
-    seed+=rank*5;
-#endif
-    prevseed=seed;  
-    
-    boost::mt19937::result_type RMAX=base.max();
-    seedGens(seed);
-    long i;
-    
-    #pragma omp parallel private(i)
-    {
-	int tnum=0;
-	#ifdef _OPENMP
-	tnum=omp_get_thread_num();
-	#endif
-	boost::mt19937& generator=*(gens[tnum]);
-	
-    	#pragma omp for schedule(static)
-    	for (i=0;i<n;++i)
-    	{
-#ifdef _WIN32
-	    array[i]=((double)generator())/RMAX;
-#else
-	    array[i]=((double)generator())/RMAX;
-#endif
-    	}
-    }
-}
-
-// see patternFillArray for details on parameters
-void patternFillArray2D(size_t x, size_t y, double* array, size_t spacing, size_t basex, size_t basey, size_t numpoints)
-{
-      memset(array, 0, x*y*sizeof(double)*numpoints);
-      size_t xoff=basex%spacing;
-      size_t yoff=basey%spacing;
-      for (int r=0;r<y;++r)
-      {
-	  size_t step=((r+yoff)%spacing)?spacing:1; 
-	  for (int c=0;c<x;++c)
-	  {
-	      if ((c+xoff)%step==0)
-	      {
-		  for (int p=0;p<numpoints;++p)
-		  {
-		      array[(c+r*x)*numpoints+p]=1+p;
-		  }		
-	      }	    
-	  }
-      } 
-}
-
-
-// fill the array (which we assume is 3D with x by y by z points in it) with a pattern.
-// The base? params give the coordinates (in # of elements) of the origin of _this_ rank
-//  used to ensure patterns are generated consistently across multiple ranks.
-// This is only for internal debugging, so the patterns (or this function) may disappear
-// without notice
-void patternFillArray(int pattern, size_t x, size_t y, size_t z, double* array, size_t spacing, size_t basex, size_t basey, size_t basez, size_t numpoints)
-{
-    if (pattern==0)	// a cross pattern in the z=0 plane, repeated for each z layer
-    {
-	memset(array, 0, x*y*sizeof(double)*numpoints);
-	size_t xoff=basex%spacing;
-	size_t yoff=basey%spacing;
-	for (int r=0;r<y;++r)
-	{
-	    size_t step=((r+yoff)%spacing)?spacing:1;
-	    for (int c=0;c<x;++c)
-	    {
-		if ((c+xoff)%step==0)
-		{
-		    for (int p=0;p<numpoints;++p)
-		    {
-			array[(c+r*x)*numpoints+p]=p+1;
-		    }
-		}
-	    }
-	}
-	for (int l=1;l<z;++l)
-	{
-	    memcpy(array+(x*y*l*numpoints), array, x*y*sizeof(double)*numpoints);
-	}
-    }
-    else		// pattern 1. A grid in all 3 dimensions 
-    {
-	if (z<2)
-	{
-	    patternFillArray(0, x, y, z, array, spacing, basex, basey, basez, numpoints);
-	    return;	// this pattern needs a minimum of 2 layers
-	}
-	size_t xoff=basex%spacing;
-	size_t yoff=basey%spacing;
-	size_t zoff=basez%spacing;
-	
-	double* buff1=new double[x*y*numpoints];	// stores the main cross pattern
-	double* buff2=new double[x*y*numpoints];	// stores the "verticals"
-	memset(buff1, 0, x*y*sizeof(double)*numpoints);
-	memset(buff2, 0, x*y*sizeof(double)*numpoints);
-	    // fill in buff1
-	for (size_t r=0;r<y;++r)
-	{
-	    size_t step=((r+yoff)%spacing)?spacing:1;
-	    for (int c=0;c<x;++c)
-	    {
-		if ((c+xoff)%step==0)
-		{
-		    for (int p=0;p<numpoints;++p)
-		    {
-			buff1[(c+r*x)*numpoints+p]=p+1;
-		    }
-		}
-	    }	    
-	}
-	
-	for (size_t r=(spacing-yoff)%spacing;r<y;r+=spacing)
-	{
-	    for (size_t c=(spacing-xoff)%spacing;c<x;c+=spacing)
-	    {
-		for (int p=0;p<numpoints;++p)
-		{
-		    buff2[(c+r*x)*numpoints+p]=p+1;
-		}
-	    }
-	}	
-	for (size_t l=0;l<z;++l)
-	{
-	    if ((l+zoff)%spacing)
-	    {
-		memcpy(array+(x*y*l*numpoints), buff2, x*y*sizeof(double)*numpoints);
-	    }
-	    else
-	    {
-		memcpy(array+(x*y*l*numpoints), buff1, x*y*sizeof(double)*numpoints);
-	    }
-	}
-	delete[] buff1;
-	delete[] buff2;
-    }
-  
-  
-  
-}
-
-} // end namespace
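
The comments above describe the core idea of the removed randomFillArray: seed one base
generator, draw one seed per OpenMP thread from it, then let each thread fill its slice of
the array with its own generator. A stripped-down sketch of that scheme, using the standard
library instead of Boost and leaving out escript's MPI rank offset and seed-recycling
details (names are illustrative only):

    #include <cstddef>
    #include <random>
    #include <vector>
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    // Fill array[0..n-1] with values in [0,1]; reproducible for a given seed.
    void randomFill(unsigned seed, double* array, std::size_t n)
    {
    #ifdef _OPENMP
        const int nthreads = omp_get_max_threads();
    #else
        const int nthreads = 1;
    #endif
        std::mt19937 base(seed);
        std::vector<std::mt19937> gens;
        for (int i = 0; i < nthreads; ++i)
            gens.push_back(std::mt19937(base()));   // successive seeds from the base generator

        #pragma omp parallel
        {
            int tnum = 0;
    #ifdef _OPENMP
            tnum = omp_get_thread_num();
    #endif
            std::uniform_real_distribution<double> dist(0.0, 1.0);
            #pragma omp for schedule(static)
            for (long i = 0; i < (long)n; ++i)
                array[i] = dist(gens[tnum]);        // each thread uses only its own generator
        }
    }
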
diff --git a/esysUtils/src/Esys_MPI.cpp b/esysUtils/src/Esys_MPI.cpp
deleted file mode 100644
index 6220790..0000000
--- a/esysUtils/src/Esys_MPI.cpp
+++ /dev/null
@@ -1,387 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <vector>
-
-#include "Esys_MPI.h"
-#include "index.h"
-#include "mem.h"
-#include "error.h"
-#include "EsysException.h"
-
-
-#include <iostream>        // temp for debugging
-
-namespace esysUtils
-{
-  
-JMPI makeInfo(MPI_Comm comm, bool owncom)
-{
-    if (esysUtils::NoCOMM_WORLD::active() && comm==MPI_COMM_WORLD)
-    {
-        throw esysUtils::EsysException("Attempt to use the MPI_COMM_WORLD communicator when it is blocked.");
-    }
-    JMPI_* p=new JMPI_(comm, owncom);
-    return JMPI(p);
-}
-
-
-JMPI_::JMPI_(MPI_Comm mpicomm, bool owncom)
-        : comm(mpicomm), ownscomm(owncom)
-{
-        msg_tag_counter = 0;
-#ifdef ESYS_MPI
-    if (mpicomm!=MPI_COMM_NULL)
-    {
-        if (MPI_Comm_rank(comm, &rank)!=MPI_SUCCESS || MPI_Comm_size(comm, &size)!=MPI_SUCCESS)
-        {
-            Esys_setError( ESYS_MPI_ERROR, "Esys_MPIInfo_alloc : error finding comm rank/size" );
-        }
-    }
-    else
-    {
-	rank=0;
-	size=0;
-    }
-#else
-        rank=0;
-        size=1;        
-#endif        
-}
-
-JMPI_::~JMPI_()
-{
-#ifdef ESYS_MPI
-    if (ownscomm && (comm!=MPI_COMM_NULL))
-    {
-        MPI_Comm_free(&comm);
-    }
-#endif
-}
-
-dim_t JMPI_::setDistribution(index_t min_id,index_t max_id,index_t* distribution)
-{
-   int rest=0, p;
-   int s=size;
-   dim_t N=max_id-min_id+1;
-   if (N>0) {
-      int local_N=N/s;
-      rest=N-local_N*s;
-      for (p=0; p<s; ++p) {
-         if (p<rest) {
-             distribution[p]=min_id+(local_N+1)*p;
-         } else {
-             distribution[p]=min_id+rest+local_N*p;
-         }
-      }
-      distribution[s]=max_id+1;
-      if (rest==0) {
-         return local_N;
-      } else {
-         return local_N+1;
-      }
-  } else {
-      for (p=0; p<s+1; ++p) distribution[p]=min_id;
-      return 0;
-  }  
-  
-  
-}
-
-void JMPI_::split(dim_t N, dim_t* local_N,index_t* offset) 
-{
-   int rest=0;
-   int s=size;
-   int r=rank;
-   *local_N=N/s;
-   rest=N-(*local_N)*s;
-   if (r<rest) {
-       (*local_N)++;
-       (*offset)=(*local_N)*r;
-   } else {
-       (*offset)=(*local_N)*r+rest;
-   }
-}
-
-}
-
-
-
-dim_t Esys_MPIInfo_setDistribution(esysUtils::JMPI& mpi_info ,index_t min_id,index_t max_id,index_t* distribution) {
-   int rest=0, p;
-   int s=mpi_info->size;
-   dim_t N=max_id-min_id+1;
-   if (N>0) {
-      int local_N=N/s;
-      rest=N-local_N*s;
-      for (p=0; p<s; ++p) {
-         if (p<rest) {
-             distribution[p]=min_id+(local_N+1)*p;
-         } else {
-             distribution[p]=min_id+rest+local_N*p;
-         }
-      }
-      distribution[s]=max_id+1;
-      if (rest==0) {
-         return local_N;
-      } else {
-         return local_N+1;
-      }
-  } else {
-      for (p=0; p<s+1; ++p) distribution[p]=min_id;
-      return 0;
-  }
-}
-
-
-
-/* N = #CPUs, k is a CPU number but out of range or even negative. Return a CPU number in 0...n-1. */
-index_t esysUtils::mod_rank(index_t n, index_t k) 
-{
-    index_t q, out=0;
-    if (n>1) {
-        q=k/n;
-        if (k>0) {
-           out=k-n*q;
-        } else if (k<0) {
-           out=k-n*(q-1);
-        }
-    }
-    return out;
-}
-
-
-/* checks that there is no error across all processes in a communicator */
-/* NOTE : does not guarantee consistency of the error string on each process */
-bool esysUtils::Esys_MPIInfo_noError( const esysUtils::JMPI& mpi_info )
-{
-  int errorLocal = Esys_noError() ? 0 : 1;
-  int errorGlobal = errorLocal;
-
-#ifdef ESYS_MPI
-  if (!checkResult(errorLocal, errorGlobal, mpi_info))
-  {
-      return false;
-  }
-  if( (errorLocal==0) && (errorGlobal==1)) 
-  {
-     Esys_setError( ESYS_MPI_ERROR, "Esys_MPIInfo_noError() : there was an error on another MPI process" );
-  }
-#endif
-  
-  return (errorGlobal==0);
-}
-
-// Throw all values in and get the maximum --- used for error checking.
-// This used to be implemented as a simple AllReduce.
-// However, if there are other (overlapping) communicators in the system, they don't
-// react well to getting unexpected/untagged messages.
-// To avoid this, we do individual sends to the root which sends the result back.
-bool esysUtils::checkResult(int res, int& mres, const esysUtils::JMPI& info)
-{
-    if (info->size==1)
-    {
-        mres=res;
-        return true;
-    }
-#ifdef ESYS_MPI
-    const int leader=0;
-    const int BIGTAG=esysUtils::getSubWorldTag();
-    if (info->rank!=leader)
-    {  
-        if (MPI_Send(&res, 1, MPI_INT, leader, BIGTAG, info->comm)!=MPI_SUCCESS)
-            return false;
-        MPI_Status status;
-        if (MPI_Recv(&mres, 1, MPI_INT, leader, BIGTAG, info->comm, &status)!=MPI_SUCCESS)
-            return false;
-    }
-    else
-    {
-        std::vector<MPI_Status> status(info->size - 1);
-        MPI_Request* reqs=new MPI_Request[info->size-1];
-        int* eres=new int[info->size-1];
-        for (int i=0;i<info->size-1;++i)
-        {
-            MPI_Irecv(eres+i, 1, MPI_INT, i+1, BIGTAG, info->comm, reqs+i);          
-        }  
-        if (MPI_Waitall(info->size-1, reqs, &status[0])!=MPI_SUCCESS)
-        {
-            delete[] reqs;
-            delete[] eres;
-            return false;
-        }
-        // now we have them all, find the max
-        mres=res;
-        for (int i=0;i<info->size-1;++i)
-        {
-            if (mres<eres[i])
-            {
-                mres=eres[i];
-            }
-        }
-        delete[] eres;
-        // now we know what the result should be
-        // send it to the others
-        for (int i=0;i<info->size-1;++i)
-        {
-            MPI_Isend(&mres, 1, MPI_INT, i+1, BIGTAG, info->comm, reqs+i);          
-        }
-        if (MPI_Waitall(info->size-1, reqs, &status[0])!=MPI_SUCCESS)
-        {
-            delete[] reqs;
-            return false;
-        }
-        delete[] reqs;
-      
-    }
-#endif
-    return true;
-}
-
-
-
-
-
-
-
-
-
-
-
-// ensure that any ranks with an empty src argument end up with the string from
-// one of the other ranks.
-// Without MPI, it makes dest point at a copy of src.
-// The expected use case for this code is to ship error messages between ranks;
-// as such, it is not written to be speedy.
-bool esysUtils::shipString(const char* src, char** dest, MPI_Comm& comm)
-{
-#ifdef ESYS_MPI  
-    Esys_MPI_rank rank=0;
-    if (MPI_Comm_rank( comm, &rank )!=MPI_SUCCESS)
-    {
-        return false;        // we have no reason to believe MPI works anymore
-    }
-    
-    int slen=strlen(src);
-    // everybody needs to tell everyone if they have a string
-    // send your rank if you have a non-empty string else
-    // send -1
-    int in=(slen?rank:-1);
-    int out;
-    if (MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MAX, comm)!=MPI_SUCCESS)
-    {
-        return false;
-    }
-    if (out==-1)                // should not be called under these conditions, but no one had a string
-    {
-        *dest=new char[1];
-        *dest[0]='\0';
-        return true;
-    }
-    // since we will be using broadcast, we need to tell everyone how big the string is going to be
-    // with an additional bcast
-    
-    if (MPI_Bcast(&slen, 1, MPI_INT, out, comm)!=MPI_SUCCESS)
-    {
-        return false;
-    }
-    // now broadcast that string to everyone
-    if (rank==out)
-    {
-        // I could const_cast src but instead I'll make a copy
-        
-        *dest=new char[slen+1];
-        strcpy(*dest, src);
-        
-        // this guy should just send the string
-        if (MPI_Bcast(*dest, slen+1, MPI_CHAR, out, comm)!=MPI_SUCCESS)
-        {
-            return false;
-        }
-        return true;
-    }
-    else
-    {
-        *dest=new char[slen+1];
-        if (MPI_Bcast(*dest, slen+1, MPI_CHAR, out, comm)!=MPI_SUCCESS)
-        {
-            return false;
-        }
-        return true;
-    }
-#else
-    *dest=new char[strlen(src)+1];
-    strcpy(*dest, src);
-    return true;
-#endif
-  
-}
-
-namespace 
-{
-    // true if a split world call is currently running and MPI_COMM_WORLD should not be allowed by default
-    bool nocommworldplease=false;
-}
-
-esysUtils::NoCOMM_WORLD::NoCOMM_WORLD()
-{
-    if (nocommworldplease)
-    {
-        throw EsysException("NoCOMM_WORLD does not nest.");
-    }
-    nocommworldplease=true;
-}
-
-esysUtils::NoCOMM_WORLD::~NoCOMM_WORLD()
-{
-    nocommworldplease=false;
-}  
-
-bool esysUtils::NoCOMM_WORLD::active()
-{
-    return nocommworldplease;
-}
-
-/**************************************************
-                 WRAPPERS 
-**************************************************/
-
-int Esys_MPIInfo_initialized( void )
-{
-  #ifdef ESYS_MPI
-     int error=0, initialised=0;
-     error = MPI_Initialized( &initialised );
-     if( error!=MPI_SUCCESS )
-         Esys_setError( ESYS_MPI_ERROR, "mpi_initialised : MPI error" );
-     return initialised;
-  #else
-     return TRUE;
-  #endif
-}
-
-#ifndef _OPENMP 
-int serial_get_max_threads(void) {
-   return 1;
-}
-int serial_get_thread_num(void) {
-   return 0;
-}
-#endif
-
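
setDistribution above (and the older Esys_MPIInfo_setDistribution wrapper) implements a
plain block distribution: N = max_id - min_id + 1 indices are shared over size ranks and
the first N % size ranks each get one extra index. A small standalone sketch of the same
arithmetic with a worked example; the names are invented and this is not escript API:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // distribution[p] is the first id owned by rank p; distribution[s] is max_id+1.
    std::vector<int> blockDistribution(int min_id, int max_id, int s)
    {
        const int N = max_id - min_id + 1;
        const int local_N = N / s;
        const int rest = N - local_N * s;          // ranks 0..rest-1 get one extra id
        std::vector<int> distribution(s + 1);
        for (int p = 0; p < s; ++p)
            distribution[p] = (p < rest) ? min_id + (local_N + 1) * p
                                         : min_id + rest + local_N * p;
        distribution[s] = max_id + 1;
        return distribution;
    }

    int main()
    {
        // Ten ids 0..9 over four ranks: rank sizes 3,3,2,2, boundaries 0 3 6 8 10.
        std::vector<int> d = blockDistribution(0, 9, 4);
        for (std::size_t i = 0; i < d.size(); ++i)
            std::printf("%d ", d[i]);
        std::printf("\n");
        return 0;
    }
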
diff --git a/esysUtils/src/Esys_MPI.h b/esysUtils/src/Esys_MPI.h
deleted file mode 100644
index fb5b0f1..0000000
--- a/esysUtils/src/Esys_MPI.h
+++ /dev/null
@@ -1,166 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef INC_ESYS_MPI
-#define INC_ESYS_MPI
-
-#include "system_dep.h"
-#include "types.h"
-
-#include <sstream>
-#include <boost/shared_ptr.hpp>
-
-#ifdef ESYS_MPI
-
-#include <mpi.h>
-
-#ifdef ESYS_INDEXTYPE_LONG
-#define MPI_DIM_T MPI_LONG
-#else
-#define MPI_DIM_T MPI_INT
-#endif
-
-#else
-   typedef int MPI_Comm;
-   typedef int MPI_Request;
-   typedef int MPI_Op;
-   typedef int MPI_Status;
-   #define MPI_INT 6
-   #define MPI_DOUBLE 11
-   #define MPI_COMM_WORLD 91
-   #define MPI_COMM_NULL 0
-   
-// MPI_Op replacements for non-MPI - these values are arbitrary
-
-   #define MPI_SUM 100
-   #define MPI_MIN 101
-   #define MPI_MAX 102
-
-   #define MPI_OP_NULL 17
-// end MPI_op
-
-   
-#endif
-
-typedef int Esys_MPI_rank;
-
-#define ESYS_MPI_TODO 	{ fprintf( stdout, "\nTODO : %s:%d\n", __FILE__, __LINE__);	MPI_Finalize(); exit(1); }
-
-// Modding by 7 digit prime to avoid overflow
-#define ESYS_MPI_INC_COUNTER(V,I) {(V).msg_tag_counter=((V).msg_tag_counter+(I))%1010201;}
-#define ESYS_MPI_SET_COUNTER(V,I) {(V).msg_tag_counter=(I)%1010201;}
-
-namespace esysUtils {
-
-/** \brief tag reserved for use by SubWorld code
-    this value should be higher than the modulus used in JMPI_::setCounter; apart from that, its value
-    is not particularly significant.
-*/
-ESYSUTILS_DLL_API
-inline int getSubWorldTag()	
-{
-    return (('S'<< 24) + ('u' << 16) + ('b' << 8) + 'W')%1010201;
-}
-  
-class JMPI_;
-
-typedef boost::shared_ptr<JMPI_> JMPI;
-
-class JMPI_
-{
-public:
-    ~JMPI_();
-    int size;
-    Esys_MPI_rank rank;
-    MPI_Comm comm;
-    int msg_tag_counter;
-    bool ownscomm;	// if true, destroy comm on destruct    
-    
-    dim_t setDistribution(index_t min_id,index_t max_id,index_t* distribution);
-    void split(dim_t N, dim_t* local_N,index_t* offset);     
-    
-    void incCounter(int i)
-    {
-	msg_tag_counter+=i;
-	msg_tag_counter%=1010201;		// there is no particular significance here other than being 7 digits 
-    }					// and prime (because why not). It just needs to be big.
-    
-    void setCounter(int i)
-    {
-	msg_tag_counter%=1010201;
-    }
-
-    bool isValid()
-    {
-	return comm!=MPI_COMM_NULL;
-    }
-private:
-    JMPI_(MPI_Comm comm, bool ocomm);
-    friend JMPI makeInfo(MPI_Comm comm, bool owncom);
-};
-
-JMPI makeInfo(MPI_Comm comm, bool owncom=false);
-
-ESYSUTILS_DLL_API
-bool Esys_MPIInfo_noError( const JMPI& mpi_info);
-
-ESYSUTILS_DLL_API
-index_t mod_rank(index_t n, index_t k);
-
-
-/// Appends MPI rank to a file name if MPI size > 1
-ESYSUTILS_DLL_API
-inline std::string appendRankToFileName(const std::string &fileName,
-                                        int mpiSize, int mpiRank)
-{
-    std::stringstream ss;
-    ss << fileName;
-    if (mpiSize > 1) {
-        ss << '.';
-        ss.fill('0');
-        ss.width(4);
-        ss << mpiRank;
-    }
-    std::string result(ss.str());
-    return result;
-}
-
-// ensure that any ranks with an empty src argument end up with the string from
-// one of the other ranks.
-// Without MPI, it makes dest point at a copy of src.
-ESYSUTILS_DLL_API
-bool shipString(const char* src, char** dest, MPI_Comm& comm);
-
-
-// Everyone puts in their error code and everyone gets the largest one
-ESYSUTILS_DLL_API
-bool checkResult(int input, int& output, const JMPI& comm);
-
-
-// Does not cope with nested calls
-class NoCOMM_WORLD
-{
-public:
-    NoCOMM_WORLD();
-    ~NoCOMM_WORLD();
-    static bool active();
-};
-
-} // namespace esysUtils
-
-#endif /* INC_ESYS_MPI */
-
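
For a quick illustration of appendRankToFileName above: with more than one MPI rank the
rank is zero padded to four digits and appended, otherwise the name is returned unchanged.
The values in this fragment are invented; mpiSize and mpiRank would normally come from a
JMPI info object.

    #include <string>

    std::string localName =
        esysUtils::appendRankToFileName("result.silo", /*mpiSize=*/8, /*mpiRank=*/3);
    // localName == "result.silo.0003"; with mpiSize == 1 it stays "result.silo".
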
diff --git a/esysUtils/src/SConscript b/esysUtils/src/SConscript
deleted file mode 100644
index 8119797..0000000
--- a/esysUtils/src/SConscript
+++ /dev/null
@@ -1,72 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-Import('*')
-local_env = env.Clone()
-
-sources = """
-    EsysAssertException.cpp
-    EsysException.cpp
-    EsysRandom.cpp
-    Esys_MPI.cpp
-    error.cpp
-    esysExceptionTranslator.cpp
-    blocktimer.cpp
-    pyerr.cpp
-""".split()
-
-headers = """
-    EsysAssert.h
-    EsysAssertException.h
-    EsysException.h
-    EsysRandom.h
-    Esys_MPI.h
-    IndexList.h
-    error.h
-    esysExceptionTranslator.h
-    esysFileWriter.h
-    blocktimer.h
-    mem.h
-    index.h
-    maths.h
-    pyerr.h
-    system_dep.h
-    first.h
-    types.h
-""".split()
-
-lib_name = 'esysUtils'
-
-if IS_WINDOWS:
-    local_env.Append(CPPDEFINES = ['ESYSUTILS_EXPORTS'])
-
-include_path = Dir(lib_name, local_env['incinstall'])
-hdr_inst = local_env.Install(include_path, headers)
-
-if local_env['build_shared']:
-    lib = local_env.SharedLibrary(lib_name, sources)
-else:
-    lib = local_env.StaticLibrary(lib_name, sources)
-
-lib_inst = local_env.Install(local_env['libinstall'], lib)
-
-env.Alias('build_esysUtils_lib', lib)
-env.Alias('install_esysUtils_headers', hdr_inst)
-env.Alias('install_esysUtils_lib', lib_inst)
-
-# configure the unit tests
-local_env.SConscript(dirs=['#/esysUtils/test'], variant_dir='test', duplicate=0)
-
diff --git a/esysUtils/src/blocktimer.cpp b/esysUtils/src/blocktimer.cpp
deleted file mode 100644
index 16c6bdb..0000000
--- a/esysUtils/src/blocktimer.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <search.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "blocktimer.h"
-#include "stdlib.h"
-#include "string.h"
-
-#ifdef ESYS_MPI
-#include "mpi.h"
-#endif
-
-#ifdef BLOCKTIMER
-static char  *g_names[NUM_TIMERS];	/* Names of the timers */
-static int    g_count[NUM_TIMERS];	/* How many times was the timer incremented? */
-static double g_times[NUM_TIMERS];	/* The total time spent in the block */
-static double g_start_time;		/* Start time for the entire program */
-static int    g_initialized = 0;	/* Has the blocktimer been initialized? */
-static int    g_end_computed = 0;	/* Has the end time been set? */
-#endif /* BLOCKTIMER */
-
-void
-blocktimer_initialize()
-{
-#ifdef BLOCKTIMER
-  int i;
-
-  for (i=0; i<NUM_TIMERS; i++) {
-    g_names[i] = (char *)NULL;
-    g_times[i] = 0.0;
-    g_count[i] = 0;
-  }
-
-  if (hcreate(NUM_TIMERS) == 0) {
-    perror("hcreate");
-    fprintf(stderr, "blocktimer_initialize: Could not initialize hash table\n");
-    exit(1);
-  }
-
-  g_initialized = 1;
-
-  g_start_time = blocktimer_time();
-
-  /* Initialize timer for "entire program" to zero so it appears first in the report */
-  blocktimer_increment("entire program", g_start_time);
-  g_count[0] = 0; /* Reset counter for "entire program" to zero */
-#endif /* BLOCKTIMER */
-}
-
-void
-blocktimer_increment(__const char *name, double start_time)
-{
-#ifdef BLOCKTIMER
-  int id;
-
-  if (!g_initialized) { return; }
-
-  id = blocktimer_getOrCreateTimerId(name);
-
-  g_times[id] += blocktimer_time() - start_time;
-  g_count[id] += 1;
-#endif /* BLOCKTIMER */
-}
-
-int
-blocktimer_getOrCreateTimerId(__const char *name)
-{
-  int id=0;
-#ifdef BLOCKTIMER
-  char *tmp_str;
-  static int nextId = 0;		/* Next timer ID to assign */
-  ENTRY item, *found_item;
-
-  if (!g_initialized) { return(0); }
-
-  /* Has a timer with 'name' already been defined? */
-  item.key = (char *)name;
-  item.data = (void *) NULL;
-  found_item = hsearch(item, FIND);
-
-  if (found_item != NULL) {	/* Already defined so retrieve it from the hash */
-    /* Return the ID of the entry we found */
-    int *idTmp = reinterpret_cast<int*>(found_item->data);
-    id = *idTmp;
-  }
-  else {			/* Not already defined so create one */
-    /* malloc new int, can't use stack var or all items share same data */
-    int *idTmp = (int *)malloc(sizeof(int));
-    /* Enter the new name in the hash */
-    if (nextId >= NUM_TIMERS) {
-      fprintf(stderr, "blocktimer: exceeded limit of %d timers, increase NUM_TIMERS\n", NUM_TIMERS);
-      exit(1);
-    }
-    *idTmp = nextId++;
-    item.key = (char *)name;
-    item.data = (void *) idTmp;
-    hsearch(item, ENTER);
-    id = *idTmp;
-    /* Make a copy of the name and save with other names */
-    tmp_str = (char*)malloc(strlen(name)+1);
-    strcpy(tmp_str, name);
-    g_names[id] = tmp_str;
-  }
-
-#endif /* BLOCKTIMER */
-  return(id);
-}
-
-void
-blocktimer_reportSortByName()
-{
-#ifdef BLOCKTIMER
-  int i;
-
-  if (!g_initialized) { return; }
-
-  if (!g_end_computed) {
-    blocktimer_increment("entire program", g_start_time);
-    g_end_computed = 1;
-  }
-  printf("BlockTimer sorted by name (sorting TBD):\n");
-  for(i=0; i<NUM_TIMERS; i++) {
-    if (g_names[i] != (char *) NULL) {
-      printf("	%7d %15.2f   %s\n", g_count[i], g_times[i], g_names[i]);
-    }
-  }
-#endif /* BLOCKTIMER */
-}
-
-void
-blocktimer_reportSortByTime()
-{
-#ifdef BLOCKTIMER
-  int i;
-
-  if (!g_initialized) { return; }
-
-  if (!g_end_computed) {
-    blocktimer_increment("entire program", g_start_time);
-    g_end_computed = 1;
-  }
-  printf("BlockTimer sorted by time (sorting TBD):\n");
-  for(i=0; i<NUM_TIMERS; i++) {
-    if (g_names[i] != (char *) NULL) {
-      printf("	%7d %15.2f seconds for %s\n", g_count[i], g_times[i], g_names[i]);
-    }
-  }
-#endif /* BLOCKTIMER */
-}
-
-/* Copied from Paso_timer() */
-double
-blocktimer_time()
-{
-  double out=0.0;
-#ifdef ESYS_MPI
-  out = MPI_Wtime();
-#else
-#ifdef _OPENMP
-  out=omp_get_wtime();
-#else
-  out=((double) clock())/CLOCKS_PER_SEC;
-#endif
-#endif
-  return(out);
-}
-
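
For reference, the removed blocktimer API was used by taking a timestamp with
blocktimer_time() at the start of a block and charging the elapsed time to a named counter
with blocktimer_increment() at the end; every call is a no-op unless BLOCKTIMER is defined.
A minimal usage sketch with an invented function name:

    #include "blocktimer.h"

    void assembleSystem()
    {
        double t0 = blocktimer_time();
        // ... the work being timed ...
        blocktimer_increment("assembleSystem", t0);
    }

    int main()
    {
        blocktimer_initialize();
        assembleSystem();
        blocktimer_reportSortByTime();
        return 0;
    }
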
diff --git a/esysUtils/src/blocktimer.h b/esysUtils/src/blocktimer.h
deleted file mode 100644
index 7609057..0000000
--- a/esysUtils/src/blocktimer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef blocktimer_h
-#define blocktimer_h
-
-#include <stdio.h>
-#include <search.h>
-#include "system_dep.h"
-
-/* Enable the block timer (or remove this and use -DBLOCKTIMER) */
-/* # define BLOCKTIMER */
-
-# define NUM_TIMERS 1024
-
-ESYSUTILS_DLL_API
-void blocktimer_initialize();
-ESYSUTILS_DLL_API
-void blocktimer_increment(__const char *name, double start_time);
-ESYSUTILS_DLL_API
-int blocktimer_getOrCreateTimerId(__const char *name);
-ESYSUTILS_DLL_API
-void blocktimer_reportSortByName();
-ESYSUTILS_DLL_API
-void blocktimer_reportSortByTime();
-ESYSUTILS_DLL_API
-double blocktimer_time();
-
-
-#endif
diff --git a/esysUtils/src/error.cpp b/esysUtils/src/error.cpp
deleted file mode 100644
index cefaa46..0000000
--- a/esysUtils/src/error.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2010-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "error.h"
-#include "Esys_MPI.h"
-
-#include <string.h>
-#include <time.h>
-#ifdef _OPENMP 
-#include <omp.h>
-#endif
-
-#define MIN(X,Y) ((X)<(Y)?(X):(Y))
-
-Esys_ErrorCodeType Esys_ErrorCode_=NO_ERROR;
-char Esys_ErrorMsg_[LenErrorMsg_MAX]={'\0'};
-
-/* reset the error to NO_ERROR */
-void Esys_resetError(void) {
-  Esys_ErrorCode_=NO_ERROR;
-}
-                                                                                                                                                                                                     
-/* sets an error */
-void Esys_setError(Esys_ErrorCodeType err,__const char* msg) {
-  size_t lenMsg=strlen(msg);
-  if (Esys_noError()) {
-/* printf("error set = %d %s\n",err,msg); */
-     Esys_ErrorCode_=err;
-     strncpy(Esys_ErrorMsg_,msg,MIN(LenErrorMsg_MAX,lenMsg));
-     Esys_ErrorMsg_[MIN(LenErrorMsg_MAX,lenMsg)]='\0';
-  }
-}
-                                                                                                                                                                                                     
-/* checks if there is no error */
-bool Esys_noError(void) {
-   Esys_ErrorCodeType err=Esys_getErrorType();
-   /* return (err==NO_ERROR ||  err==WARNING);*/
-   return (err==NO_ERROR);
-}
-/* This function checks if the pointer ptr has a target. If not, an
-   error is raised and true is returned. */
-
-bool Esys_checkPtr(void* ptr) {
-   if (ptr==NULL) {
-      Esys_setError(MEMORY_ERROR,"Out of memory.");
-      return true;
-   } else {
-      return false;
-   }
-} 
-
-/* This function returns a timer */
-double Esys_timer(void) {
-  double out;
-
-#ifdef ESYS_MPI
-  out = MPI_Wtime();
-#else
-#ifdef _OPENMP 
-  out=omp_get_wtime();
-#else
-  out=((double) clock())/CLOCKS_PER_SEC;
-#endif
-#endif
-  return out;
-}
-
-
-
-/* return the error code */
-Esys_ErrorCodeType Esys_getErrorType(void) {
-   return Esys_ErrorCode_;
-}
-
-/* return the error message */
-char* Esys_getErrorMessage(void) {
-   return Esys_ErrorMsg_;
-}
-
diff --git a/esysUtils/src/error.h b/esysUtils/src/error.h
deleted file mode 100644
index 70582c8..0000000
--- a/esysUtils/src/error.h
+++ /dev/null
@@ -1,94 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2010-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/************************************************************************************/
-
-/*    Functions for C error handling  (and timing)*/
-
-/************************************************************************************/
-
-
-#ifndef INC_ESYS_ERROR
-#define INC_ESYS_ERROR
-
-#include "system_dep.h"
-#include "types.h"
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <stdio.h>	/* For FILENAME_MAX */
-#define LenString_MAX FILENAME_MAX*2
-#define LenErrorMsg_MAX LenString_MAX
-
-/************************************************************************************/
-
-typedef enum {
-  NO_ERROR,
-  WARNING,
-  DIVERGED,
-  VALUE_ERROR,
-  TYPE_ERROR,
-  MEMORY_ERROR,
-  IO_ERROR,
-  ZERO_DIVISION_ERROR,
-  EOF_ERROR,
-  FLOATING_POINT_ERROR,
-  INDEX_ERROR,
-  OS_ERROR,
-  OVERFLOW_ERROR,
-  SYSTEM_ERROR,
-  ESYS_MPI_ERROR,
-  NO_PROGRESS_ERROR
-} Esys_ErrorCodeType;
-
-/* interfaces */
-
-
-ESYSUTILS_DLL_API
-double Esys_timer(void);
-
-ESYSUTILS_DLL_API
-bool Esys_checkPtr(void*);
-
-ESYSUTILS_DLL_API
-void Esys_resetError(void);
-
-ESYSUTILS_DLL_API
-void Esys_setError(Esys_ErrorCodeType err,__const char* msg);
-
-ESYSUTILS_DLL_API
-bool Esys_noError(void);
-
-ESYSUTILS_DLL_API
-Esys_ErrorCodeType Esys_getErrorType(void);
-
-ESYSUTILS_DLL_API
-char* Esys_getErrorMessage(void);
-
-#ifndef _OPENMP
-int serial_get_max_threads(void);
-int serial_get_thread_num(void);
-
-/* Nasty hack to get 3.2 out */
-#define omp_get_max_threads serial_get_max_threads
-#define omp_get_thread_num serial_get_thread_num
-#endif
-
-
-#endif /* #ifndef INC_ESYS_ERROR */
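In a build without OpenMP, the #ifndef _OPENMP block above maps the two omp_get_* queries onto serial stand-ins declared elsewhere in esysUtils, so callers compile unchanged. A self-contained sketch of that pattern; the stub bodies below are illustrative, not the library's actual definitions.

    /* Sketch of the serial-stub pattern used above; stub bodies are illustrative. */
    #include <cstdio>

    #ifdef _OPENMP
    #include <omp.h>
    #else
    static int serial_get_max_threads(void) { return 1; }  /* a "team" of one */
    static int serial_get_thread_num(void)  { return 0; }  /* always thread 0 */
    #define omp_get_max_threads serial_get_max_threads
    #define omp_get_thread_num  serial_get_thread_num
    #endif

    int main()
    {
        /* Callers can use the OpenMP query names unconditionally. */
        std::printf("threads=%d, this thread=%d\n",
                    omp_get_max_threads(), omp_get_thread_num());
        return 0;
    }
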
diff --git a/esysUtils/src/esysExceptionTranslator.h b/esysUtils/src/esysExceptionTranslator.h
deleted file mode 100644
index 88a662c..0000000
--- a/esysUtils/src/esysExceptionTranslator.h
+++ /dev/null
@@ -1,44 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  esysUtils_esysExceptionTranslator_20040419_H
-#define esysUtils_esysExceptionTranslator_20040419_H
-#include "system_dep.h"
-
-#include "EsysException.h"
-
-#include "boost/python/errors.hpp"
-
-#include <iostream>
-
-namespace esysUtils {
-  /**
-     \brief
-     Function which translates an EsysException into a python RuntimeError
-  */
-  ESYSUTILS_DLL_API
-  void RuntimeErrorTranslator(EsysException const& e);
-
-  /**
-     \brief
-     Function which translates an EsysException into a python ValueError
-  */
-  ESYSUTILS_DLL_API
-  void ValueErrorTranslator(EsysException const& e);
-} // end of namespace
-
-#endif
diff --git a/esysUtils/src/first.h b/esysUtils/src/first.h
deleted file mode 100644
index da9ce27..0000000
--- a/esysUtils/src/first.h
+++ /dev/null
@@ -1,26 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2015-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/* The purpose is to gather anything which "needs to be included first" */
-
-#ifndef esysutils_first_h
-#define esysutils_first_h
-
-#ifdef ESNEEDPYTHON
-#include "Python.h"
-#endif
-
-#endif
diff --git a/esysUtils/src/index.h b/esysUtils/src/index.h
deleted file mode 100644
index c610f6e..0000000
--- a/esysUtils/src/index.h
+++ /dev/null
@@ -1,63 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef INC_ESYS_INDEX
-#define INC_ESYS_INDEX
-
-/************************************************************************************/
-
-/*    Macros for array indexing       */
-
-/************************************************************************************/
-
-/************************************************************************************/
-
-/*   some useful functions: */
-
-#include <limits.h>
-
-
-#define FALSE 0
-#define TRUE 1
-#define UNKNOWN -1
-#define DBLE(_x_) (double)(_x_)
-#define INDEX1(_X1_) (_X1_)
-#define INDEX2(_X1_,_X2_,_N1_) ((_X1_)+(_N1_)*(_X2_))
-#define INDEX3(_X1_,_X2_,_X3_,_N1_,_N2_) ((_X1_)+(_N1_)*INDEX2(_X2_,_X3_,_N2_))
-#define INDEX4(_X1_,_X2_,_X3_,_X4_,_N1_,_N2_,_N3_) ((_X1_)+(_N1_)*INDEX3(_X2_,_X3_,_X4_,_N2_,_N3_))
-#define INDEX5(_X1_,_X2_,_X3_,_X4_,_X5_,_N1_,_N2_,_N3_,_N4_) ((_X1_)+(_N1_)*INDEX4(_X2_,_X3_,_X4_,_X5_,_N2_,_N3_,_N4_))
-#define INDEX6(_X1_,_X2_,_X3_,_X4_,_X5_,_X6_,_N1_,_N2_,_N3_,_N4_,_N5_) ((_X1_)+(_N1_)*INDEX5(_X2_,_X3_,_X4_,_X5_,_X6_,_N2_,_N3_,_N4_,_N5_))
-
-#define MAX(_arg1_,_arg2_) ((_arg1_)>(_arg2_) ?  (_arg1_) : (_arg2_))
-#define MAX3(_arg1_,_arg2_,_arg3_) MAX(_arg1_,MAX(_arg2_,_arg3_))
-#define MIN(_arg1_,_arg2_) ((_arg1_)>(_arg2_) ?  (_arg2_) : (_arg1_)) 
-#define MIN3(_arg1_,_arg2_,_arg3_) MIN(_arg1_,MIN(_arg2_,_arg3_))
-#define ABS(_arg_) MAX((_arg_),-(_arg_))
-#define SIGN(_arg_) ((_arg_)>0 ?  1  : ((_arg_)<0 ? -1 : 0 ))
-#define SAMESIGN(_arg1_, _arg2_) ( ( ( (_arg1_)>=0 ) && ( (_arg2_)>=0 ) ) || ( ((_arg1_)<=0 ) && ( (_arg2_)<=0 ) ) )
-#define SWAP(_a0_,_a1_,_type_) { \
-                                _type_ s; \
-                                s=(_a0_); \
-                                _a0_= (_a1_); \
-                                _a1_=s; \
-                               }
-#define XNOR(_a0_,_a1_) ( ( (_a0_) && (_a1_) ) || ( !(_a0_) && !(_a1_) ) )
-
-#define INDEX_T_MAX INT_MAX
-#define INDEX_T_MIN -INT_MAX
-
-#endif 
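The INDEXn macros above linearise multi-dimensional subscripts in column-major order, e.g. INDEX2(i, j, N1) = i + N1*j. A small self-contained check of that convention (the macros are copied from the header; the test values are arbitrary and purely illustrative):

    /* Column-major linearisation, as defined by the macros above. */
    #include <cassert>

    #define INDEX2(_X1_,_X2_,_N1_) ((_X1_)+(_N1_)*(_X2_))
    #define INDEX3(_X1_,_X2_,_X3_,_N1_,_N2_) ((_X1_)+(_N1_)*INDEX2(_X2_,_X3_,_N2_))

    int main()
    {
        /* 3x4 array stored column by column: (i=2, j=1) sits at 2 + 3*1 = 5. */
        assert(INDEX2(2, 1, 3) == 5);
        /* 3x4x5 array: (1, 2, 3) maps to 1 + 3*(2 + 4*3) = 43. */
        assert(INDEX3(1, 2, 3, 3, 4) == 43);
        return 0;
    }
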
diff --git a/esysUtils/src/maths.h b/esysUtils/src/maths.h
deleted file mode 100644
index 20e19db..0000000
--- a/esysUtils/src/maths.h
+++ /dev/null
@@ -1,30 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef INC_ESYS_MATHS
-#define INC_ESYS_MATHS
-
-/************************************************************************************/
-
-/*    Pull in a maths library */
-#include <cmath>
-
-#define EPSILON DBL_EPSILON
-#define LARGE_POSITIVE_FLOAT DBL_MAX
-#define SMALL_NEGATIVE_FLOAT -DBL_MAX
-
-#endif 
diff --git a/esysUtils/src/mem.h b/esysUtils/src/mem.h
deleted file mode 100644
index 0caf626..0000000
--- a/esysUtils/src/mem.h
+++ /dev/null
@@ -1,100 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef INC_ESYS_MEM
-#define INC_ESYS_MEM
-
-/****************************************************************************/
-/*   Macros to deal with memory management */
-/********************************************/
-
-
-/****************************************************************************/
-/*    memory allocation:                                      */
-/*    Wise to not use PASO_MALLOC/FREE/REALLOC and            */
-/*    PASO_THREAD... directly. These are only for tailoring   */
-/*    the main macros that follow                             */
-/****************************************************************************/
-
-
-#include <stdlib.h>
-
-#define PASO_MALLOC malloc
-#define PASO_FREE free
-#define PASO_REALLOC realloc
-
-
-/* FIXME: This is not satisfactory.                                */
-/* _ECC, __INTEL_COMPILER, and other                               */
-/* intel compiler pre-defines need to be handled                   */
-/* (__ICL, __ICC come to mind)                                     */
-/* Also, _WIN32 may take this branch one day...                    */
-/* SO KEEP ALL THREAD_MEMALLOC/FREEs CONFINED TO THE PASO LIBRARY. */
-
-#if defined(__ECC) && defined(_OPENMP) /* ECC version of intel compiler with openmp. */
-  #include <omp.h>
-  #define PASO_THREAD_MALLOC kmp_malloc
-  #define PASO_THREAD_FREE kmp_free
-#else
-  #define PASO_THREAD_MALLOC PASO_MALLOC
-  #define PASO_THREAD_FREE PASO_FREE
-#endif
-
-
-/******************The main macros ************************************/ 
-
-#define MEMALLOC(_LENGTH_,_TYPE_)                                     \
-  (_TYPE_*) PASO_MALLOC(((size_t)(_LENGTH_))*sizeof(_TYPE_))
-
-/* do {} while(0) -  an old trick for bracketing a macro that */
-/* makes sure a semi-colon does no harm.                      */
-
-#define MEMFREE(_PTR_)                                                  \
-do                                                                      \
-{                                                                       \
-  if ((void *)(_PTR_) != NULL ) { PASO_FREE(_PTR_); (_PTR_) = NULL; }   \
-} while(0)
-
-#define MEMREALLOC(_RETP_,_POINTER_,_LENGTH_,_TYPE_)                    \
-do                                                                        \
-{                                                                         \
-   if( (_POINTER_)!=NULL )                                                \
-   {                                                                      \
-      _RETP_ = (_TYPE_*)PASO_REALLOC((void*)(_POINTER_),               \
-                                   ((size_t)(_LENGTH_))*sizeof(_TYPE_) ); \
-   }                                                                      \
-   else                                                                   \
-   {                                                                      \
-      _RETP_ = (_TYPE_*)PASO_MALLOC( ((size_t)(_LENGTH_))*sizeof(_TYPE_) ); \
-   }                                                                      \
-} while(0)
-
-#define TMPMEMALLOC MEMALLOC
-#define TMPMEMFREE MEMFREE
-#define TMPMEMREALLOC MEMREALLOC
-
-#define THREAD_MEMALLOC(_LENGTH_,_TYPE_)                          \
-   (_TYPE_*) PASO_THREAD_MALLOC/**/(((size_t)(_LENGTH_))*sizeof(_TYPE_))
-
-#define THREAD_MEMFREE(_PTR_)                                                \
-do                                                                           \
-{                                                                            \
-  if ((void *)(_PTR_) != NULL ) { PASO_THREAD_FREE(_PTR_); (_PTR_) = NULL; } \
-} while(0)
-
-
-#endif 
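The do {} while(0) wrapper noted in the comment above is what makes MEMFREE(p); behave as a single statement, including as the unbraced branch of an if/else. A standalone illustration (the macro body is adapted from the header, with PASO_FREE replaced by free; the surrounding code is hypothetical):

    /* Because of the do{}while(0) wrapper, MEMFREE(buf); below is one
     * statement, so the else still pairs with the intended if. */
    #include <stdlib.h>

    #define MEMFREE(_PTR_)                                                  \
    do                                                                      \
    {                                                                       \
      if ((void *)(_PTR_) != NULL ) { free(_PTR_); (_PTR_) = NULL; }        \
    } while(0)

    int main()
    {
        double* buf = (double*)malloc(10 * sizeof(double));
        if (buf != NULL)
            MEMFREE(buf);   /* frees and nulls buf */
        else
            return 1;       /* allocation failed */
        return buf == NULL ? 0 : 1;
    }
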
diff --git a/esysUtils/src/pyerr.cpp b/esysUtils/src/pyerr.cpp
deleted file mode 100644
index 7e3121c..0000000
--- a/esysUtils/src/pyerr.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*****************************************************************************
-*
-* Copyright (c) 2015-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include <boost/python/object.hpp>
-#include <boost/python/import.hpp>
-#include <boost/python/list.hpp>
-#include <boost/python/extract.hpp>
-
-#include "first.h"
-#include "pyerr.h"
-
-// Function factored out of SubWorld code
-
-void getStringFromPyException(boost::python::error_already_set e, std::string& errormsg)
-{
-	using namespace boost::python;
-
-  	PyObject* ptype=0;
- 	PyObject* pvalue=0;
- 	PyObject* ptraceback=0;
- 	PyErr_Fetch(&ptype, &pvalue, &ptraceback);
-	PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
-	object tb = import("traceback"); 
-	object trace(handle<>(borrowed(ptraceback)));
-	object li=tb.attr("extract_tb")(trace);
-	object li2=tb.attr("format_list")(li);
-	list l=extract<list>(li2)();
-	
-
-
-#ifdef ESPYTHON3	
-	std::string ss;
-	for (int i=0;i<len(l);++i) {
-	    object o=l[i];
-	    PyObject* rr=PyUnicode_AsASCIIString(o.ptr());
-	    ss+=PyBytes_AsString(rr);
-	    Py_XDECREF(rr);
-	}
-	
-	PyObject* errobj=PyObject_Str(pvalue);	
-	
-	PyObject* rr=PyUnicode_AsASCIIString(errobj);
-	errormsg=PyBytes_AsString(rr);
-	errormsg+="\n";
-	Py_XDECREF(rr);
-	errormsg+=ss;
-#else
-	
-	std::string ss;
-	for (int i=0;i<len(l);++i) {
-	    ss+=extract<std::string>(l[i])();
-	}
-	
-	PyObject* errobj=PyObject_Str(pvalue);	
-	
-	errormsg=PyString_AsString(errobj);
-	errormsg+="\n";
-	errormsg+=ss;
-#endif
-	Py_XDECREF(errobj);
-
-	Py_XDECREF(ptype);
-	Py_XDECREF(pvalue);
-	Py_XDECREF(ptraceback);
-}
diff --git a/esysUtils/src/system_dep.h b/esysUtils/src/system_dep.h
deleted file mode 100644
index 3248558..0000000
--- a/esysUtils/src/system_dep.h
+++ /dev/null
@@ -1,58 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/**
-\file esysUtils/src/system_dep.h
-\ingroup Other
- */
-/*
-* @(#) system_dep.h
-*/
-
-#ifndef esysutils_system_dep_h
-#define esysutils_system_dep_h
-
-#include <cmath>
-
-#define ESYSUTILS_DLL_API
-
-#ifdef _WIN32
-#   ifndef ESYSUTILS_STATIC_LIB
-#      undef ESYSUTILS_DLL_API
-#      ifdef ESYSUTILS_EXPORTS
-#         define ESYSUTILS_DLL_API __declspec(dllexport)
-#      else
-#         define ESYSUTILS_DLL_API __declspec(dllimport)
-#      endif
-#   endif
-
-/* This is because of the different declarations of std::exception methods
-*  on Windows.
-* Also, putting a "throw" in any declaration on windows causes a warning!!!!!!
-* If you wish to generate a throw() on other systems, please use 
-* THROW(NO_ARG). This is because windows generates warnings if you say
-* THROW(), so the NO_ARG trick must be used to avoid the mass of warnings.
-*/
-
-#   define THROW(ARG)
-#else
-#   define THROW(ARG) throw(ARG)
-#endif
-
-#define NO_ARG
-
-#endif
diff --git a/esysUtils/src/types.h b/esysUtils/src/types.h
deleted file mode 100644
index a7a2abf..0000000
--- a/esysUtils/src/types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2010-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef __ESYS_TYPES_H__
-#define __ESYS_TYPES_H__
- 
-#ifdef ESYS_INDEXTYPE_LONG
-typedef long index_t;
-#else
-typedef int index_t;
-#endif
-
-typedef index_t dim_t;
-typedef int type_t;
-typedef int err_t;
-
-#endif // __ESYS_TYPES_H__
-
diff --git a/esysUtils/test/EsysExceptionTestCase.cpp b/esysUtils/test/EsysExceptionTestCase.cpp
deleted file mode 100644
index db2ecdb..0000000
--- a/esysUtils/test/EsysExceptionTestCase.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "EsysExceptionTestCase.h"
-#include "esysUtils/EsysException.h"
-
-#include <cppunit/TestCaller.h>
-#include <iostream>
-
-using namespace std;
-using namespace CppUnit;
-using namespace esysUtils;
-
-class DerivedEx : public EsysException {
-
-   typedef EsysException Parent;
-
-public:
-
-   /// Default Constructor for Exception
-   DerivedEx() : Parent() { updateMessage(); }
-
-   /// Constructor for Exception
-   DerivedEx(const char *cstr) : Parent(cstr) { updateMessage(); }
-
-   /// Constructor for Exception
-   DerivedEx(const string &str) : Parent(str) { updateMessage(); }
-
-   // Copy Constructor.
-   DerivedEx(const DerivedEx &other): Parent(other) { updateMessage(); } 
-
-   inline virtual DerivedEx &
-   operator=(const DerivedEx &other) THROW(NO_ARG)
-      {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-      }
-
-   /// Return the exception name
-   virtual const string & exceptionName() const
-      {
-         return rhubarb;
-      }
-        
-   static const string rhubarb;
-};
-
-const string DerivedEx::rhubarb("DerivedException");
-
-void EsysExceptionTestCase::testCase0()
-{
-	CPPUNIT_ASSERT(true);
-}
-
-void EsysExceptionTestCase::testCase1() {
-
-	EsysException defEx;
-
-	//
-	// default exception text should have contents of exceptionName near start
-	//
-	string defString = defEx.toString();
-	CPPUNIT_ASSERT(defString.find(defEx.exceptionName()) != string::npos);
-	CPPUNIT_ASSERT(defString.find(defEx.exceptionName()) < 4);
-
-	//
-	// default exception text shouldn't be much longer than contents of exception name
-	//
-	CPPUNIT_ASSERT(defString.size() > defEx.exceptionName().size());
-	CPPUNIT_ASSERT((defString.size() - defEx.exceptionName().size()) < 10);
-
-	string ex1Text("My first funny exception message.");
-	EsysException ex1(ex1Text);
-
-	//
-	// exception text should have contents of exceptionName near start
-	//
-	string ex1String = ex1.toString();
-	CPPUNIT_ASSERT(ex1String.find(ex1.exceptionName()) != string::npos);
-	CPPUNIT_ASSERT(defString.find(ex1.exceptionName()) < 4);
-
-	//
-	// exception text should contain entered exception message
-	//
-	CPPUNIT_ASSERT(ex1String.find(ex1Text) != string::npos);
-
-	//
-	// copy constructed exception should match original
-	//
-	EsysException copyEx(ex1);
-	string copyString = copyEx.toString();
-	CPPUNIT_ASSERT(ex1String == copyString);
-
-	//
-	// copy assigned exception should match original
-	//
-	EsysException assEx;
-	assEx = ex1;
-	string assString = assEx.toString();
-	CPPUNIT_ASSERT(ex1String == assString);
-
-	//
-	// check throw/catch mechanism
-	//
-	string ex2Text("My second funny exception message.");
-	try {
-
-		EsysException ex2(ex2Text);
-		throw(ex2);
-
-	}
-
-	catch(EsysException& e) {
-
-		//
-		// exception text should have contents of exceptionName near start
-		//
-		string eString = e.toString();
-		CPPUNIT_ASSERT(eString.find(e.exceptionName()) != string::npos);
-		CPPUNIT_ASSERT(defString.find(e.exceptionName()) < 4);
-
-		//
-		// exception text should contain entered exception message
-		//
-		CPPUNIT_ASSERT(eString.find(ex2Text) != string::npos);
-
-	}
-
-}
-
-//
-// test derived EsysException
-//
-void EsysExceptionTestCase::testCase2() {
-
-	DerivedEx defEx;
-	//
-	// default exception text should have contents of exceptionName near start
-	//
-	string defString = defEx.toString();
-	CPPUNIT_ASSERT(defString.find(defEx.exceptionName()) != string::npos);
-	CPPUNIT_ASSERT(defString.find(defEx.exceptionName()) < 4);
-
-	//
-	// default exception text shouldn't be much longer than contents of exception name
-	//
-	CPPUNIT_ASSERT(defString.size() > defEx.exceptionName().size());
-	CPPUNIT_ASSERT((defString.size() - defEx.exceptionName().size()) < 10);
-
-	string ex1Text("asdjhieurncidhfjsnfkjefkjndfjkhsdrdfjksdhfweh");
-	DerivedEx ex1(ex1Text);
-	//
-	// exception text should have contents of exceptionName near start
-	//
-	string ex1String = ex1.toString();
-	CPPUNIT_ASSERT(ex1String.find(ex1.exceptionName()) != string::npos);
-	CPPUNIT_ASSERT(defString.find(ex1.exceptionName()) < 4);
-
-	//
-	// exception text should contain entered exception message
-	//
-	CPPUNIT_ASSERT(ex1String.find(ex1Text) != string::npos);
-
-	//
-	// copy constructed exception should match original
-	//
-	DerivedEx copyEx(ex1);
-	string copyString = copyEx.toString();
-	CPPUNIT_ASSERT(ex1String == copyString);
-
-	//
-	// copy assigned exception should match original
-	//
-	DerivedEx assEx;
-	assEx = ex1;
-	string assString = assEx.toString();
-	CPPUNIT_ASSERT(ex1String == assString);
-
-	//
-	// check throw/catch mechanism
-	//
-	string ex2Text("pjkkjhdfbnkjerbkjsduflfkjahalkgjlklhjhj");
-	try {
-
-		DerivedEx ex2(ex2Text);
-		throw(ex2);
-
-	}
-
-	catch(DerivedEx& e) {
-
-		//
-		// exception text should have contents of exceptionName near start
-		//
-		string eString = e.toString();
-		CPPUNIT_ASSERT(eString.find(e.exceptionName()) != string::npos);
-		CPPUNIT_ASSERT(defString.find(e.exceptionName()) < 4);
-
-		//
-		// exception text should contain entered exception message
-		//
-		CPPUNIT_ASSERT(eString.find(ex2Text) != string::npos);
-	}
-
-	//
-	// check throw/catch mechanism
-	//
-	string ex3Text("irfjvniouf;iarhglAKSDIghlAKSDghladg");
-	try {
-
-		DerivedEx ex3(ex3Text);
-		throw(ex3);
-
-	}
-	catch(EsysException& e) {
-
-		//
-		// exception text should have contents of exceptionName near start
-		//
-		DerivedEx ex4;
-		std::string eString = e.toString();
-		CPPUNIT_ASSERT(eString.find(ex4.exceptionName()) != string::npos);
-		CPPUNIT_ASSERT(defString.find(ex4.exceptionName()) < 4);
-
-		//
-		// exception text should contain entered exception message
-		//
-		CPPUNIT_ASSERT(eString.find(ex3Text) != string::npos);
-
-	}
-
-	//
-	// test to see if exception name gets lost on rethrow
-	//
-        try {
-	  try {
-	    DerivedEx ex4("D ex4 text.");
-	    throw ex4;
-	  }
-	  catch (EsysException& e) {
-	    cout << endl << e.toString() << endl;
-	    throw;
-	  }
-        }
-        catch (EsysException& e) {
-	  cout << e.toString() << endl;
-	}
-
-	cout << "Test EsysException may be caught as a std::exception" << endl;
-       	try {
-	  DerivedEx ex4("Exception caught as std::exception");
-	  throw ex4;
-       	}
- 	catch (exception& e) {
-          // cout << e.what() << endl;
-          CPPUNIT_ASSERT(e.what() == string("DerivedException: Exception caught"
-                                    " as std::exception")
-                 );
-  	}
-	catch (...) {
-           //
-           // if the exception is caught here there is a problem
-	   CPPUNIT_ASSERT(false);
-	}
-}
-
-TestSuite* EsysExceptionTestCase::suite()
-{
-  //
-  // create the suite of tests to perform.
-  TestSuite *testSuite = new TestSuite("EsysExceptionTestCase");
-
-  testSuite->addTest(new TestCaller<EsysExceptionTestCase>(
-              "testCase0",&EsysExceptionTestCase::testCase0));
-  testSuite->addTest(new TestCaller<EsysExceptionTestCase>(
-              "testCase1",&EsysExceptionTestCase::testCase1));
-  testSuite->addTest(new TestCaller<EsysExceptionTestCase>(
-              "testCase2",&EsysExceptionTestCase::testCase2));
-  return testSuite;
-}
-
diff --git a/esysUtils/test/SConscript b/esysUtils/test/SConscript
deleted file mode 100644
index 643db75..0000000
--- a/esysUtils/test/SConscript
+++ /dev/null
@@ -1,40 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-Import('*')
-local_env = env.Clone()
-
-if local_env['cppunit']:
-    # get the test source file names
-    sources = Glob('*.cpp')+Glob('*.c')
-    testname='esysUtils_UnitTest'
-
-    # build the executable
-    local_env.Append(LIBS=['esysUtils']+env['cppunit_libs'])
-    program = local_env.Program(testname, sources)
-
-    # run the tests - but only if test_targets are stale
-    local_env.RunUnitTest(testname)
-
-    # add unit test to target alias
-    Alias('build_tests', program)
-    Alias("run_tests", testname+'.passed')
-
-    # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/esysUtils/test", ('./'+testname,))
-    TestGroups.append(tgroup)
-
diff --git a/esysUtils/test/esysUtils_UnitTest.cpp b/esysUtils/test/esysUtils_UnitTest.cpp
deleted file mode 100644
index 2b04233..0000000
--- a/esysUtils/test/esysUtils_UnitTest.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "EsysExceptionTestCase.h"
-#include "EsysFileWriterTestCase.h"
-#include <cppunit/CompilerOutputter.h>
-#include <cppunit/TestResult.h>
-#include <cppunit/TestResultCollector.h>
-#include <cppunit/TestRunner.h>
-
-using namespace CppUnit;
-
-#include "esysUtils/Esys_MPI.h"
-
-int main(int argc, char **argv)
-{
-#ifdef ESYS_MPI
-    int status = MPI_Init(&argc, &argv);
-    if (status != MPI_SUCCESS) {
-        std::cerr << argv[0] << ": MPI_Init failed, exiting." << std::endl;
-        return status;
-    }
-#endif
-    TestResult controller;
-    TestResultCollector result;
-    controller.addListener(&result);
-    TestRunner runner;
-    runner.addTest(EsysExceptionTestCase::suite());
-    runner.addTest(EsysFileWriterTestCase::suite());
-    runner.run(controller);
-    CompilerOutputter outputter( &result, std::cerr );
-    outputter.write();
-#ifdef ESYS_MPI
-    MPI_Finalize();
-#endif
-    return result.wasSuccessful() ? 0 : 1;
-
-}
-
diff --git a/pasowrap/py_src/SConscript b/finley/SConscript
similarity index 56%
copy from pasowrap/py_src/SConscript
copy to finley/SConscript
index 05a35ee..8ea77e7 100644
--- a/pasowrap/py_src/SConscript
+++ b/finley/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,19 +13,18 @@
 #
 ##############################################################################
 
+Import('env')
+if 'finley' in env['domains']:
+    if not env['paso'] and not env['trilinos']:
+        print("Finley requires a solver library! Please either enable Paso or Trilinos.")
+        env.Exit(1)
 
-import os
-Import('*')
-
-local_env = env.Clone()
-
-# get the source file names
-sources = Glob('*.py')
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-# compile
-pyc = local_env.PyCompile(sources)
+    # configure python module
+    env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# install
-py_inst = local_env.Install(local_env['pyinstall']+'/pasowrap', pyc)
-env.Alias('install_pasowrap_py', py_inst)
+    # configure unit tests
+    env.SConscript('test/SConscript', duplicate=0)
 
diff --git a/finley/py_src/SConscript b/finley/py_src/SConscript
index 911cd1a..92ca9f7 100644
--- a/finley/py_src/SConscript
+++ b/finley/py_src/SConscript
@@ -24,6 +24,6 @@ sources = Glob('*.py')
 pyc = local_env.PyCompile(sources)
 
 # install
-py_inst = local_env.Install(local_env['pyinstall']+'/finley', pyc)
-env.Alias('install_finley_py', py_inst)
+py_inst = local_env.Install(Dir('finley', local_env['pyinstall']), pyc)
+env.Alias('install_finley', py_inst)
 
diff --git a/finley/py_src/__init__.py b/finley/py_src/__init__.py
index 4f9f653..b1fd560 100644
--- a/finley/py_src/__init__.py
+++ b/finley/py_src/__init__.py
@@ -28,8 +28,6 @@ __url__="https://launchpad.net/escript-finley"
 
 
 import esys.escript
-import esys.pasowrap    #if you don't import this, you won't be able to see methods not in AbstractSystemmatrix
-#from esys.escript import *
 from .finleycpp import *
 from .factorywrappers import *
 from .readers import *
diff --git a/finley/src/Assemble.h b/finley/src/Assemble.h
index 098c257..9fa70c4 100644
--- a/finley/src/Assemble.h
+++ b/finley/src/Assemble.h
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assemblage routines: header file
@@ -27,19 +26,20 @@
 #include "Finley.h"
 #include "ElementFile.h"
 #include "NodeFile.h"
-#include "paso/SystemMatrix.h"
+#include <escript/AbstractSystemMatrix.h>
 
 namespace finley {
 
-struct AssembleParameters {
+struct AssembleParameters
+{
     AssembleParameters(const NodeFile* nodes, const ElementFile* ef,
-                       paso::SystemMatrix_ptr sm, escript::Data& rhs,
+                       escript::ASM_ptr sm, escript::Data& rhs,
                        bool reducedOrder);
 
     /// element file these parameters apply to
     const ElementFile* elements;
     /// system matrix to be updated
-    paso::SystemMatrix_ptr S;
+    escript::ASM_ptr S;
     /// right-hand side to be updated
     escript::Data& F;
     /// total number of quadrature nodes = numQuadSub * numQuadSub
@@ -78,12 +78,14 @@ struct AssembleParameters {
 /// AssembleParameters structure and calls appropriate method for the actual
 /// work.
 void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
-                  paso::SystemMatrix_ptr S, escript::Data& F,
+                  escript::ASM_ptr S, escript::Data& F,
                   const escript::Data& A, const escript::Data& B,
                   const escript::Data& C, const escript::Data& D,
                   const escript::Data& X, const escript::Data& Y);
 
-void Assemble_PDE_Points(const AssembleParameters& p, const escript::Data& d_dirac,
+template<typename Scalar>
+void Assemble_PDE_Points(const AssembleParameters& p,
+                         const escript::Data& d_dirac,
                          const escript::Data& y_dirac);
 
 void Assemble_PDE_Single_1D(const AssembleParameters& p,
@@ -91,16 +93,19 @@ void Assemble_PDE_Single_1D(const AssembleParameters& p,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_Single_2D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_Single_3D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_Single_C(const AssembleParameters& p, const escript::Data& D,
                            const escript::Data& Y);
 
@@ -109,106 +114,131 @@ void Assemble_PDE_System_1D(const AssembleParameters& p,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_System_2D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_System_3D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y);
 
+template<typename Scalar>
 void Assemble_PDE_System_C(const AssembleParameters& p, const escript::Data& D,
                            const escript::Data& Y);
 
-void Assemble_addToSystemMatrix(paso::SystemMatrix_ptr S, const int NN_Equa,
-        const index_t* Nodes_Equa, const int num_Equa, const int NN_Sol,
-        const index_t* Nodes_Sol, const int num_Sol, const double* array);
+template<typename Scalar = double>
+void Assemble_addToSystemMatrix(escript::ASM_ptr S, int NN_Equa,
+                  const index_t* Nodes_Equa, int num_Equa, int NN_Sol,
+                  const index_t* Nodes_Sol, int num_Sol, const Scalar* array);
 
 void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
                            escript::Data& lumpedMat, const escript::Data& D,
                            bool useHRZ);
 
+/// averages data
 void Assemble_AverageElementData(const ElementFile* elements,
                                  escript::Data& out, const escript::Data& in);
 
+/// copies data between different types of elements
 void Assemble_CopyElementData(const ElementFile* elements, escript::Data& out,
                               const escript::Data& in);
 
+/// copies data between different types of nodal representations
 void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
                             const escript::Data& in);
 
-void Assemble_NodeCoordinates(const NodeFile* nodes, escript::Data& out);
+/// copies node coordinates into expanded Data object `x`
+void Assemble_NodeCoordinates(const NodeFile* nodes, escript::Data& x);
 
+/// calculates the normal vector at quadrature points on face elements
 void Assemble_getNormal(const NodeFile* nodes, const ElementFile* elements,
                         escript::Data& normals);
 
+/// calculates the minimum distance between two vertices of elements and
+/// assigns the value to each quadrature point in `size`
 void Assemble_getSize(const NodeFile* nodes, const ElementFile* elements,
                       escript::Data& size);
 
+/// Assemblage of Jacobians: calculates the gradient of nodal data at
+/// quadrature points
 void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
                        escript::Data& gradient, const escript::Data& data);
 
+/// integrates data on quadrature points
 void Assemble_integrate(const NodeFile* nodes, const ElementFile* elements,
                         const escript::Data& data, double* integrals);
 
+/// interpolates nodal data in a data array onto elements (=integration points)
 void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
                           const escript::Data& data, escript::Data& output);
 
 void Assemble_jacobians_1D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_2D_M1D_E1D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_2D_M1D_E2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_2D_M1D_E2D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_3D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_3D_M2D_E2D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_3D_M2D_E3D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
+
 void Assemble_jacobians_3D_M2D_E3D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId);
 
diff --git a/finley/src/Assemble_AverageElementData.cpp b/finley/src/Assemble_AverageElementData.cpp
index fd23f58..3ea6a0f 100644
--- a/finley/src/Assemble_AverageElementData.cpp
+++ b/finley/src/Assemble_AverageElementData.cpp
@@ -21,19 +21,16 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_AverageElementData(const ElementFile* elements,
                                  escript::Data& out, const escript::Data& in)
 {
-    resetError();
     if (!elements)
         return;
 
@@ -57,13 +54,13 @@ void Assemble_AverageElementData(const ElementFile* elements,
     const int numComps=out.getDataPointSize();
 
     if (numComps != in.getDataPointSize()) {
-        setError(TYPE_ERROR, "Assemble_AverageElementData: number of components of input and output data do not match.");
+        throw escript::ValueError("Assemble_AverageElementData: number of components of input and output data do not match.");
     } else if (!in.numSamplesEqual(numQuad_in,numElements)) {
-        setError(TYPE_ERROR, "Assemble_AverageElementData: illegal number of samples of input Data object");
+        throw escript::ValueError("Assemble_AverageElementData: illegal number of samples of input Data object");
     } else if (!out.numSamplesEqual(numQuad_out,numElements)) {
-        setError(TYPE_ERROR, "Assemble_AverageElementData: illegal number of samples of output Data object");
+        throw escript::ValueError("Assemble_AverageElementData: illegal number of samples of output Data object");
     } else if (!out.actsExpanded()) {
-        setError(TYPE_ERROR, "Assemble_AverageElementData: expanded Data object is expected for output data.");
+        throw escript::ValueError("Assemble_AverageElementData: expanded Data object is expected for output data.");
     } else {
         if (in.actsExpanded()) {
             double vol=0.;
diff --git a/finley/src/Assemble_CopyElementData.cpp b/finley/src/Assemble_CopyElementData.cpp
index 68254db..8193d71 100644
--- a/finley/src/Assemble_CopyElementData.cpp
+++ b/finley/src/Assemble_CopyElementData.cpp
@@ -21,10 +21,6 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
@@ -33,7 +29,6 @@ namespace finley {
 void Assemble_CopyElementData(const ElementFile* elements, escript::Data& out,
                               const escript::Data& in)
 {
-    resetError();
     if (!elements)
         return;
 
@@ -48,13 +43,13 @@ void Assemble_CopyElementData(const ElementFile* elements, escript::Data& out,
     const int numComps=out.getDataPointSize();
 
     if (numComps != in.getDataPointSize()) {
-        setError(TYPE_ERROR,"Assemble_CopyElementData: number of components of input and output Data do not match.");
+        throw escript::ValueError("Assemble_CopyElementData: number of components of input and output Data do not match.");
     } else if (!in.numSamplesEqual(numQuad,numElements)) {
-        setError(TYPE_ERROR,"Assemble_CopyElementData: illegal number of samples of input Data object");
+        throw escript::ValueError("Assemble_CopyElementData: illegal number of samples of input Data object");
     } else if (!out.numSamplesEqual(numQuad,numElements)) {
-        setError(TYPE_ERROR,"Assemble_CopyElementData: illegal number of samples of output Data object");
+        throw escript::ValueError("Assemble_CopyElementData: illegal number of samples of output Data object");
     } else if (!out.actsExpanded()) {
-        setError(TYPE_ERROR,"Assemble_CopyElementData: expanded Data object is expected for output data.");
+        throw escript::ValueError("Assemble_CopyElementData: expanded Data object is expected for output data.");
     } else {
         if (in.actsExpanded()) {
             const size_t len_size=numComps*numQuad*sizeof(double);
diff --git a/finley/src/Assemble_CopyNodalData.cpp b/finley/src/Assemble_CopyNodalData.cpp
index 0821391..c44e201 100644
--- a/finley/src/Assemble_CopyNodalData.cpp
+++ b/finley/src/Assemble_CopyNodalData.cpp
@@ -14,18 +14,6 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Assemblage routines: copies data between different types of nodal
-  representations
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
@@ -34,100 +22,96 @@ namespace finley {
 void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
                             const escript::Data& in)
 {
-    resetError();
     if (!nodes)
         return;
 
     const int mpiSize = nodes->MPIInfo->size;
     const int numComps = out.getDataPointSize();
-    const int in_data_type=in.getFunctionSpace().getTypeCode();
-    const int out_data_type=out.getFunctionSpace().getTypeCode();
+    const int in_data_type = in.getFunctionSpace().getTypeCode();
+    const int out_data_type = out.getFunctionSpace().getTypeCode();
 
     // check out and in
     if (numComps != in.getDataPointSize()) {
-        setError(TYPE_ERROR,"Assemble_CopyNodalData: number of components of input and output Data do not match.");
+        throw escript::ValueError("Assemble_CopyNodalData: number of components of input and output Data do not match.");
     } else if (!out.actsExpanded()) {
-        setError(TYPE_ERROR,"Assemble_CopyNodalData: expanded Data object is expected for output data.");
+        throw escript::ValueError("Assemble_CopyNodalData: expanded Data object is expected for output data.");
     }
 
     // more sophisticated test needed for overlapping node/DOF counts
     if (in_data_type == FINLEY_NODES) {
         if (!in.numSamplesEqual(1, nodes->getNumNodes())) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal number of samples of input Data object");
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
         }
     } else if (in_data_type == FINLEY_REDUCED_NODES) {
         if (!in.numSamplesEqual(1, nodes->getNumReducedNodes())) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal number of samples of input Data object");
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
         }
     } else if (in_data_type == FINLEY_DEGREES_OF_FREEDOM) {
         if (!in.numSamplesEqual(1, nodes->getNumDegreesOfFreedom())) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal number of samples of input Data object");
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
         }
         if (((out_data_type == FINLEY_NODES) || (out_data_type == FINLEY_DEGREES_OF_FREEDOM)) && !in.actsExpanded() && (mpiSize>1)) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: FINLEY_DEGREES_OF_FREEDOM to FINLEY_NODES or FINLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
+            throw escript::ValueError("Assemble_CopyNodalData: FINLEY_DEGREES_OF_FREEDOM to FINLEY_NODES or FINLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
         }
     } else if (in_data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
         if (!in.numSamplesEqual(1, nodes->getNumReducedDegreesOfFreedom())) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal number of samples of input Data object");
+            throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of input Data object");
         }
         if ((out_data_type == FINLEY_DEGREES_OF_FREEDOM) && !in.actsExpanded() && (mpiSize>1)) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: FINLEY_REDUCED_DEGREES_OF_FREEDOM to FINLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
+            throw escript::ValueError("Assemble_CopyNodalData: FINLEY_REDUCED_DEGREES_OF_FREEDOM to FINLEY_DEGREES_OF_FREEDOM requires expanded input data on more than one processor.");
         }
     } else {
-        setError(TYPE_ERROR, "Assemble_CopyNodalData: illegal function space type for target object");
+        throw escript::ValueError("Assemble_CopyNodalData: illegal function space type for target object");
     }
 
-    dim_t numOut=0;
+    dim_t numOut = 0;
     switch (out_data_type) {
         case FINLEY_NODES:
-            numOut=nodes->getNumNodes();
+            numOut = nodes->getNumNodes();
             break;
 
         case FINLEY_REDUCED_NODES:
-            numOut=nodes->getNumReducedNodes();
+            numOut = nodes->getNumReducedNodes();
             break;
 
         case FINLEY_DEGREES_OF_FREEDOM:
-            numOut=nodes->getNumDegreesOfFreedom();
+            numOut = nodes->getNumDegreesOfFreedom();
             break;
 
         case FINLEY_REDUCED_DEGREES_OF_FREEDOM:
-            numOut=nodes->getNumReducedDegreesOfFreedom();
+            numOut = nodes->getNumReducedDegreesOfFreedom();
             break;
 
         default:
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal function space type for source object");
+            throw escript::ValueError("Assemble_CopyNodalData: illegal function space type for source object");
     }
 
     if (!out.numSamplesEqual(1, numOut)) {
-        setError(TYPE_ERROR,"Assemble_CopyNodalData: illegal number of samples of output Data object");
+        throw escript::ValueError("Assemble_CopyNodalData: illegal number of samples of output Data object");
     }
 
-    if (!noError())
-        return;
+    const size_t numComps_size = numComps * sizeof(double);
 
-    const size_t numComps_size = numComps*sizeof(double);
-
-    /*********************** FINLEY_NODES ********************************/
+    /**************************** FINLEY_NODES ******************************/
     if (in_data_type == FINLEY_NODES) {
         out.requireWrite();
         if (out_data_type == FINLEY_NODES) {
 #pragma omp parallel for
-            for (index_t n=0; n<numOut; n++) {
+            for (index_t n = 0; n < numOut; n++) {
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n), numComps_size);
             }
         } else if (out_data_type == FINLEY_REDUCED_NODES) {
-            const std::vector<index_t>& map = nodes->borrowReducedNodesTarget();
+            const IndexVector& map = nodes->borrowReducedNodesTarget();
             const dim_t mapSize = map.size();
 #pragma omp parallel for
-            for (index_t n=0; n<mapSize; n++) {
+            for (index_t n = 0; n < mapSize; n++) {
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(map[n]),
                        numComps_size);
             }
         } else if (out_data_type == FINLEY_DEGREES_OF_FREEDOM) {
-            const std::vector<index_t>& map = nodes->borrowDegreesOfFreedomTarget();
+            const IndexVector& map = nodes->borrowDegreesOfFreedomTarget();
 #pragma omp parallel for
-            for (index_t n=0; n<numOut; n++) {
+            for (index_t n = 0; n < numOut; n++) {
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(map[n]),
                        numComps_size);
             }
@@ -140,10 +124,10 @@ void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
             }
         }
 
-    /*********************** FINLEY_REDUCED_NODES ***************************/
+    /************************ FINLEY_REDUCED_NODES **************************/
     } else if (in_data_type == FINLEY_REDUCED_NODES) {
         if (out_data_type == FINLEY_NODES) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: cannot copy from reduced nodes to nodes.");
+            throw escript::ValueError("Assemble_CopyNodalData: cannot copy from reduced nodes to nodes.");
         } else if (out_data_type == FINLEY_REDUCED_NODES) {
             out.requireWrite();
             const dim_t nNodes = nodes->getNumNodes();
@@ -152,78 +136,140 @@ void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n), numComps_size);
             }
        } else if (out_data_type == FINLEY_DEGREES_OF_FREEDOM) {
-            setError(TYPE_ERROR,"Assemble_CopyNodalData: cannot copy from reduced nodes to degrees of freedom.");
+            throw escript::ValueError("Assemble_CopyNodalData: cannot copy from reduced nodes to degrees of freedom.");
        } else if (out_data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
             out.requireWrite();
             const index_t* target = nodes->borrowTargetReducedNodes();
-            const std::vector<index_t>& map = nodes->borrowReducedDegreesOfFreedomTarget();
+            const IndexVector& map = nodes->borrowReducedDegreesOfFreedomTarget();
 #pragma omp parallel for
-            for (index_t n=0; n<numOut; n++) {
+            for (index_t n = 0; n < numOut; n++) {
                memcpy(out.getSampleDataRW(n),
                       in.getSampleDataRO(target[map[n]]), numComps_size);
             }
         }
-
-    /******************** FINLEY_DEGREES_OF_FREEDOM *********************/
+    /********************** FINLEY_DEGREES_OF_FREEDOM ***********************/
     } else if (in_data_type == FINLEY_DEGREES_OF_FREEDOM) {
         out.requireWrite();
         if (out_data_type == FINLEY_NODES) {
-            paso::Coupler_ptr coupler(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps));
-            if (Esys_noError()) {
-                // Coupler holds the pointer but it doesn't appear to get
-                // used so RO should work.
-                const_cast<escript::Data*>(&in)->resolve();
-                coupler->startCollect(in.getDataRO());
-                const double *recv_buffer=coupler->finishCollect();
-                const index_t upperBound=nodes->getNumDegreesOfFreedom();
-                const index_t* target = nodes->borrowTargetDegreesOfFreedom();
-                const dim_t nNodes = nodes->numNodes;
+            const_cast<escript::Data*>(&in)->resolve();
+            const index_t* target = nodes->borrowTargetDegreesOfFreedom();
+#ifdef ESYS_HAVE_PASO
+            paso::Coupler_ptr coupler(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps, nodes->MPIInfo));
+            coupler->startCollect(in.getDataRO());
+            const double* recv_buffer = coupler->finishCollect();
+            const index_t upperBound = nodes->getNumDegreesOfFreedom();
 #pragma omp parallel for
-                for (index_t n=0; n < nNodes; n++) {
-                    const index_t k=target[n];
-                    if (k < upperBound) {
-                        memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
-                               numComps_size);
-                    } else {
-                        memcpy(out.getSampleDataRW(n),
-                               &recv_buffer[(k-upperBound)*numComps],
-                               numComps_size);
-                    }
+            for (index_t n = 0; n < numOut; n++) {
+                const index_t k = target[n];
+                if (k < upperBound) {
+                    memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
+                           numComps_size);
+                } else {
+                    memcpy(out.getSampleDataRW(n),
+                           &recv_buffer[(k - upperBound) * numComps],
+                           numComps_size);
                 }
             }
-        } else if  (out_data_type == FINLEY_REDUCED_NODES) {
-            paso::Coupler_ptr coupler(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps));
-            if (Esys_noError()) {
-                const_cast<escript::Data*>(&in)->resolve();
-                coupler->startCollect(in.getDataRO());
-                const double *recv_buffer=coupler->finishCollect();
-                const index_t upperBound=nodes->getNumDegreesOfFreedom();
-                const std::vector<index_t>& map = nodes->borrowReducedNodesTarget();
-                const index_t* target = nodes->borrowTargetDegreesOfFreedom();
-                const dim_t mapSize = map.size();
+#elif defined(ESYS_HAVE_TRILINOS)
+            using namespace esys_trilinos;
+
+            Teuchos::RCP<const MapType> colMap;
+            Teuchos::RCP<const MapType> rowMap;
+            MapType colPointMap;
+            MapType rowPointMap;
+            if (numComps > 1) {
+                colPointMap = RealBlockVector::makePointMap(
+                                             *nodes->trilinosColMap, numComps);
+                rowPointMap = RealBlockVector::makePointMap(
+                                             *nodes->trilinosRowMap, numComps);
+                colMap = Teuchos::rcpFromRef(colPointMap);
+                rowMap = Teuchos::rcpFromRef(rowPointMap);
+            } else {
+                colMap = nodes->trilinosColMap;
+                rowMap = nodes->trilinosRowMap;
+            }
+
+            const ImportType importer(rowMap, colMap);
+            const Teuchos::ArrayView<const real_t> localIn(
+                                               in.getSampleDataRO(0),
+                                               in.getNumDataPoints()*numComps);
+            Teuchos::RCP<RealVector> lclData = rcp(new RealVector(rowMap,
+                                                  localIn, localIn.size(), 1));
+            Teuchos::RCP<RealVector> gblData = rcp(new RealVector(colMap, 1));
+            gblData->doImport(*lclData, importer, Tpetra::INSERT);
+            Teuchos::ArrayRCP<const real_t> gblArray(gblData->getData(0));
+#pragma omp parallel for
+            for (index_t i = 0; i < numOut; i++) {
+                const real_t* src = &gblArray[target[i] * numComps];
+                std::copy(src, src+numComps, out.getSampleDataRW(i));
+            }
+#endif
+        } else if (out_data_type == FINLEY_REDUCED_NODES) {
+            const_cast<escript::Data*>(&in)->resolve();
+            const index_t* target = nodes->borrowTargetDegreesOfFreedom();
+            const IndexVector& map = nodes->borrowReducedNodesTarget();
+#ifdef ESYS_HAVE_PASO
+            paso::Coupler_ptr coupler(new paso::Coupler(nodes->degreesOfFreedomConnector, numComps, nodes->MPIInfo));
+            coupler->startCollect(in.getDataRO());
+            const double* recv_buffer = coupler->finishCollect();
+            const index_t upperBound = nodes->getNumDegreesOfFreedom();
+            const dim_t mapSize = map.size();
 
 #pragma omp parallel for
-                for (index_t n=0; n < mapSize; n++) {
-                    const index_t k=target[map[n]];
-                    if (k < upperBound) {
-                        memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
-                               numComps_size);
-                    } else {
-                        memcpy(out.getSampleDataRW(n),
-                               &recv_buffer[(k-upperBound)*numComps],
-                               numComps_size);
-                    }
+            for (index_t n = 0; n < mapSize; n++) {
+                const index_t k = target[map[n]];
+                if (k < upperBound) {
+                    memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
+                           numComps_size);
+                } else {
+                    memcpy(out.getSampleDataRW(n),
+                           &recv_buffer[(k - upperBound) * numComps],
+                           numComps_size);
                 }
             }
+#elif defined(ESYS_HAVE_TRILINOS)
+            using namespace esys_trilinos;
+
+            Teuchos::RCP<const MapType> colMap;
+            Teuchos::RCP<const MapType> rowMap;
+            MapType colPointMap;
+            MapType rowPointMap;
+            if (numComps > 1) {
+                colPointMap = RealBlockVector::makePointMap(
+                                             *nodes->trilinosColMap, numComps);
+                rowPointMap = RealBlockVector::makePointMap(
+                                             *nodes->trilinosRowMap, numComps);
+                colMap = Teuchos::rcpFromRef(colPointMap);
+                rowMap = Teuchos::rcpFromRef(rowPointMap);
+            } else {
+                colMap = nodes->trilinosColMap;
+                rowMap = nodes->trilinosRowMap;
+            }
+
+            const ImportType importer(rowMap, colMap);
+            const Teuchos::ArrayView<const real_t> localIn(
+                                               in.getSampleDataRO(0),
+                                               in.getNumDataPoints()*numComps);
+            Teuchos::RCP<RealVector> lclData = rcp(new RealVector(rowMap,
+                                                  localIn, localIn.size(), 1));
+            Teuchos::RCP<RealVector> gblData = rcp(new RealVector(colMap, 1));
+            gblData->doImport(*lclData, importer, Tpetra::INSERT);
+            Teuchos::ArrayRCP<const real_t> gblArray(gblData->getData(0));
+#pragma omp parallel for
+            for (index_t i = 0; i < numOut; i++) {
+                const real_t* src = &gblArray[target[map[i]] * numComps];
+                std::copy(src, src+numComps, out.getSampleDataRW(i));
+            }
+#endif
         } else if (out_data_type == FINLEY_DEGREES_OF_FREEDOM) {
 #pragma omp parallel for
-            for (index_t n=0; n<numOut; n++) {
+            for (index_t n = 0; n < numOut; n++) {
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n),
                        numComps_size);
             }
         } else if (out_data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
-            const std::vector<index_t>& map = nodes->borrowReducedDegreesOfFreedomTarget();
             const index_t* target = nodes->borrowTargetDegreesOfFreedom();
+            const std::vector<index_t>& map = nodes->borrowReducedDegreesOfFreedomTarget();
 #pragma omp parallel for
             for (index_t n=0; n<numOut; n++) {
                 memcpy(out.getSampleDataRW(n),
@@ -231,42 +277,75 @@ void Assemble_CopyNodalData(const NodeFile* nodes, escript::Data& out,
             }
         }
 
-    /**************** FINLEY_REDUCED_DEGREES_OF_FREEDOM *****************/
+    /****************** FINLEY_REDUCED_DEGREES_OF_FREEDOM *******************/
     } else if (in_data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
         if (out_data_type == FINLEY_NODES) {
-            setError(TYPE_ERROR, "Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to nodes.");
+            throw escript::ValueError("Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to nodes.");
         } else if (out_data_type == FINLEY_REDUCED_NODES) {
-            paso::Coupler_ptr coupler(new paso::Coupler(nodes->reducedDegreesOfFreedomConnector,numComps));
-            if (Esys_noError()) {
-                const_cast<escript::Data*>(&in)->resolve();
-                coupler->startCollect(in.getDataRO());
-                out.requireWrite();
-                const index_t upperBound=nodes->getNumReducedDegreesOfFreedom();
-                const std::vector<index_t>& map=nodes->borrowReducedNodesTarget();
-                const dim_t mapSize = map.size();
-                const index_t* target=nodes->borrowTargetReducedDegreesOfFreedom();
-                const double *recv_buffer=coupler->finishCollect();
+            const_cast<escript::Data*>(&in)->resolve();
+            const index_t* target = nodes->borrowTargetReducedDegreesOfFreedom();
+            const IndexVector& map = nodes->borrowReducedNodesTarget();
+            out.requireWrite();
+#ifdef ESYS_HAVE_PASO
+            paso::Coupler_ptr coupler(new paso::Coupler(nodes->reducedDegreesOfFreedomConnector, numComps, nodes->MPIInfo));
+            coupler->startCollect(in.getDataRO());
+            const index_t upperBound = nodes->getNumReducedDegreesOfFreedom();
+            const dim_t mapSize = map.size();
+            const double *recv_buffer = coupler->finishCollect();
 #pragma omp parallel for
-                for (index_t n=0; n < mapSize; n++) {
-                    const index_t k=target[map[n]];
-                    if (k < upperBound) {
-                        memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
-                               numComps_size);
-                    } else {
-                        memcpy(out.getSampleDataRW(n),
-                               &recv_buffer[(k-upperBound)*numComps],
-                               numComps_size);
-                    }
+            for (index_t n = 0; n < mapSize; n++) {
+                const index_t k = target[map[n]];
+                if (k < upperBound) {
+                    memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k),
+                           numComps_size);
+                } else {
+                    memcpy(out.getSampleDataRW(n),
+                           &recv_buffer[(k - upperBound) * numComps],
+                           numComps_size);
                 }
             }
+#elif defined(ESYS_HAVE_TRILINOS)
+            using namespace esys_trilinos;
+
+            Teuchos::RCP<const MapType> colMap;
+            Teuchos::RCP<const MapType> rowMap;
+            MapType colPointMap;
+            MapType rowPointMap;
+            if (numComps > 1) {
+                colPointMap = RealBlockVector::makePointMap(
+                                      *nodes->trilinosReducedColMap, numComps);
+                rowPointMap = RealBlockVector::makePointMap(
+                                      *nodes->trilinosReducedRowMap, numComps);
+                colMap = Teuchos::rcpFromRef(colPointMap);
+                rowMap = Teuchos::rcpFromRef(rowPointMap);
+            } else {
+                colMap = nodes->trilinosReducedColMap;
+                rowMap = nodes->trilinosReducedRowMap;
+            }
+
+            const ImportType importer(rowMap, colMap);
+            const Teuchos::ArrayView<const real_t> localIn(
+                                               in.getSampleDataRO(0),
+                                               in.getNumDataPoints()*numComps);
+            Teuchos::RCP<RealVector> lclData = rcp(new RealVector(rowMap,
+                                                  localIn, localIn.size(), 1));
+            Teuchos::RCP<RealVector> gblData = rcp(new RealVector(colMap, 1));
+            gblData->doImport(*lclData, importer, Tpetra::INSERT);
+            Teuchos::ArrayRCP<const real_t> gblArray(gblData->getData(0));
+#pragma omp parallel for
+            for (index_t i = 0; i < numOut; i++) {
+                const real_t* src = &gblArray[target[map[i]] * numComps];
+                std::copy(src, src+numComps, out.getSampleDataRW(i));
+            }
+#endif
         } else if (out_data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
             out.requireWrite();
 #pragma omp parallel for
-            for (index_t n=0; n<numOut; n++) {
+            for (index_t n = 0; n < numOut; n++) {
                 memcpy(out.getSampleDataRW(n), in.getSampleDataRO(n), numComps_size);
             }
-        } else if (out_data_type == FINLEY_DEGREES_OF_FREEDOM ) {
-            setError(TYPE_ERROR, "Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to degrees of freedom.");
+        } else if (out_data_type == FINLEY_DEGREES_OF_FREEDOM) {
+            throw escript::ValueError("Assemble_CopyNodalData: cannot copy from reduced degrees of freedom to degrees of freedom.");
         }
     } // in_data_type
 }
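
The rewritten DOF-to-nodes branches above keep the original ownership test: a target index below getNumDegreesOfFreedom() is copied locally, anything above it comes out of the receive buffer filled by the coupler. Only the Esys_noError() guard disappears and a Trilinos import path is added alongside Paso. A minimal sketch of the shared Paso gather pattern, assuming the paso::Coupler constructor, the NodeFile accessors and the headers already used by Assemble_CopyNodalData.cpp (gatherDofToNodes is a hypothetical helper, not part of the patch):

    // Sketch only: copy DOF-based data onto nodes, fetching non-local values
    // through a paso::Coupler exactly as the FINLEY_NODES branch above does.
    static void gatherDofToNodes(const finley::NodeFile* nodes,
                                 escript::Data& in, escript::Data& out,
                                 int numComps)
    {
        const size_t sz = numComps * sizeof(double);
        in.resolve();
        paso::Coupler_ptr coupler(new paso::Coupler(
                nodes->degreesOfFreedomConnector, numComps, nodes->MPIInfo));
        coupler->startCollect(in.getDataRO());          // post the exchange
        const double* recv = coupler->finishCollect();  // wait for remote values
        const index_t nLocal = nodes->getNumDegreesOfFreedom();
        const index_t* target = nodes->borrowTargetDegreesOfFreedom();
    #pragma omp parallel for
        for (index_t n = 0; n < nodes->getNumNodes(); n++) {
            const index_t k = target[n];
            if (k < nLocal)        // value owned by this rank
                memcpy(out.getSampleDataRW(n), in.getSampleDataRO(k), sz);
            else                   // value received from another rank
                memcpy(out.getSampleDataRW(n), &recv[(k - nLocal) * numComps], sz);
        }
    }

The ESYS_HAVE_TRILINOS branches do the same job with a Tpetra import: the local values are wrapped in a row-map vector, doImport() scatters them into the overlapping column map, and the copy loop reads from the imported array instead of the receive buffer.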
diff --git a/finley/src/Assemble_LumpedSystem.cpp b/finley/src/Assemble_LumpedSystem.cpp
index 361ef34..c385556 100644
--- a/finley/src/Assemble_LumpedSystem.cpp
+++ b/finley/src/Assemble_LumpedSystem.cpp
@@ -24,13 +24,11 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 #include <sstream>
 
 namespace finley {
@@ -39,52 +37,43 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
                            escript::Data& lumpedMat, const escript::Data& D,
                            bool useHRZ)
 {
-    resetError();
-
     if (!nodes || !elements || lumpedMat.isEmpty() || D.isEmpty())
         return;
 
-    const int funcspace=D.getFunctionSpace().getTypeCode();
+    const int funcspace = D.getFunctionSpace().getTypeCode();
     bool reducedOrder;
     // check function space of D
-    if (funcspace==FINLEY_ELEMENTS) {
-        reducedOrder=false;
-    } else if (funcspace==FINLEY_FACE_ELEMENTS)  {
-        reducedOrder=false;
-    } else if (funcspace==FINLEY_REDUCED_ELEMENTS) {
-        reducedOrder=true;
-    } else if (funcspace==FINLEY_REDUCED_FACE_ELEMENTS)  {
-        reducedOrder=true;
-    } else if (funcspace==FINLEY_POINTS)  {
-        reducedOrder=true;
+    if (funcspace == FINLEY_ELEMENTS) {
+        reducedOrder = false;
+    } else if (funcspace == FINLEY_FACE_ELEMENTS)  {
+        reducedOrder = false;
+    } else if (funcspace == FINLEY_REDUCED_ELEMENTS) {
+        reducedOrder = true;
+    } else if (funcspace == FINLEY_REDUCED_FACE_ELEMENTS)  {
+        reducedOrder = true;
+    } else if (funcspace == FINLEY_POINTS)  {
+        reducedOrder = true;
     } else {
-        setError(TYPE_ERROR, "Assemble_LumpedSystem: assemblage failed because of illegal function space.");
-        return;
+        throw escript::ValueError("Assemble_LumpedSystem: assemblage failed because of illegal function space.");
     }
 
     // initialize parameters
-    AssembleParameters p(nodes, elements, paso::SystemMatrix_ptr(), lumpedMat,
-                         reducedOrder);
-    if (!noError())
-        return;
+    AssembleParameters p(nodes, elements, NULL, lumpedMat, reducedOrder);
 
     // check if all function spaces are the same
-    if (!D.numSamplesEqual(p.numQuadTotal, elements->numElements) ) {
+    if (!D.numSamplesEqual(p.numQuadTotal, elements->numElements)) {
         std::stringstream ss;
         ss << "Assemble_LumpedSystem: sample points of coefficient D "
             "don't match (" << p.numQuadSub << "," << elements->numElements
             << ").";
-        std::string errorMsg = ss.str();
-        setError(TYPE_ERROR, errorMsg.c_str());
-        return;
+        throw escript::ValueError(ss.str());
     }
 
-    // check the dimensions:
-    if (p.numEqu==1) {
+    // check the dimensions
+    if (p.numEqu == 1) {
         const escript::DataTypes::ShapeType dimensions; //dummy
         if (D.getDataPointShape() != dimensions) {
-            setError(TYPE_ERROR, "Assemble_LumpedSystem: coefficient D, rank 0 expected.");
-            return;
+            throw escript::ValueError("Assemble_LumpedSystem: coefficient D, rank 0 expected.");
         }
     } else {
         const escript::DataTypes::ShapeType dimensions(1, p.numEqu);
@@ -92,25 +81,24 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
             std::stringstream ss;
             ss << "Assemble_LumpedSystem: coefficient D does not have "
                 "expected shape (" << p.numEqu << ",).";
-            std::string errorMsg = ss.str();
-            setError(TYPE_ERROR, errorMsg.c_str());
-            return;
+            throw escript::ValueError(ss.str());
         }
     }
 
     lumpedMat.requireWrite();
-    double *lumpedMat_p=lumpedMat.getSampleDataRW(0);
+    double* lumpedMat_p = lumpedMat.getSampleDataRW(0);
+
     if (funcspace==FINLEY_POINTS) {
 #pragma omp parallel
         {
             for (int color=elements->minColor; color<=elements->maxColor; color++) {
-                // loop over all elements:
+                // loop over all elements
 #pragma omp for
                 for (index_t e=0; e<elements->numElements; e++) {
                     if (elements->Color[e]==color) {
-                        const double *D_p=D.getSampleDataRO(e);
+                        const double* D_p = D.getSampleDataRO(e);
                         util::addScatter(1,
-                                &(p.row_DOF[elements->Nodes[INDEX2(0,e,p.NN)]]),
+                                &p.row_DOF[elements->Nodes[INDEX2(0,e,p.NN)]],
                                 p.numEqu, D_p, lumpedMat_p,
                                 p.row_DOF_UpperBound);
                     } // end color check
@@ -118,58 +106,58 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
             } // end color loop
         } // end parallel region
     } else { // function space not points
-        bool expandedD=D.actsExpanded();
+        bool expandedD = D.actsExpanded();
         const std::vector<double>& S(p.row_jac->BasisFunctions->S);
 
 #pragma omp parallel
         {
-            std::vector<double> EM_lumpedMat(p.row_numShapesTotal*p.numEqu);
-            std::vector<index_t> row_index(p.row_numShapesTotal);
+            std::vector<double> EM_lumpedMat(p.row_numShapesTotal * p.numEqu);
+            IndexVector row_index(p.row_numShapesTotal);
             if (p.numEqu == 1) { // single equation
                 if (expandedD) { // with expanded D
-                    for (int color=elements->minColor; color<=elements->maxColor; color++) {
-                        // loop over all elements:
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
 #pragma omp for
-                        for (index_t e=0; e<elements->numElements; e++) {
-                            if (elements->Color[e]==color) {
-                                for (int isub=0; isub<p.numSub; isub++) {
-                                    const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)]);
-                                    const double *D_p=D.getSampleDataRO(e);
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                for (int isub = 0; isub < p.numSub; isub++) {
+                                    const double* Vol = &p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)];
+                                    const double* D_p = D.getSampleDataRO(e);
                                     if (useHRZ) {
-                                        double m_t=0; // mass of the element
-                                        double diagS=0; // diagonal sum
+                                        double m_t = 0; // mass of the element
+                                        double diagS = 0; // diagonal sum
                                         double rtmp;
                                         #pragma ivdep
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            m_t+=Vol[q]*D_p[INDEX2(q, isub,p.numQuadSub) ];
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            m_t += Vol[q] * D_p[INDEX2(q, isub, p.numQuadSub) ];
 
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            rtmp=0.;
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            rtmp = 0.;
                                             #pragma ivdep
-                                            for (int q=0; q<p.numQuadSub; q++) {
-                                                const double Sq=S[INDEX2(s,q,p.row_numShapes)];
-                                                rtmp+=Vol[q]*D_p[INDEX2(q, isub,p.numQuadSub)]*Sq*Sq;
+                                            for (int q = 0; q < p.numQuadSub; q++) {
+                                                const double Sq = S[INDEX2(s,q,p.row_numShapes)];
+                                                rtmp += Vol[q]*D_p[INDEX2(q, isub,p.numQuadSub)] * Sq * Sq;
                                             }
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]=rtmp;
-                                            diagS+=rtmp;
+                                            EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
+                                            diagS += rtmp;
                                         }
                                         // rescale diagonals by m_t/diagS to
                                         // ensure consistent mass over element
-                                        rtmp=m_t/diagS;
+                                        rtmp = m_t/diagS;
                                         #pragma ivdep
-                                        for (int s=0; s<p.row_numShapes; s++)
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]*=rtmp;
+                                        for (int s = 0; s < p.row_numShapes; s++)
+                                            EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
                                     } else { // row-sum lumping
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            double rtmp=0.;
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            double rtmp = 0.;
                                             #pragma ivdep
-                                            for (int q=0; q<p.numQuadSub; q++)
-                                                rtmp+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_p[INDEX2(q, isub,p.numQuadSub)];
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]=rtmp;
+                                            for (int q = 0; q < p.numQuadSub; q++)
+                                                rtmp += Vol[q]*S[INDEX2(s,q,p.row_numShapes)] * D_p[INDEX2(q, isub,p.numQuadSub)];
+                                            EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
                                         }
                                     }
-                                    for (int q=0; q<p.row_numShapesTotal; q++)
-                                        row_index[q]=p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                                    for (int q = 0; q < p.row_numShapesTotal; q++)
+                                        row_index[q] = p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
                                     util::addScatter(p.row_numShapesTotal,
                                                 &row_index[0], p.numEqu,
                                                 &EM_lumpedMat[0], lumpedMat_p,
@@ -178,50 +166,49 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
                             } // end color check
                         } // end element loop
                     } // end color loop
-
-                } else  { // with constant D
-                    for (int color=elements->minColor; color<=elements->maxColor; color++) {
-                        // loop over all elements:
+                } else { // with constant D
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
 #pragma omp for
-                        for (index_t e=0; e<elements->numElements; e++) {
-                            if (elements->Color[e]==color) {
-                                for (int isub=0; isub<p.numSub; isub++) {
-                                    const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)]);
-                                    const double *D_p=D.getSampleDataRO(e);
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                for (int isub = 0; isub < p.numSub; isub++) {
+                                    const double* Vol = &p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)];
+                                    const double* D_p = D.getSampleDataRO(e);
                                     if (useHRZ) { // HRZ lumping
-                                        double m_t=0; // mass of the element
-                                        double diagS=0; // diagonal sum
+                                        double m_t = 0; // mass of the element
+                                        double diagS = 0; // diagonal sum
                                         double rtmp;
                                         #pragma ivdep
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            m_t+=Vol[q];
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            rtmp=0.;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            m_t += Vol[q];
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            rtmp = 0.;
                                             #pragma ivdep
-                                            for (int q=0; q<p.numQuadSub; q++) {
-                                                const double Sq=S[INDEX2(s,q,p.row_numShapes)];
-                                                rtmp+=Vol[q]*Sq*Sq;
+                                            for (int q = 0; q < p.numQuadSub; q++) {
+                                                const double Sq = S[INDEX2(s,q,p.row_numShapes)];
+                                                rtmp += Vol[q] * Sq * Sq;
                                             }
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]=rtmp;
-                                            diagS+=rtmp;
+                                            EM_lumpedMat[INDEX2(0, s, p.numEqu)] = rtmp;
+                                            diagS += rtmp;
                                         }
                                         // rescale diagonals by m_t/diagS to
                                         // ensure consistent mass over element
-                                        rtmp=m_t/diagS*D_p[0];
+                                        rtmp = m_t / diagS * D_p[0];
                                         #pragma ivdep
-                                        for (int s=0; s<p.row_numShapes; s++)
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]*=rtmp;
+                                        for (int s = 0; s < p.row_numShapes; s++)
+                                            EM_lumpedMat[INDEX2(0, s, p.numEqu)] *= rtmp;
                                     } else { // row-sum lumping
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            double rtmp=0.;
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            double rtmp = 0.;
                                             #pragma ivdep
-                                            for (int q=0; q<p.numQuadSub; q++)
-                                                rtmp+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)]=rtmp*D_p[0];
+                                            for (int q = 0; q < p.numQuadSub; q++)
+                                                rtmp += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                            EM_lumpedMat[INDEX2(0,s,p.numEqu)] = rtmp * D_p[0];
                                         }
                                     }
-                                    for (int q=0; q<p.row_numShapesTotal; q++)
-                                        row_index[q]=p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                                    for (int q = 0; q < p.row_numShapesTotal; q++)
+                                        row_index[q] = p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
                                     util::addScatter(p.row_numShapesTotal,
                                                 &row_index[0], p.numEqu,
                                                 &EM_lumpedMat[0], lumpedMat_p,
@@ -234,14 +221,14 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
 
             } else { // system of equations
                 if (expandedD) { // with expanded D
-                    for (int color=elements->minColor; color<=elements->maxColor; color++) {
-                        // loop over all elements:
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
 #pragma omp for
-                        for (index_t e=0; e<elements->numElements; e++) {
-                            if (elements->Color[e]==color) {
-                                for (int isub=0; isub<p.numSub; isub++) {
-                                    const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                                    const double *D_p=D.getSampleDataRO(e);
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                for (int isub = 0; isub < p.numSub; isub++) {
+                                    const double* Vol = &p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)];
+                                    const double* D_p = D.getSampleDataRO(e);
 
                                     if (useHRZ) { // HRZ lumping
                                         for (int k=0; k<p.numEqu; k++) {
@@ -291,57 +278,56 @@ void Assemble_LumpedSystem(const NodeFile* nodes, const ElementFile* elements,
                             } // end color check
                         } // end element loop
                     } // end color loop
-
                 } else { // with constant D
-                    for (int color=elements->minColor; color<=elements->maxColor; color++) {
-                        // loop over all elements:
+                    for (int color = elements->minColor; color <= elements->maxColor; color++) {
+                        // loop over all elements
 #pragma omp for
-                        for (index_t e=0; e<elements->numElements; e++) {
-                            if (elements->Color[e]==color) {
-                                for (int isub=0; isub<p.numSub; isub++) {
-                                    const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)]);
-                                    const double *D_p=D.getSampleDataRO(e);
+                        for (index_t e = 0; e < elements->numElements; e++) {
+                            if (elements->Color[e] == color) {
+                                for (int isub = 0; isub < p.numSub; isub++) {
+                                    const double* Vol = &p.row_jac->volume[INDEX3(0,isub,e, p.numQuadSub,p.numSub)];
+                                    const double* D_p = D.getSampleDataRO(e);
 
                                     if (useHRZ) { // HRZ lumping
-                                        double m_t=0.; // mass of the element
-                                        double diagS=0; // diagonal sum
+                                        double m_t = 0.; // mass of the element
+                                        double diagS = 0; // diagonal sum
                                         double rtmp;
                                         #pragma ivdep
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            m_t+=Vol[q];
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            rtmp=0.;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            m_t += Vol[q];
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            rtmp = 0.;
                                             #pragma ivdep
-                                            for (int q=0; q<p.numQuadSub; q++) {
-                                                const double Sq=S[INDEX2(s,q,p.row_numShapes)];
-                                                rtmp+=Vol[q]*Sq*Sq;
+                                            for (int q = 0; q < p.numQuadSub; q++) {
+                                                const double Sq = S[INDEX2(s, q, p.row_numShapes)];
+                                                rtmp += Vol[q] * Sq * Sq;
                                             }
                                             #pragma ivdep
-                                            for (int k=0; k<p.numEqu; k++)
-                                                EM_lumpedMat[INDEX2(k,s,p.numEqu)]=rtmp;
-                                            diagS+=rtmp;
+                                            for (int k = 0; k < p.numEqu; k++)
+                                                EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp;
+                                            diagS += rtmp;
                                         }
 
                                         // rescale diagonals by m_t/diagS to
                                         // ensure consistent mass over element
-                                        rtmp=m_t/diagS;
-                                        for (int s=0; s<p.row_numShapes; s++)
+                                        rtmp = m_t / diagS;
+                                        for (int s = 0; s < p.row_numShapes; s++)
                                             #pragma ivdep
-                                            for (int k=0; k<p.numEqu; k++)
-                                                EM_lumpedMat[INDEX2(k,s,p.numEqu)]*=rtmp*D_p[k];
+                                            for (int k = 0; k < p.numEqu; k++)
+                                                EM_lumpedMat[INDEX2(k, s, p.numEqu)] *= rtmp * D_p[k];
                                     } else { // row-sum lumping
-                                        for (int s=0; s<p.row_numShapes; s++) {
-                                            for (int k=0; k<p.numEqu; k++) {
-                                                double rtmp=0.;
+                                        for (int s = 0; s < p.row_numShapes; s++) {
+                                            for (int k = 0; k < p.numEqu; k++) {
+                                                double rtmp = 0.;
                                                 #pragma ivdep
-                                                for (int q=0; q<p.numQuadSub; q++)
-                                                    rtmp+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                                EM_lumpedMat[INDEX2(k,s,p.numEqu)]=rtmp*D_p[k];
+                                                for (int q = 0; q < p.numQuadSub; q++)
+                                                    rtmp += Vol[q] * S[INDEX2(s, q, p.row_numShapes)];
+                                                EM_lumpedMat[INDEX2(k, s, p.numEqu)] = rtmp * D_p[k];
                                             }
                                         }
                                     }
-                                    for (int q=0; q<p.row_numShapesTotal; q++)
-                                        row_index[q]=p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                                    for (int q = 0; q < p.row_numShapesTotal; q++)
+                                        row_index[q] = p.row_DOF[elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
                                     util::addScatter(p.row_numShapesTotal,
                                                 &row_index[0], p.numEqu,
                                                 &EM_lumpedMat[0], lumpedMat_p,
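
The two lumping schemes handled above differ only in how the element diagonal is built from the shape functions S and the quadrature volumes Vol. Row-sum lumping takes lumped[s] = sum_q Vol[q]*S[s,q]*D[q]; HRZ lumping squares the shape functions and then rescales every diagonal by m_t/diagS so that the total element mass m_t = sum_q Vol[q]*D[q] is preserved. A single-element sketch of both rules with plain arrays (elemLumpRowSum and elemLumpHRZ are hypothetical names, not functions from finley):

    #include <vector>

    // Sketch only: lumped (diagonal) mass entries for one element.
    // S[s][q] : shape function s at quadrature point q
    // vol[q]  : quadrature weight times |Jacobian|
    // D[q]    : coefficient D at quadrature point q
    std::vector<double> elemLumpRowSum(const std::vector<std::vector<double> >& S,
                                       const std::vector<double>& vol,
                                       const std::vector<double>& D)
    {
        std::vector<double> lumped(S.size(), 0.);
        for (size_t s = 0; s < S.size(); s++)
            for (size_t q = 0; q < vol.size(); q++)
                lumped[s] += vol[q] * S[s][q] * D[q];
        return lumped;
    }

    std::vector<double> elemLumpHRZ(const std::vector<std::vector<double> >& S,
                                    const std::vector<double>& vol,
                                    const std::vector<double>& D)
    {
        std::vector<double> lumped(S.size(), 0.);
        double m_t = 0., diagS = 0.;
        for (size_t q = 0; q < vol.size(); q++)
            m_t += vol[q] * D[q];                 // total element mass
        for (size_t s = 0; s < S.size(); s++) {
            for (size_t q = 0; q < vol.size(); q++)
                lumped[s] += vol[q] * D[q] * S[s][q] * S[s][q];
            diagS += lumped[s];
        }
        for (size_t s = 0; s < S.size(); s++)
            lumped[s] *= m_t / diagS;             // rescale to preserve m_t
        return lumped;
    }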
diff --git a/finley/src/Assemble_NodeCoordinates.cpp b/finley/src/Assemble_NodeCoordinates.cpp
index 21cfe5a..16d6029 100644
--- a/finley/src/Assemble_NodeCoordinates.cpp
+++ b/finley/src/Assemble_NodeCoordinates.cpp
@@ -21,12 +21,10 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Util.h"
 #include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
 
 #include <sstream>
 
@@ -34,30 +32,28 @@ namespace finley {
 
 void Assemble_NodeCoordinates(const NodeFile* nodes, escript::Data& x)
 {
-    resetError();
     if (!nodes) return;
 
     const escript::DataTypes::ShapeType expectedShape(1, nodes->numDim);
 
-    if (!x.numSamplesEqual(1, nodes->numNodes)) {
-        setError(TYPE_ERROR, "Assemble_NodeCoordinates: illegal number of samples of Data object");
+    if (!x.numSamplesEqual(1, nodes->getNumNodes())) {
+        throw escript::ValueError("Assemble_NodeCoordinates: illegal number of samples of Data object");
     } else if (x.getFunctionSpace().getTypeCode() != FINLEY_NODES) {
-        setError(TYPE_ERROR, "Assemble_NodeCoordinates: Data object is not defined on nodes.");
+        throw escript::ValueError("Assemble_NodeCoordinates: Data object is not defined on nodes.");
     } else if (!x.actsExpanded()) {
-        setError(TYPE_ERROR, "Assemble_NodeCoordinates: expanded Data object expected");
+        throw escript::ValueError("Assemble_NodeCoordinates: expanded Data object expected");
     } else if (x.getDataPointShape() != expectedShape) {
         std::stringstream ss;
         ss << "Assemble_NodeCoordinates: Data object of shape ("
             << nodes->numDim << ",) expected.";
-        std::string errorMsg = ss.str();
-        setError(TYPE_ERROR, errorMsg.c_str());
+        throw escript::ValueError(ss.str());
     } else {
-        const size_t dim_size = nodes->numDim*sizeof(double);
+        const size_t dim_size = nodes->numDim * sizeof(double);
         x.requireWrite();
 #pragma omp parallel for
-        for (int n=0; n<nodes->numNodes; n++)
+        for (dim_t n = 0; n < nodes->getNumNodes(); n++)
             memcpy(x.getSampleDataRW(n),
-                    &(nodes->Coordinates[INDEX2(0,n,nodes->numDim)]), dim_size);
+                    &nodes->Coordinates[INDEX2(0, n, nodes->numDim)], dim_size);
     }
 }
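
The change in this file is representative of the whole patch: the library-wide error flag (resetError()/setError()/noError()) is gone and invalid input now raises escript exceptions directly, so callers no longer poll for errors after the fact. A minimal caller-side sketch, assuming escript::ValueError behaves as a standard C++ exception and that nodes and x are set up as in the code above (copyCoordinatesOrReport is a hypothetical wrapper):

    #include <iostream>

    // Sketch only: call sites now catch exceptions instead of checking
    // finley's global error state after the call.
    void copyCoordinatesOrReport(const finley::NodeFile* nodes, escript::Data& x)
    {
        try {
            finley::Assemble_NodeCoordinates(nodes, x);   // may throw
        } catch (const escript::ValueError& e) {
            // previously: if (!noError()) { ... inspect the error string ... }
            std::cerr << "assembly failed: " << e.what() << std::endl;
        }
    }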
 
diff --git a/finley/src/Assemble_PDE.cpp b/finley/src/Assemble_PDE.cpp
index 2768c01..7a2f66a 100644
--- a/finley/src/Assemble_PDE.cpp
+++ b/finley/src/Assemble_PDE.cpp
@@ -14,13 +14,12 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles the system of numEqu PDEs into the stiffness matrix S and right
   hand side F:
 
-      -div(A*grad u)-div(B*u)+C*grad u + D*u= -div X + Y
+      -div(A*grad u)-div(B*u)+C*grad u + D*u = -div X + Y
 
       -(A_{k,i,m,j} u_m,j)_i-(B_{k,i,m} u_m)_i+C_{k,m,j} u_m,j-D_{k,m} u_m = -(X_{k,i})_i + Y_k
 
@@ -45,28 +44,26 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
-#include "esysUtils/blocktimer.h"
 
 #include <sstream>
 
 namespace finley {
 
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
+
 inline void setNumSamplesError(const char* c, int n0, int n1)
 {
     std::stringstream ss;
     ss << "Assemble_PDE: number of sample points of coefficient " << c
         << " don't match (" << n0 << "," << n1 << ").";
-    std::string errorMsg(ss.str());
-    setError(TYPE_ERROR, errorMsg.c_str());
+    const std::string errorMsg(ss.str());
+    throw escript::ValueError(errorMsg);
 }
 
-inline void setShapeError(const char* c, int num, const int *dims)
+inline void setShapeError(const char* c, int num, const int* dims)
 {
     std::stringstream ss;
     ss << "Assemble_PDE: shape of coefficient " << c
@@ -81,57 +78,61 @@ inline void setShapeError(const char* c, int num, const int *dims)
        }
     }
     ss << ").";
-    std::string errorMsg(ss.str());
-    setError(TYPE_ERROR, errorMsg.c_str());
+    const std::string errorMsg(ss.str());
+    throw escript::ValueError(errorMsg);
 }
 
 void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
-                  paso::SystemMatrix_ptr S, escript::Data& F,
+                  escript::ASM_ptr S, escript::Data& F,
                   const escript::Data& A, const escript::Data& B,
                   const escript::Data& C, const escript::Data& D,
                   const escript::Data& X, const escript::Data& Y)
 {
-    resetError();
-    if (!nodes || !elements || (S.get()==NULL && F.isEmpty()))
+    if (!nodes || !elements || (S==NULL && F.isEmpty()))
         return;
 
     if (F.isEmpty() && (!X.isEmpty() || !Y.isEmpty())) {
-        setError(TYPE_ERROR, "Assemble_PDE: right hand side coefficients are non-zero but no right hand side vector given.");
-        return;
+        throw escript::ValueError("Assemble_PDE: right hand side coefficients are non-zero but no right hand side vector given.");
     }
 
     if (S==NULL && !A.isEmpty() && !B.isEmpty() && !C.isEmpty() && !D.isEmpty()) {
-        setError(TYPE_ERROR, "Assemble_PDE: coefficients are non-zero but no matrix is given.");
-        return;
+        throw escript::ValueError("Assemble_PDE: coefficients are non-zero but no matrix is given.");
     }
 
     // get the function space for this assemblage call
-    int funcspace = UNKNOWN;
+    int funcspace = -1;
     if (!A.isEmpty()) funcspace=A.getFunctionSpace().getTypeCode();
     if (!B.isEmpty()) funcspace=B.getFunctionSpace().getTypeCode();
     if (!C.isEmpty()) funcspace=C.getFunctionSpace().getTypeCode();
     if (!D.isEmpty()) funcspace=D.getFunctionSpace().getTypeCode();
     if (!X.isEmpty()) funcspace=X.getFunctionSpace().getTypeCode();
     if (!Y.isEmpty()) funcspace=Y.getFunctionSpace().getTypeCode();
-    if (funcspace==UNKNOWN)
+    if (funcspace == -1)
         return; // all data are empty
 
     // check if all function spaces are the same
     if (!A.isEmpty() && A.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient A");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient A");
     } else if (!B.isEmpty() && B.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient B");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient B");
     } else if (!C.isEmpty() && C.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient C");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient C");
     } else if (!D.isEmpty() && D.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient D");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient D");
     } else if (!X.isEmpty() && X.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient X");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient X");
     } else if (!Y.isEmpty() && Y.getFunctionSpace().getTypeCode()!=funcspace) {
-        setError(TYPE_ERROR, "Assemble_PDE: unexpected function space type for coefficient Y");
+        throw escript::ValueError("Assemble_PDE: unexpected function space type for coefficient Y");
     }
-    if (!noError())
-        return;
+
+    // get value type
+    bool isComplex = false;
+    isComplex = isComplex || (!A.isEmpty() && A.isComplex());
+    isComplex = isComplex || (!B.isEmpty() && B.isComplex());
+    isComplex = isComplex || (!C.isEmpty() && C.isComplex());
+    isComplex = isComplex || (!D.isEmpty() && D.isComplex());
+    isComplex = isComplex || (!X.isEmpty() && X.isComplex());
+    isComplex = isComplex || (!Y.isEmpty() && Y.isComplex());
 
     bool reducedIntegrationOrder;
     if (funcspace==FINLEY_ELEMENTS) {
@@ -153,14 +154,12 @@ void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
     } else if (funcspace==FINLEY_POINTS)  {
        reducedIntegrationOrder=false;
     } else {
-       setError(TYPE_ERROR, "Assemble_PDE: assemblage failed because of illegal function space.");
+       throw escript::ValueError("Assemble_PDE: assemblage failed because of illegal function space.");
        return;
     }
 
     // get assemblage parameters
     AssembleParameters p(nodes, elements, S, F, reducedIntegrationOrder);
-    if (!noError())
-        return;
 
     // check if sample numbers are the same
     if (!A.numSamplesEqual(p.numQuadTotal, elements->numElements)) {
@@ -176,13 +175,11 @@ void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
     } else if (!Y.numSamplesEqual(p.numQuadTotal, elements->numElements)) {
         setNumSamplesError("Y", p.numQuadTotal, elements->numElements);
     }
-    if (!noError())
-        return;
 
     // check the dimensions:
     if (p.numEqu != p. numComp) {
-        setError(VALUE_ERROR, "Assemble_PDE requires number of equations == number of solutions.");
-    } else if (p.numEqu==1) {
+        throw escript::ValueError("Assemble_PDE requires number of equations == number of solutions.");
+    } else if (p.numEqu == 1) {
         const int dimensions[2] = { p.numDim, p.numDim };
         if (!A.isDataPointShapeEqual(2, dimensions)) {
             setShapeError("A", 2, dimensions);
@@ -191,11 +188,11 @@ void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
         } else if (!C.isDataPointShapeEqual(1, dimensions)) {
             setShapeError("C", 1, dimensions);
         } else if (!D.isDataPointShapeEqual(0, dimensions)) {
-            setError(TYPE_ERROR, "Assemble_PDE: coefficient D must be rank 0.");
+            throw escript::ValueError("Assemble_PDE: coefficient D must be rank 0.");
         } else if (!X.isDataPointShapeEqual(1, dimensions)) {
             setShapeError("X", 1, dimensions);
         } else if (!Y.isDataPointShapeEqual(0, dimensions)) {
-            setError(TYPE_ERROR, "Assemble_PDE: coefficient Y must be rank 0.");
+            throw escript::ValueError("Assemble_PDE: coefficient Y must be rank 0.");
         }
     } else {
         const int dimAB[4] = { p.numEqu, p.numDim, p.numComp, p.numDim };
@@ -214,52 +211,74 @@ void Assemble_PDE(const NodeFile* nodes, const ElementFile* elements,
             setShapeError("Y", 1, dimAB);
         }
     }
-    if (!noError())
-        return;
-
-    double blocktimer_start = blocktimer_time();
 
     if (p.numSides == 1) {
-        if (funcspace==FINLEY_POINTS) {
+        if (funcspace == FINLEY_POINTS) {
             if (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !X.isEmpty()) {
-                setError(TYPE_ERROR, "Assemble_PDE: Point elements require A, B, C and X to be empty.");
+                throw escript::ValueError("Assemble_PDE: Point elements require A, B, C and X to be empty.");
             } else {
-                Assemble_PDE_Points(p, D, Y);
+                if (isComplex) {
+                    Assemble_PDE_Points<cplx_t>(p, D, Y);
+                } else {
+                    Assemble_PDE_Points<real_t>(p, D, Y);
+                }
             }
         } else if (p.numEqu > 1) { // system of PDEs
-            if (p.numDim==3) {
-                Assemble_PDE_System_3D(p, A, B, C, D, X, Y);
-            } else if (p.numDim==2) {
-                Assemble_PDE_System_2D(p, A, B, C, D, X, Y);
-            } else if (p.numDim==1) {
+            if (p.numDim == 3) {
+                if (isComplex) {
+                    Assemble_PDE_System_3D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_System_3D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 2) {
+                if (isComplex) {
+                    Assemble_PDE_System_2D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_System_2D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 1) {
                 Assemble_PDE_System_1D(p, A, B, C, D, X, Y);
             } else {
-                setError(VALUE_ERROR, "Assemble_PDE supports spatial dimensions 1,2,3 only.");
+                throw escript::ValueError("Assemble_PDE supports spatial dimensions 1,2,3 only.");
             }
         } else { // single PDE
-            if (p.numDim==3) {
-                Assemble_PDE_Single_3D(p, A, B, C, D, X, Y);
-            } else if (p.numDim==2) {
-                Assemble_PDE_Single_2D(p, A, B, C, D, X, Y);
-            } else if (p.numDim==1) {
+            if (p.numDim == 3) {
+                if (isComplex) {
+                    Assemble_PDE_Single_3D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_Single_3D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 2) {
+                if (isComplex) {
+                    Assemble_PDE_Single_2D<cplx_t>(p, A, B, C, D, X, Y);
+                } else {
+                    Assemble_PDE_Single_2D<real_t>(p, A, B, C, D, X, Y);
+                }
+            } else if (p.numDim == 1) {
                 Assemble_PDE_Single_1D(p, A, B, C, D, X, Y);
             } else {
-                setError(VALUE_ERROR, "Assemble_PDE supports spatial dimensions 1,2,3 only.");
+                throw escript::ValueError("Assemble_PDE supports spatial dimensions 1,2,3 only.");
             }
         }
     } else if (p.numSides == 2) {
         if (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !X.isEmpty()) {
-            setError(TYPE_ERROR, "Assemble_PDE: Contact elements require A, B, C and X to be empty.");
+            throw escript::ValueError("Assemble_PDE: Contact elements require A, B, C and X to be empty.");
         } else if (p.numEqu > 1) { // system of PDEs
-            Assemble_PDE_System_C(p, D, Y);
+            if (isComplex) {
+                Assemble_PDE_System_C<cplx_t>(p, D, Y);
+            } else {
+                Assemble_PDE_System_C<real_t>(p, D, Y);
+            }
         } else { // single PDE
-            Assemble_PDE_Single_C(p, D, Y);
+            if (isComplex) {
+                Assemble_PDE_Single_C<cplx_t>(p, D, Y);
+            } else {
+                Assemble_PDE_Single_C<real_t>(p, D, Y);
+            }
         }
     } else {
-        setError(TYPE_ERROR,"Assemble_PDE supports numShape=NumNodes or 2*numShape=NumNodes only.");
+        throw escript::ValueError("Assemble_PDE supports numShape=NumNodes or 2*numShape=NumNodes only.");
     }
-
-    blocktimer_increment("Assemble_PDE()", blocktimer_start);
 }
 
 } // namespace finley
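
The hunks above replace finley's setError/noError flag handling with escript exceptions and route each assembler through a real/complex template dispatch. A minimal self-contained sketch of that idiom follows; the names are stand-ins only (real_t/cplx_t mirror escript::DataTypes, and assemble() is not finley's actual signature):

// Sketch of the "branch once on the scalar kind, then call the template"
// pattern and the explicit instantiation used in this commit. Illustrative
// names only; builds on its own.
#include <complex>
#include <cstddef>
#include <iostream>
#include <vector>

typedef double real_t;
typedef std::complex<double> cplx_t;

template<typename Scalar>
void assemble(const std::vector<Scalar>& coeffs)
{
    Scalar sum = static_cast<Scalar>(0);
    for (std::size_t i = 0; i < coeffs.size(); i++)
        sum += coeffs[i];
    std::cout << "sum of " << coeffs.size() << " coefficient(s): " << sum << "\n";
}

// instantiate the two supported scalar types once per translation unit,
// as done at the bottom of each Assemble_* source file
template void assemble<real_t>(const std::vector<real_t>&);
template void assemble<cplx_t>(const std::vector<cplx_t>&);

void run(bool isComplex)
{
    // the dispatch Assemble_PDE now performs for the 2D/3D assemblers
    if (isComplex) {
        assemble<cplx_t>(std::vector<cplx_t>(1, cplx_t(1.0, 2.0)));
    } else {
        assemble<real_t>(std::vector<real_t>(1, 1.0));
    }
}

int main()
{
    run(false);
    run(true);
    return 0;
}
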
diff --git a/finley/src/Assemble_PDE_Points.cpp b/finley/src/Assemble_PDE_Points.cpp
index 830951a..5ed6578 100644
--- a/finley/src/Assemble_PDE_Points.cpp
+++ b/finley/src/Assemble_PDE_Points.cpp
@@ -14,10 +14,9 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
-  Assembles the system of numEq PDEs into the stiffness matrix S and right
+  Assembles the system of numEqu PDEs into the stiffness matrix S and right
   hand side F
 
       d_dirac_{k,m} u_m and y_dirac_k
@@ -33,47 +32,57 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_Points(const AssembleParameters& p,
                          const escript::Data& d_dirac,
                          const escript::Data& y_dirac)
 {
-
-    double *F_p=NULL;
-    if(!p.F.isEmpty()) {
+    Scalar* F_p = NULL;
+    const Scalar zero = static_cast<Scalar>(0);
+    if (!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0, zero);
     }
+
 #pragma omp parallel
     {
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
             // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    index_t row_index=p.row_DOF[p.elements->Nodes[INDEX2(0,e,p.NN)]];
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    index_t rowIndex = p.row_DOF[p.elements->Nodes[INDEX2(0,e,p.NN)]];
                     if (!y_dirac.isEmpty()) {
-                        const double *y_dirac_p=y_dirac.getSampleDataRO(e);
-                        util::addScatter(1, &row_index, p.numEqu,
+                        const Scalar* y_dirac_p = y_dirac.getSampleDataRO(e, zero);
+                        util::addScatter(1, &rowIndex, p.numEqu,
                                          y_dirac_p, F_p, p.row_DOF_UpperBound);
                     }
+
                     if (!d_dirac.isEmpty()) {
-                        const double *d_dirac_p=d_dirac.getSampleDataRO(e);
-                        Assemble_addToSystemMatrix(p.S, 1, &row_index,
-                                p.numEqu, 1, &row_index, p.numComp, d_dirac_p);
+                        const Scalar* d_dirac_p = d_dirac.getSampleDataRO(e, zero);
+                        Assemble_addToSystemMatrix(p.S, 1, &rowIndex,
+                                p.numEqu, 1, &rowIndex, p.numComp, d_dirac_p);
                     }
                 } // end color check
             } // end element loop
         } // end color loop
-    } // end parallel section
+    } // end parallel region
 }
 
+// instantiate the two supported scalar types (real_t and cplx_t)
+template void Assemble_PDE_Points<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& d, const escript::Data& y);
+template void Assemble_PDE_Points<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& d, const escript::Data& y);
+
 } // namespace finley
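
The assemblers touched by this commit all share the colour-loop parallelisation visible above: every thread sweeps the colours in the same order, the elements of each colour are split across threads by the omp for, and the implicit barrier at the end of each omp for keeps colours from overlapping, so concurrent scatter-adds never hit the same row. A small stand-alone sketch of that pattern, with a toy colouring and a toy right-hand side rather than finley's data structures:

// Sketch of colour-based parallel assembly. Elements of one colour are
// constructed so that no two of them write the same F entry.
// Compile with -fopenmp; without it the pragmas are ignored and it runs serially.
#include <iostream>
#include <vector>

int main()
{
    const int numElements = 8;
    std::vector<int> colour(numElements);
    for (int e = 0; e < numElements; e++)
        colour[e] = e % 2;                     // toy colouring: two colours
    const int minColour = 0, maxColour = 1;

    std::vector<double> F(4, 0.0);             // toy global right-hand side

#pragma omp parallel
    {
        // every thread walks the colours in the same order ...
        for (int c = minColour; c <= maxColour; c++) {
            // ... and the elements of the current colour are shared out;
            // the implicit barrier here separates the colours
#pragma omp for
            for (int e = 0; e < numElements; e++) {
                if (colour[e] == c) {
                    F[e / 2] += 1.0;           // stand-in for util::addScatter
                }
            }
        }
    }

    for (int i = 0; i < 4; i++)
        std::cout << "F[" << i << "] = " << F[i] << "\n";
    return 0;
}
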
 
diff --git a/finley/src/Assemble_PDE_Single_1D.cpp b/finley/src/Assemble_PDE_Single_1D.cpp
index 7438e63..89d7ca9 100644
--- a/finley/src/Assemble_PDE_Single_1D.cpp
+++ b/finley/src/Assemble_PDE_Single_1D.cpp
@@ -35,12 +35,11 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_PDE_Single_1D(const AssembleParameters& p,
@@ -49,58 +48,58 @@ void Assemble_PDE_Single_1D(const AssembleParameters& p,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 1;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    double *F_p = NULL;
     if(!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const int len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal;
-    const int len_EM_F=p.row_numShapesTotal;
+    const int len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal;
+    const int len_EM_F = p.row_numShapesTotal;
 
 #pragma omp parallel
     {
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
             // loop over all elements:
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        const double *DSDX=&(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)];
+                        const double* DSDX = &p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)];
                         std::vector<double> EM_S(len_EM_S);
                         std::vector<double> EM_F(len_EM_F);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double *A_p=A.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const double *A_p = A.getSampleDataRO(e);
+                            add_EM_S = true;
                             if (expandedA) {
-                                const double *A_q=&(A_p[INDEX4(0,0,0,isub,DIM,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                const double* A_q = &(A_p[INDEX4(0,0,0,isub,DIM,DIM,p.numQuadSub)]);
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        double f = 0.;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant A
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        double f = 0.;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            f += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f*A_p[INDEX2(0,0,DIM)];
                                     }
                                 }
@@ -221,16 +220,16 @@ void Assemble_PDE_Single_1D(const AssembleParameters& p,
                             add_EM_F=true;
                             if (expandedY) {
                                 const double *Y_q=&(Y_p[INDEX2(0,isub, p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    double f = 0.;
+                                    for (int q = 0; q < p.numQuadSub; q++)
+                                        f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
                                     EM_F[INDEX2(0,s,p.numEqu)]+=f;
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    double f = 0.;
+                                    for (int q = 0; q < p.numQuadSub; q++)
                                         f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
                                     EM_F[INDEX2(0,s,p.numEqu)]+=f*Y_p[0];
                                 }
@@ -238,9 +237,9 @@ void Assemble_PDE_Single_1D(const AssembleParameters& p,
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        std::vector<index_t> row_index(p.row_numShapesTotal);
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        IndexVector row_index(p.row_numShapesTotal);
+                        for (int q = 0; q < p.row_numShapesTotal; q++)
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q, isub, p.row_numShapesTotal)], e, p.NN)]];
 
                         if (add_EM_F)
                             util::addScatter(p.row_numShapesTotal,
diff --git a/finley/src/Assemble_PDE_Single_2D.cpp b/finley/src/Assemble_PDE_Single_2D.cpp
index 5c4b535..81e472a 100644
--- a/finley/src/Assemble_PDE_Single_2D.cpp
+++ b/finley/src/Assemble_PDE_Single_2D.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles a single PDE into the stiffness matrix S and right hand side F
@@ -35,66 +34,67 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_Single_2D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 2;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p = NULL;
-    if (! p.F.isEmpty()) {
-       p.F.requireWrite();
-       F_p=p.F.getSampleDataRW(0);
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const int len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal;
-    const int len_EM_F=p.row_numShapesTotal;
+    const int len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal;
+    const int len_EM_F = p.row_numShapesTotal;
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(len_EM_F);
-        std::vector<double> EM_S(len_EM_S);
-        std::vector<double> EM_F(len_EM_F);
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        IndexVector row_index(len_EM_F);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        const double *DSDX=&(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
-                        std::fill(EM_S.begin(), EM_S.end(), 0);
-                        std::fill(EM_F.begin(), EM_F.end(), 0);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        const double* DSDX = &(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
+                        std::fill(EM_S.begin(), EM_S.end(), zero);
+                        std::fill(EM_F.begin(), EM_F.end(), zero);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double *A_p=A.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedA) {
-                                const double *A_q=&(A_p[INDEX4(0,0,0,isub,DIM,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                const Scalar* A_q = &A_p[INDEX4(0,0,0,isub,DIM,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
                                                 + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,1,q,DIM,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
                                                 + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(1,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
                                                 + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(1,1,q,DIM,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
@@ -103,25 +103,25 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant A
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f00=0.;
-                                        double f01=0.;
-                                        double f10=0.;
-                                        double f11=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f0=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            const double f1=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f00+=f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f01+=f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f10+=f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f11+=f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f00 = zero;
+                                        Scalar f01 = zero;
+                                        Scalar f10 = zero;
+                                        Scalar f11 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f0 = Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f1 = Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f00 += f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f01 += f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f10 += f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f11 += f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                              f00*A_p[INDEX2(0,0,DIM)]
-                                            + f01*A_p[INDEX2(0,1,DIM)]
-                                            + f10*A_p[INDEX2(1,0,DIM)]
-                                            + f11*A_p[INDEX2(1,1,DIM)];
+                                              f00 * A_p[INDEX2(0,0,DIM)]
+                                            + f01 * A_p[INDEX2(0,1,DIM)]
+                                            + f10 * A_p[INDEX2(1,0,DIM)]
+                                            + f11 * A_p[INDEX2(1,1,DIM)];
                                     }
                                 }
                             }
@@ -130,29 +130,29 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double *B_p=B.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedB) {
-                                const double *B_q=&(B_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(0,q,DIM)]
+                                const Scalar* B_q = &B_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(0,q,DIM)]
                                                     + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(1,q,DIM)]);
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant B
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0.;
-                                        double f1=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f0*B_p[0]+f1*B_p[1];
                                     }
@@ -163,29 +163,29 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double *C_p=C.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedC) {
-                                const double *C_q=&(C_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(C_q[INDEX2(0,q,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                const Scalar* C_q = &C_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(C_q[INDEX2(0,q,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
                                                     + C_q[INDEX2(1,q,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant C
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0.;
-                                        double f1=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f0*C_p[0]+f1*C_p[1];
                                     }
@@ -196,25 +196,25 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX2(0,isub,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX2(0, isub, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f*D_p[0];
                                     }
@@ -225,27 +225,27 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double *X_p=X.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedX) {
-                                const double *X_q=&(X_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f+=Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(0,q,DIM)]
+                                const Scalar* X_q = &X_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(0,q,DIM)]
                                                  + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(1,q,DIM)]);
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f;
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f;
                                 }
                             } else { // constant X
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f0=0.;
-                                    double f1=0.;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f0+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                        f1+=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f0 += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                        f1 += Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f0*X_p[0]+f1*X_p[1];
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f0*X_p[0] + f1*X_p[1];
                                 }
                             }
                         }
@@ -253,42 +253,42 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX2(0,isub,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
+                                const Scalar* Y_q = &Y_p[INDEX2(0,isub,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f;
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f;
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f*Y_p[0];
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f * Y_p[0];
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
                                     &row_index[0], p.numEqu, &EM_F[0], F_p,
                                     p.row_DOF_UpperBound);
+                        }
                         if (add_EM_S)
                             Assemble_addToSystemMatrix(p.S,
                                     p.row_numShapesTotal, &row_index[0],
                                     p.numEqu, p.col_numShapesTotal,
                                     &row_index[0], p.numComp, &EM_S[0]);
-
                     } // end of isub
                 } // end color check
             } // end element loop
@@ -296,5 +296,17 @@ void Assemble_PDE_Single_2D(const AssembleParameters& p,
     } // end parallel region
 }
 
+// instantiate the two supported scalar types (real_t and cplx_t)
+template void Assemble_PDE_Single_2D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_Single_2D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
 } // namespace finley
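
As in the 2D routine above, each element's local contributions EM_S and EM_F are accumulated over the quadrature points and then scattered into the global system through row_index, the map from the element's local nodes to global degrees of freedom. A small self-contained analogue using a dense toy matrix and stand-in names (finley uses util::addScatter and Assemble_addToSystemMatrix instead):

// Sketch of the element-matrix scatter-add: three two-node line elements
// assembled into a global vector F and a dense global matrix K.
#include <iostream>
#include <vector>

int main()
{
    const int numNodes = 4;
    const int elems[3][2] = { {0,1}, {1,2}, {2,3} };

    std::vector<double> F(numNodes, 0.0);             // global right-hand side
    std::vector<double> K(numNodes * numNodes, 0.0);  // dense stand-in for the system matrix

    for (int e = 0; e < 3; e++) {
        // local contributions for a unit-length element (analogue of EM_S, EM_F)
        const double EM_S[2][2] = { {2.0/6.0, 1.0/6.0},
                                    {1.0/6.0, 2.0/6.0} };
        const double EM_F[2]    = { 0.5, 0.5 };

        // row_index: local node -> global degree of freedom
        const int row_index[2] = { elems[e][0], elems[e][1] };

        // addScatter analogue: accumulate into the global right-hand side
        for (int s = 0; s < 2; s++)
            F[row_index[s]] += EM_F[s];

        // Assemble_addToSystemMatrix analogue: accumulate into the global matrix
        for (int s = 0; s < 2; s++)
            for (int r = 0; r < 2; r++)
                K[row_index[s]*numNodes + row_index[r]] += EM_S[s][r];
    }

    for (int i = 0; i < numNodes; i++)
        std::cout << "F[" << i << "] = " << F[i] << "\n";
    return 0;
}
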
 
diff --git a/finley/src/Assemble_PDE_Single_3D.cpp b/finley/src/Assemble_PDE_Single_3D.cpp
index dd7d250..2e5619a 100644
--- a/finley/src/Assemble_PDE_Single_3D.cpp
+++ b/finley/src/Assemble_PDE_Single_3D.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles a single PDE into the stiffness matrix S and right hand side F
@@ -35,66 +34,67 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_Single_3D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 3;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p = NULL;
-    if (! p.F.isEmpty()) {
-       p.F.requireWrite();
-       F_p=p.F.getSampleDataRW(0);
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
+        p.F.requireWrite();
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const int len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal;
-    const int len_EM_F=p.row_numShapesTotal;
+    const int len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal;
+    const int len_EM_F = p.row_numShapesTotal;
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(len_EM_F);
-        std::vector<double> EM_S(len_EM_S);
-        std::vector<double> EM_F(len_EM_F);
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        IndexVector row_index(len_EM_F);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        const double *DSDX=&(p.row_jac->DSDX[INDEX5(0,0,0,isub,e,p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
-                        std::fill(EM_S.begin(), EM_S.end(), 0);
-                        std::fill(EM_F.begin(), EM_F.end(), 0);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        const double* DSDX = &(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
+                        std::fill(EM_S.begin(), EM_S.end(), zero);
+                        std::fill(EM_F.begin(), EM_F.end(), zero);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double *A_p=A.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedA) {
-                                const double *A_q=&(A_p[INDEX4(0,0,0,isub, DIM,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                const Scalar* A_q = &A_p[INDEX4(0,0,0,isub,DIM,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
                                                    + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,1,q,DIM,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
                                                    + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(0,2,q,DIM,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]
                                                    + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX3(1,0,q,DIM,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
@@ -108,42 +108,43 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant A
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f00=0;
-                                        double f01=0;
-                                        double f02=0;
-                                        double f10=0;
-                                        double f11=0;
-                                        double f12=0;
-                                        double f20=0;
-                                        double f21=0;
-                                        double f22=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f0=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f00+=f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f01+=f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f02+=f0*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f00 = zero;
+                                        Scalar f01 = zero;
+                                        Scalar f02 = zero;
+                                        Scalar f10 = zero;
+                                        Scalar f11 = zero;
+                                        Scalar f12 = zero;
+                                        Scalar f20 = zero;
+                                        Scalar f21 = zero;
+                                        Scalar f22 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f0 = Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f00 += f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f01 += f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f02 += f0*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
 
-                                            const double f1=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f10+=f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f11+=f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f12+=f1*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f1 = Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f10 += f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f11 += f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f12 += f1*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
 
-                                            const double f2=Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
-                                            f20+=f2*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f21+=f2*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f22+=f2*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f2 = Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                            f20 += f2*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f21 += f2*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f22 += f2*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f00*A_p[INDEX2(0,0,DIM)]
-                                            + f01*A_p[INDEX2(0,1,DIM)]
-                                            + f02*A_p[INDEX2(0,2,DIM)]
-                                            + f10*A_p[INDEX2(1,0,DIM)]
-                                            + f11*A_p[INDEX2(1,1,DIM)]
-                                            + f12*A_p[INDEX2(1,2,DIM)]
-                                            + f20*A_p[INDEX2(2,0,DIM)]
-                                            + f21*A_p[INDEX2(2,1,DIM)]
-                                            + f22*A_p[INDEX2(2,2,DIM)];
+                                        EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
+                                              f00 * A_p[INDEX2(0,0,DIM)]
+                                            + f01 * A_p[INDEX2(0,1,DIM)]
+                                            + f02 * A_p[INDEX2(0,2,DIM)]
+                                            + f10 * A_p[INDEX2(1,0,DIM)]
+                                            + f11 * A_p[INDEX2(1,1,DIM)]
+                                            + f12 * A_p[INDEX2(1,2,DIM)]
+                                            + f20 * A_p[INDEX2(2,0,DIM)]
+                                            + f21 * A_p[INDEX2(2,1,DIM)]
+                                            + f22 * A_p[INDEX2(2,2,DIM)];
                                     }
                                 }
                             }
@@ -152,33 +153,33 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double *B_p=B.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedB) {
-                                const double *B_q=&(B_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*
+                                const Scalar* B_q = &B_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*
                                                  (DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(0,q,DIM)]
-                                                 +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(1,q,DIM)]
-                                                 +DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(2,q,DIM)]);
+                                                 + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(1,q,DIM)]
+                                                 + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*B_q[INDEX2(2,q,DIM)]);
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant B
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0;
-                                        double f1=0;
-                                        double f2=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f2+=f*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        Scalar f2 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f2 += f * DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f0*B_p[0]+f1*B_p[1]+f2*B_p[2];
                                     }
@@ -189,33 +190,33 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double *C_p=C.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedC) {
-                                const double *C_q=&(C_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*
+                                const Scalar* C_q = &C_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)] *
                                                 (C_q[INDEX2(0,q,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                +C_q[INDEX2(1,q,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                +C_q[INDEX2(2,q,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
+                                                + C_q[INDEX2(1,q,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                + C_q[INDEX2(2,q,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant C
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0;
-                                        double f1=0;
-                                        double f2=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f2+=f*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        Scalar f2 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f2 += f * DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
                                         }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f0*C_p[0]+f1*C_p[1]+f2*C_p[2];
                                     }
@@ -226,24 +227,26 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX2(0,isub,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX2(0, isub, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f*D_p[0];
                                     }
                                 }
@@ -253,30 +256,30 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double *X_p=X.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedX) {
-                                const double *X_q=&(X_p[INDEX3(0,0,isub,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f+=Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(0,q,DIM)]
+                                const Scalar* X_q = &X_p[INDEX3(0,0,isub,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q]*(DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(0,q,DIM)]
                                                  + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(1,q,DIM)]
                                                  + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*X_q[INDEX2(2,q,DIM)]);
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f;
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f;
                                 }
                             } else { // constant X
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f0=0;
-                                    double f1=0;
-                                    double f2=0;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f0+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                        f1+=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                        f2+=Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f0 += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                        f1 += Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                        f2 += Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
                                     }
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f0*X_p[0]+f1*X_p[1]+f2*X_p[2];
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f0*X_p[0] + f1*X_p[1] + f2*X_p[2];
                                 }
                             }
                         }
@@ -284,34 +287,37 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX2(0,isub, p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f;
+                                const Scalar* Y_q = &Y_p[INDEX2(0,isub,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
+                                    }
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f;
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                    EM_F[INDEX2(0,s,p.numEqu)]+=f*Y_p[0];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                    }
+                                    EM_F[INDEX2(0,s,p.numEqu)] += f * Y_p[0];
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
                                     &row_index[0], p.numEqu, &EM_F[0], F_p,
                                     p.row_DOF_UpperBound);
+                        }
                         if (add_EM_S)
                             Assemble_addToSystemMatrix(p.S,
                                     p.row_numShapesTotal, &row_index[0],
@@ -324,5 +330,17 @@ void Assemble_PDE_Single_3D(const AssembleParameters& p,
     } // end parallel region
 }
 
+// instantiate our two supported versions
+template void Assemble_PDE_Single_3D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_Single_3D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
 } // namespace finley
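The hunk above turns Assemble_PDE_Single_3D into a template over the scalar type and then instantiates it explicitly for escript::DataTypes::real_t and escript::DataTypes::cplx_t, so the definition can stay in the .cpp file. A minimal, self-contained sketch of that pattern follows; plain double and std::complex<double> stand in for the escript typedefs and the assembly body is reduced to a weighted dot product, so none of this is the actual finley code.

    // Sketch only: the template-plus-explicit-instantiation pattern used above,
    // with the element assembly reduced to a weighted dot product.
    #include <complex>
    #include <cstddef>
    #include <vector>

    typedef double               real_t;   // stand-in for escript::DataTypes::real_t
    typedef std::complex<double> cplx_t;   // stand-in for escript::DataTypes::cplx_t

    // The definition lives in one translation unit, as in Assemble_PDE_Single_3D.cpp.
    template<typename Scalar>
    Scalar assembleDot(const std::vector<double>& S, const std::vector<Scalar>& D)
    {
        Scalar f = static_cast<Scalar>(0);      // typed zero, as in the patch
        for (std::size_t q = 0; q < S.size(); q++)
            f += S[q] * D[q];                   // real basis values times real or complex data
        return f;
    }

    // Explicit instantiation gives the linker exactly the two supported scalar types.
    template real_t assembleDot<real_t>(const std::vector<double>&, const std::vector<real_t>&);
    template cplx_t assembleDot<cplx_t>(const std::vector<double>&, const std::vector<cplx_t>&);

    int main()
    {
        std::vector<double> S{0.5, 0.5};
        std::vector<real_t> Dr{1.0, 3.0};
        std::vector<cplx_t> Dc{cplx_t(1.0, 2.0), cplx_t(3.0, 4.0)};
        return (assembleDot(S, Dr) == 2.0 && assembleDot(S, Dc) == cplx_t(2.0, 3.0)) ? 0 : 1;
    }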
 
diff --git a/finley/src/Assemble_PDE_Single_C.cpp b/finley/src/Assemble_PDE_Single_C.cpp
index fe27d79..a820890 100644
--- a/finley/src/Assemble_PDE_Single_C.cpp
+++ b/finley/src/Assemble_PDE_Single_C.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles a single PDE into the stiffness matrix S and right hand side F
@@ -31,54 +30,56 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_Single_C(const AssembleParameters& p, const escript::Data& D,
                            const escript::Data& Y)
 {
-    bool expandedD=D.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
-    if(!p.F.isEmpty()) {
+    bool expandedD = D.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(p.row_numShapesTotal);
-        std::vector<double> EM_S(p.row_numShapesTotal*p.col_numShapesTotal);
-        std::vector<double> EM_F(p.row_numShapesTotal);
+        std::vector<Scalar> EM_S(p.row_numShapesTotal*p.col_numShapesTotal);
+        std::vector<Scalar> EM_F(p.row_numShapesTotal);
+        IndexVector row_index(p.row_numShapesTotal);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
             // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX2(0,isub, p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double val=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            val+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX2(0, isub, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar val = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            val += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]= val;
                                         EM_S[INDEX4(0,0,s,r+p.col_numShapes,p.numEqu,p.numComp,p.row_numShapesTotal)]=-val;
                                         EM_S[INDEX4(0,0,s+p.row_numShapes,r,p.numEqu,p.numComp,p.row_numShapesTotal)]=-val;
@@ -86,12 +87,13 @@ void Assemble_PDE_Single_C(const AssembleParameters& p, const escript::Data& D,
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
-                                        const double fD=f*D_p[0];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
+                                        const Scalar fD = f * D_p[0];
                                         EM_S[INDEX4(0,0,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]= fD;
                                         EM_S[INDEX4(0,0,s,r+p.col_numShapes,p.numEqu,p.numComp,p.row_numShapesTotal)]=-fD;
                                         EM_S[INDEX4(0,0,s+p.row_numShapes,r,p.numEqu,p.numComp,p.row_numShapesTotal)]=-fD;
@@ -104,41 +106,43 @@ void Assemble_PDE_Single_C(const AssembleParameters& p, const escript::Data& D,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX2(0,isub, p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double val=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        val+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
-                                    EM_F[INDEX2(0,s,p.numEqu)]=-val;
-                                    EM_F[INDEX2(0,s+p.row_numShapes,p.numEqu)]= val;
+                                const Scalar* Y_q = &Y_p[INDEX2(0, isub, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar val = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++)
+                                        val += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[q];
+                                    EM_F[INDEX2(0,s,p.numEqu)] = -val;
+                                    EM_F[INDEX2(0,s+p.row_numShapes,p.numEqu)] = val;
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                    EM_F[INDEX2(0,s,p.numEqu)] = -f*Y_p[0];
-                                    EM_F[INDEX2(0,s+p.row_numShapes,p.numEqu)] = f*Y_p[0];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                    }
+                                    EM_F[INDEX2(0,s,p.numEqu)] = -f * Y_p[0];
+                                    EM_F[INDEX2(0,s+p.row_numShapes,p.numEqu)] = f * Y_p[0];
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
-                                   &row_index[0], p.numEqu, &EM_F[0], F_p,
-                                   p.row_DOF_UpperBound);
+                                    &row_index[0], p.numEqu, &EM_F[0], F_p,
+                                    p.row_DOF_UpperBound);
+                        }
                         if (add_EM_S)
                             Assemble_addToSystemMatrix(p.S,
-                                   p.row_numShapesTotal, &row_index[0],
-                                   p.numEqu, p.col_numShapesTotal,
-                                   &row_index[0], p.numComp, &EM_S[0]);
+                                    p.row_numShapesTotal, &row_index[0],
+                                    p.numEqu, p.col_numShapesTotal,
+                                    &row_index[0], p.numComp, &EM_S[0]);
                     } // end of isub
                 } // end color check
             } // end element loop
@@ -146,5 +150,13 @@ void Assemble_PDE_Single_C(const AssembleParameters& p, const escript::Data& D,
     } // end parallel region
 }
 
+// instantiate our two supported versions
+template void Assemble_PDE_Single_C<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& D, const escript::Data& Y);
+template void Assemble_PDE_Single_C<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& D, const escript::Data& Y);
+
 } // namespace finley
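As in the 3D case, the rewritten loops fetch coefficient pointers with a typed zero, e.g. D.getSampleDataRO(e, zero), and the same zero initialises the accumulators. The escript::Data side of that call is not part of this diff, so the following is only a guess at the mechanism the call suggests: a dummy argument whose type selects which internal buffer (real or complex) the accessor hands back. The Sample class below is hypothetical and only illustrates that overload-by-zero idea; it is not the real escript::Data API.

    // Sketch only: selecting a typed pointer via an unused zero-valued argument.
    #include <complex>
    #include <cstdio>
    #include <vector>

    typedef double               real_t;
    typedef std::complex<double> cplx_t;

    class Sample {       // hypothetical stand-in, not escript::Data
    public:
        Sample() : re_{1.0, 2.0}, cx_{cplx_t(1.0, 0.5), cplx_t(2.0, -0.5)} {}

        // The zero-valued argument only picks the overload, so templated
        // assembly code can write getDataRO(zero) for either scalar type.
        const real_t* getDataRO(real_t) const { return re_.data(); }
        const cplx_t* getDataRO(cplx_t) const { return cx_.data(); }

    private:
        std::vector<real_t> re_;
        std::vector<cplx_t> cx_;
    };

    template<typename Scalar>
    Scalar sumSample(const Sample& d)
    {
        const Scalar zero = static_cast<Scalar>(0);
        const Scalar* p = d.getDataRO(zero);   // overload chosen by zero's type
        return p[0] + p[1];
    }

    int main()
    {
        Sample d;
        std::printf("real sum = %g\n", sumSample<real_t>(d));
        cplx_t c = sumSample<cplx_t>(d);
        std::printf("cplx sum = (%g, %g)\n", c.real(), c.imag());
        return 0;
    }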
 
diff --git a/finley/src/Assemble_PDE_System_1D.cpp b/finley/src/Assemble_PDE_System_1D.cpp
index 978a181..2bd5825 100644
--- a/finley/src/Assemble_PDE_System_1D.cpp
+++ b/finley/src/Assemble_PDE_System_1D.cpp
@@ -38,13 +38,11 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_PDE_System_1D(const AssembleParameters& p,
@@ -53,27 +51,27 @@ void Assemble_PDE_System_1D(const AssembleParameters& p,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 1;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    double *F_p = NULL;
     if(!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const size_t len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
-    const size_t len_EM_F=p.row_numShapesTotal*p.numEqu;
+    const size_t len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
+    const size_t len_EM_F = p.row_numShapesTotal*p.numEqu;
 
 #pragma omp parallel
     {
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
+            for (index_t e = 0; e < p.elements->numElements; e++) {
                 if (p.elements->Color[e]==color) {
                     for (int isub=0; isub<p.numSub; isub++) {
                         const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
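These kernels address every array through the INDEXn macros, which the patch now takes from <escript/index.h> rather than the old esysUtils headers. Their definitions are not shown in this diff; judging from calls such as S[INDEX2(s,q,p.row_numShapes)], where the trailing arguments give the extents of the leading indices, they appear to flatten multi-indices with the first index running fastest. The lowercase helpers below are illustrative stand-ins for that presumed layout, not the real macros.

    // Sketch only: first-index-fastest flattening, as the INDEXn call sites suggest.
    #include <cassert>
    #include <cstddef>

    inline std::size_t index2(std::size_t i, std::size_t j, std::size_t ni)
    {
        return i + ni * j;                   // i runs fastest
    }

    inline std::size_t index3(std::size_t i, std::size_t j, std::size_t k,
                              std::size_t ni, std::size_t nj)
    {
        return i + ni * (j + nj * k);        // i fastest, then j, then k
    }

    int main()
    {
        // e.g. S[INDEX2(s, q, numShapes)]: shape index s is contiguous per quad point q
        const std::size_t numShapes = 4, numQuad = 3;
        assert(index2(0, 1, numShapes) == 4);                    // start of the second quad point
        assert(index3(1, 0, 2, numShapes, numQuad) == 25);       // 1 + 4*(0 + 3*2)
        return 0;
    }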
diff --git a/finley/src/Assemble_PDE_System_2D.cpp b/finley/src/Assemble_PDE_System_2D.cpp
index ffcda51..c44c425 100644
--- a/finley/src/Assemble_PDE_System_2D.cpp
+++ b/finley/src/Assemble_PDE_System_2D.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles the system of numEqu PDEs into the stiffness matrix S and right
@@ -38,72 +37,73 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_System_2D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 2;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
-    if(!p.F.isEmpty()){
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const size_t len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
-    const size_t len_EM_F=p.row_numShapesTotal*p.numEqu;
+    const size_t len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
+    const size_t len_EM_F = p.row_numShapesTotal*p.numEqu;
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(p.row_numShapesTotal);
-        std::vector<double> EM_S(len_EM_S);
-        std::vector<double> EM_F(len_EM_F);
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        IndexVector row_index(p.row_numShapesTotal);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        const double *DSDX=&(p.row_jac->DSDX[INDEX5(0,0,0,isub,e,p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
-                        std::fill(EM_S.begin(), EM_S.end(), 0);
-                        std::fill(EM_F.begin(), EM_F.end(), 0);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
-                        //////////////////
-                        // processing A //
-                        //////////////////
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        const double* DSDX = &(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
+                        std::fill(EM_S.begin(), EM_S.end(), zero);
+                        std::fill(EM_F.begin(), EM_F.end(), zero);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
+                        ///////////////
+                        // process A //
+                        ///////////////
                         if (!A.isEmpty()) {
-                            const double *A_p=A.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedA) {
-                                const double *A_q=&(A_p[INDEX6(0,0,0,0,0,isub,p.numEqu,DIM,p.numComp,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*(
+                                const Scalar* A_q = &A_p[INDEX6(0,0,0,0,0,isub,p.numEqu,DIM,p.numComp,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*(
                                                             DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
+                                                          + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -111,27 +111,27 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant A
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f00=0;
-                                        double f01=0;
-                                        double f10=0;
-                                        double f11=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f0=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            const double f1=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f00+=f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f01+=f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f10+=f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f11+=f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f00 = zero;
+                                        Scalar f01 = zero;
+                                        Scalar f10 = zero;
+                                        Scalar f11 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f0 = Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f1 = Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f00 += f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f01 += f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f10 += f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f11 += f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f00*A_p[INDEX4(k,0,m,0,p.numEqu,DIM,p.numComp)]
-                                                   +f01*A_p[INDEX4(k,0,m,1,p.numEqu,DIM,p.numComp)]
-                                                   +f10*A_p[INDEX4(k,1,m,0,p.numEqu,DIM,p.numComp)]
-                                                   +f11*A_p[INDEX4(k,1,m,1,p.numEqu,DIM,p.numComp)];
+                                                    f00 * A_p[INDEX4(k,0,m,0,p.numEqu,DIM,p.numComp)]
+                                                  + f01 * A_p[INDEX4(k,0,m,1,p.numEqu,DIM,p.numComp)]
+                                                  + f10 * A_p[INDEX4(k,1,m,0,p.numEqu,DIM,p.numComp)]
+                                                  + f11 * A_p[INDEX4(k,1,m,1,p.numEqu,DIM,p.numComp)];
                                             }
                                         }
                                     }
@@ -142,17 +142,17 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double *B_p=B.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedB) {
-                                const double *B_q=&(B_p[INDEX5(0,0,0,0,isub,p.numEqu,DIM,p.numComp,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(
+                                const Scalar* B_q = &B_p[INDEX5(0,0,0,0,isub,p.numEqu,DIM,p.numComp,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(
                                                             DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*B_q[INDEX4(k,0,m,q,p.numEqu,DIM,p.numComp)]
                                                           + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*B_q[INDEX4(k,1,m,q,p.numEqu,DIM,p.numComp)]);
                                                 }
@@ -162,20 +162,20 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant B
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0;
-                                        double f1=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f0*B_p[INDEX3(k,0,m,p.numEqu,DIM)]
-                                                   +f1*B_p[INDEX3(k,1,m,p.numEqu,DIM)];
+                                                    f0 * B_p[INDEX3(k,0,m,p.numEqu,DIM)]
+                                                  + f1 * B_p[INDEX3(k,1,m,p.numEqu,DIM)];
                                             }
                                         }
                                     }
@@ -186,19 +186,19 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double *C_p=C.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedC) {
-                                const double *C_q=&(C_p[INDEX5(0,0,0,0,isub,p.numEqu,p.numComp,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(
+                                const Scalar* C_q = &C_p[INDEX5(0,0,0,0,isub,p.numEqu,p.numComp,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(
                                                             C_q[INDEX4(k,m,0,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +C_q[INDEX4(k,m,1,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
+                                                          + C_q[INDEX4(k,m,1,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]);
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -206,20 +206,20 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant C
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0.;
-                                        double f1=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f0*C_p[INDEX3(k,m,0,p.numEqu,p.numComp)]
-                                                   +f1*C_p[INDEX3(k,m,1,p.numEqu,p.numComp)];
+                                                    f0 * C_p[INDEX3(k,m,0,p.numEqu,p.numComp)]
+                                                  + f1 * C_p[INDEX3(k,m,1,p.numEqu,p.numComp)];
                                             }
                                         }
                                     }
@@ -230,17 +230,17 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -248,13 +248,14 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f*D_p[INDEX2(k,m,p.numEqu)];
                                             }
                                         }
@@ -266,33 +267,33 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double *X_p=X.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedX) {
-                                const double *X_q=&(X_p[INDEX4(0,0,0,isub,p.numEqu,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*(
+                                const Scalar* X_q = &X_p[INDEX4(0,0,0,isub,p.numEqu,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*(
                                                     DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,0,q,p.numEqu,DIM)]
-                                                   +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,1,q,p.numEqu,DIM)]);
+                                                  + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,1,q,p.numEqu,DIM)]);
                                         }
-                                        EM_F[INDEX2(k,s,p.numEqu)]+=f;
+                                        EM_F[INDEX2(k,s,p.numEqu)] += f;
                                     }
                                 }
                             } else { // constant X
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f0=0;
-                                    double f1=0;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f0+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                        f1+=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f0 += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                        f1 += Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
                                     }
-                                    for (int k=0; k<p.numEqu; k++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
                                         EM_F[INDEX2(k,s,p.numEqu)] +=
-                                            f0*X_p[INDEX2(k,0,p.numEqu)]
-                                           +f1*X_p[INDEX2(k,1,p.numEqu)];
+                                            f0 * X_p[INDEX2(k,0,p.numEqu)]
+                                          + f1 * X_p[INDEX2(k,1,p.numEqu)];
                                     }
                                 }
                             }
@@ -301,42 +302,45 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX3(0,0,isub,p.numEqu,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
+                                const Scalar* Y_q = &Y_p[INDEX3(0,0,isub,p.numEqu,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
                                         EM_F[INDEX2(k,s,p.numEqu)]+=f;
                                     }
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                    for (int k=0; k<p.numEqu; k++)
-                                        EM_F[INDEX2(k,s,p.numEqu)]+=f*Y_p[k];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++)
+                                        EM_F[INDEX2(k,s,p.numEqu)] += f * Y_p[k];
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
                                     &row_index[0], p.numEqu, &EM_F[0], F_p,
                                     p.row_DOF_UpperBound);
-                        if (add_EM_S)
+                        }
+                        if (add_EM_S) {
                             Assemble_addToSystemMatrix(p.S,
                                     p.row_numShapesTotal, &row_index[0],
                                     p.numEqu, p.col_numShapesTotal,
                                     &row_index[0], p.numComp, &EM_S[0]);
+                        }
                     } // end of isub
                 } // end color check
             } // end element loop
@@ -344,5 +348,17 @@ void Assemble_PDE_System_2D(const AssembleParameters& p,
     } // end parallel region
 }
 
+// instantiate our two supported versions
+template void Assemble_PDE_System_2D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_System_2D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
 } // namespace finley
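The 2D assembler above is now a function template over the scalar type, with the definition kept in the .cpp file; that is why the two supported instantiations (real_t and cplx_t) have to be spelled out explicitly at the end of the translation unit. A minimal, self-contained sketch of the same idiom, using hypothetical names rather than the escript API:

    // header: declaration only, no definition visible to callers
    #include <complex>
    #include <cstddef>
    #include <vector>

    template<typename Scalar>
    Scalar integrate(const std::vector<double>& vol, const Scalar* values);

    // source file: definition plus the two explicit instantiations,
    // mirroring the real_t/cplx_t pair used by the assemblers
    template<typename Scalar>
    Scalar integrate(const std::vector<double>& vol, const Scalar* values)
    {
        Scalar f = static_cast<Scalar>(0);
        for (std::size_t q = 0; q < vol.size(); q++)
            f += vol[q] * values[q];
        return f;
    }

    template double integrate<double>(const std::vector<double>&, const double*);
    template std::complex<double> integrate<std::complex<double> >(
            const std::vector<double>&, const std::complex<double>*);

Without those two lines the linker would have no code to resolve calls against, since no other translation unit can see the template body.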
 
diff --git a/finley/src/Assemble_PDE_System_3D.cpp b/finley/src/Assemble_PDE_System_3D.cpp
index b7d8681..22b26da 100644
--- a/finley/src/Assemble_PDE_System_3D.cpp
+++ b/finley/src/Assemble_PDE_System_3D.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles the system of numEqu PDEs into the stiffness matrix S and right
@@ -38,77 +37,78 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_System_3D(const AssembleParameters& p,
                             const escript::Data& A, const escript::Data& B,
                             const escript::Data& C, const escript::Data& D,
                             const escript::Data& X, const escript::Data& Y)
 {
     const int DIM = 3;
-    bool expandedA=A.actsExpanded();
-    bool expandedB=B.actsExpanded();
-    bool expandedC=C.actsExpanded();
-    bool expandedD=D.actsExpanded();
-    bool expandedX=X.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
-    if(!p.F.isEmpty()) {
+    bool expandedA = A.actsExpanded();
+    bool expandedB = B.actsExpanded();
+    bool expandedC = C.actsExpanded();
+    bool expandedD = D.actsExpanded();
+    bool expandedX = X.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
-    const size_t len_EM_S=p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
-    const size_t len_EM_F=p.row_numShapesTotal*p.numEqu;
+    const size_t len_EM_S = p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp;
+    const size_t len_EM_F = p.row_numShapesTotal*p.numEqu;
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(p.row_numShapesTotal);
-        std::vector<double> EM_S(len_EM_S);
-        std::vector<double> EM_F(len_EM_F);
+        std::vector<Scalar> EM_S(len_EM_S);
+        std::vector<Scalar> EM_F(len_EM_F);
+        IndexVector row_index(p.row_numShapesTotal);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        const double *DSDX=&(p.row_jac->DSDX[INDEX5(0,0,0,isub,e,p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
-                        std::fill(EM_S.begin(), EM_S.end(), 0);
-                        std::fill(EM_F.begin(), EM_F.end(), 0);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        const double* DSDX = &(p.row_jac->DSDX[INDEX5(0,0,0,isub,e, p.row_numShapesTotal,DIM,p.numQuadSub,p.numSub)]);
+                        std::fill(EM_S.begin(), EM_S.end(), zero);
+                        std::fill(EM_F.begin(), EM_F.end(), zero);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double *A_p=A.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedA) {
-                                const double *A_q=&(A_p[INDEX6(0,0,0,0,0,isub,p.numEqu,DIM,p.numComp,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*(
+                                const Scalar* A_q = &A_p[INDEX6(0,0,0,0,0,isub,p.numEqu,DIM,p.numComp,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*(
                                                             DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                           +DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
+                                                          + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,0,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,1,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,0,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,1,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                          + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*A_q[INDEX5(k,2,m,2,q,p.numEqu,DIM,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -116,45 +116,45 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant A
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f00=0;
-                                        double f01=0;
-                                        double f02=0;
-                                        double f10=0;
-                                        double f11=0;
-                                        double f12=0;
-                                        double f20=0;
-                                        double f21=0;
-                                        double f22=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f0=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f00+=f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f01+=f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f02+=f0*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f00 = zero;
+                                        Scalar f01 = zero;
+                                        Scalar f02 = zero;
+                                        Scalar f10 = zero;
+                                        Scalar f11 = zero;
+                                        Scalar f12 = zero;
+                                        Scalar f20 = zero;
+                                        Scalar f21 = zero;
+                                        Scalar f22 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f0 = Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f00 += f0*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f01 += f0*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f02 += f0*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
 
-                                            const double f1=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f10+=f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f11+=f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f12+=f1*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f1 = Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f10 += f1*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f11 += f1*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f12 += f1*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
 
-                                            const double f2=Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
-                                            f20+=f2*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f21+=f2*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f22+=f2*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                            const Scalar f2 = Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                            f20 += f2*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f21 += f2*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f22 += f2*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f00*A_p[INDEX4(k,0,m,0,p.numEqu,DIM,p.numComp)]
-                                                   +f01*A_p[INDEX4(k,0,m,1,p.numEqu,DIM,p.numComp)]
-                                                   +f02*A_p[INDEX4(k,0,m,2,p.numEqu,DIM,p.numComp)]
-                                                   +f10*A_p[INDEX4(k,1,m,0,p.numEqu,DIM,p.numComp)]
-                                                   +f11*A_p[INDEX4(k,1,m,1,p.numEqu,DIM,p.numComp)]
-                                                   +f12*A_p[INDEX4(k,1,m,2,p.numEqu,DIM,p.numComp)]
-                                                   +f20*A_p[INDEX4(k,2,m,0,p.numEqu,DIM,p.numComp)]
-                                                   +f21*A_p[INDEX4(k,2,m,1,p.numEqu,DIM,p.numComp)]
-                                                   +f22*A_p[INDEX4(k,2,m,2,p.numEqu,DIM,p.numComp)];
+                                                    f00 * A_p[INDEX4(k,0,m,0,p.numEqu,DIM,p.numComp)]
+                                                  + f01 * A_p[INDEX4(k,0,m,1,p.numEqu,DIM,p.numComp)]
+                                                  + f02 * A_p[INDEX4(k,0,m,2,p.numEqu,DIM,p.numComp)]
+                                                  + f10 * A_p[INDEX4(k,1,m,0,p.numEqu,DIM,p.numComp)]
+                                                  + f11 * A_p[INDEX4(k,1,m,1,p.numEqu,DIM,p.numComp)]
+                                                  + f12 * A_p[INDEX4(k,1,m,2,p.numEqu,DIM,p.numComp)]
+                                                  + f20 * A_p[INDEX4(k,2,m,0,p.numEqu,DIM,p.numComp)]
+                                                  + f21 * A_p[INDEX4(k,2,m,1,p.numEqu,DIM,p.numComp)]
+                                                  + f22 * A_p[INDEX4(k,2,m,2,p.numEqu,DIM,p.numComp)];
                                             }
                                         }
                                     }
@@ -165,17 +165,17 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double *B_p=B.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedB) {
-                                const double *B_q=&(B_p[INDEX5(0,0,0,0,isub,p.numEqu,DIM,p.numComp,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(
+                                const Scalar* B_q = &B_p[INDEX5(0,0,0,0,isub,p.numEqu,DIM,p.numComp,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(r,q,p.row_numShapes)]*(
                                                             DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*B_q[INDEX4(k,0,m,q,p.numEqu,DIM,p.numComp)]
                                                           + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*B_q[INDEX4(k,1,m,q,p.numEqu,DIM,p.numComp)]
                                                           + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*B_q[INDEX4(k,2,m,q,p.numEqu,DIM,p.numComp)]);
@@ -186,23 +186,23 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant B
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0;
-                                        double f1=0;
-                                        double f2=0;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                            f2+=f*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        Scalar f2 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(r,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                            f2 += f * DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f0*B_p[INDEX3(k,0,m,p.numEqu,DIM)]
-                                                   +f1*B_p[INDEX3(k,1,m,p.numEqu,DIM)]
-                                                   +f2*B_p[INDEX3(k,2,m,p.numEqu,DIM)];
+                                                    f0 * B_p[INDEX3(k,0,m,p.numEqu,DIM)]
+                                                  + f1 * B_p[INDEX3(k,1,m,p.numEqu,DIM)]
+                                                  + f2 * B_p[INDEX3(k,2,m,p.numEqu,DIM)];
                                             }
                                         }
                                     }
@@ -213,20 +213,20 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double *C_p=C.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedC) {
-                                const double *C_q=&(C_p[INDEX5(0,0,0,0,isub,p.numEqu,p.numComp,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(
+                                const Scalar* C_q = &C_p[INDEX5(0,0,0,0,isub,p.numEqu,p.numComp,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*(
                                                             C_q[INDEX4(k,m,0,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)]
-                                                           +C_q[INDEX4(k,m,1,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
-                                                           +C_q[INDEX4(k,m,2,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
+                                                          + C_q[INDEX4(k,m,1,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)]
+                                                          + C_q[INDEX4(k,m,2,q,p.numEqu,p.numComp,DIM)]*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)]);
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -234,23 +234,23 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant C
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f0=0.;
-                                        double f1=0.;
-                                        double f2=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            const double f=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                            f0+=f*DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
-                                            f1+=f*DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
-                                            f2+=f*DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f0 = zero;
+                                        Scalar f1 = zero;
+                                        Scalar f2 = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            const Scalar f = Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
+                                            f0 += f * DSDX[INDEX3(r,0,q,p.row_numShapesTotal,DIM)];
+                                            f1 += f * DSDX[INDEX3(r,1,q,p.row_numShapesTotal,DIM)];
+                                            f2 += f * DSDX[INDEX3(r,2,q,p.row_numShapesTotal,DIM)];
                                         }
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)] +=
-                                                    f0*C_p[INDEX3(k,m,0,p.numEqu,p.numComp)]
-                                                   +f1*C_p[INDEX3(k,m,1,p.numEqu,p.numComp)]
-                                                   +f2*C_p[INDEX3(k,m,2,p.numEqu,p.numComp)];
+                                                    f0 * C_p[INDEX3(k,m,0,p.numEqu,p.numComp)]
+                                                  + f1 * C_p[INDEX3(k,m,1,p.numEqu,p.numComp)]
+                                                  + f2 * C_p[INDEX3(k,m,2,p.numEqu,p.numComp)];
                                             }
                                         }
                                     }
@@ -261,17 +261,17 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double f=0.;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar f = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f;
                                             }
@@ -279,13 +279,14 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]+=f*D_p[INDEX2(k,m,p.numEqu)];
                                             }
                                         }
@@ -297,37 +298,37 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double *X_p=X.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedX) {
-                                const double *X_q=&(X_p[INDEX4(0,0,0,isub,p.numEqu,DIM,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++) {
-                                            f+=Vol[q]*(
+                                const Scalar* X_q = &X_p[INDEX4(0,0,0,isub,p.numEqu,DIM,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*(
                                                     DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,0,q,p.numEqu,DIM)]
-                                                   +DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,1,q,p.numEqu,DIM)]
-                                                   +DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,2,q,p.numEqu,DIM)]);
+                                                  + DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,1,q,p.numEqu,DIM)]
+                                                  + DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)]*X_q[INDEX3(k,2,q,p.numEqu,DIM)]);
                                         }
-                                        EM_F[INDEX2(k,s,p.numEqu)]+=f;
+                                        EM_F[INDEX2(k,s,p.numEqu)] += f;
                                     }
                                 }
                             } else { // constant X
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f0=0;
-                                    double f1=0;
-                                    double f2=0;
-                                    for (int q=0; q<p.numQuadSub; q++) {
-                                        f0+=Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
-                                        f1+=Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
-                                        f2+=Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f0 = zero;
+                                    Scalar f1 = zero;
+                                    Scalar f2 = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f0 += Vol[q]*DSDX[INDEX3(s,0,q,p.row_numShapesTotal,DIM)];
+                                        f1 += Vol[q]*DSDX[INDEX3(s,1,q,p.row_numShapesTotal,DIM)];
+                                        f2 += Vol[q]*DSDX[INDEX3(s,2,q,p.row_numShapesTotal,DIM)];
                                     }
-                                    for (int k=0; k<p.numEqu; k++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
                                         EM_F[INDEX2(k,s,p.numEqu)] +=
-                                            f0*X_p[INDEX2(k,0,p.numEqu)]
-                                           +f1*X_p[INDEX2(k,1,p.numEqu)]
-                                           +f2*X_p[INDEX2(k,2,p.numEqu)];
+                                            f0 * X_p[INDEX2(k,0,p.numEqu)]
+                                          + f1 * X_p[INDEX2(k,1,p.numEqu)]
+                                          + f2 * X_p[INDEX2(k,2,p.numEqu)];
                                     }
                                 }
                             }
@@ -336,42 +337,45 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX3(0,0,isub,p.numEqu,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        double f=0.;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
+                                const Scalar* Y_q = &Y_p[INDEX3(0,0,isub,p.numEqu,p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
                                         EM_F[INDEX2(k,s,p.numEqu)]+=f;
                                     }
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0.;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                    for (int k=0; k<p.numEqu; k++)
-                                        EM_F[INDEX2(k,s,p.numEqu)]+=f*Y_p[k];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++)
+                                        EM_F[INDEX2(k,s,p.numEqu)] += f * Y_p[k];
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
                                     &row_index[0], p.numEqu, &EM_F[0], F_p,
                                     p.row_DOF_UpperBound);
-                        if (add_EM_S)
+                        }
+                        if (add_EM_S) {
                             Assemble_addToSystemMatrix(p.S,
                                     p.row_numShapesTotal, &row_index[0],
                                     p.numEqu, p.col_numShapesTotal,
                                     &row_index[0], p.numComp, &EM_S[0]);
+                        }
                     } // end of isub
                 } // end color check
             } // end element loop
@@ -379,5 +383,17 @@ void Assemble_PDE_System_3D(const AssembleParameters& p,
     } // end parallel region
 }
 
+// instantiate our two supported versions
+template void Assemble_PDE_System_3D<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+template void Assemble_PDE_System_3D<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& A, const escript::Data& B,
+                            const escript::Data& C, const escript::Data& D,
+                            const escript::Data& X, const escript::Data& Y);
+
 } // namespace finley
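The 3D assembler receives the same treatment. One detail shared by both files is the extra zero argument now passed to getSampleDataRO() and getSampleDataRW(): the dummy value already has type Scalar, so it presumably lets the call hand back a pointer of the matching type (real or complex view of the Data object). A rough sketch of that kind of type-selecting overload, with a hypothetical container rather than escript::Data:

    #include <complex>
    #include <cstddef>
    #include <vector>

    typedef double real_t;
    typedef std::complex<double> cplx_t;

    // Hypothetical sample store holding both a real and a complex buffer;
    // the otherwise-unused second argument picks which one the caller sees.
    class SampleStore {
        std::vector<real_t> re;
        std::vector<cplx_t> cx;
    public:
        explicit SampleStore(std::size_t n) : re(n, 0.0), cx(n, cplx_t(0.0)) {}
        const real_t* getSampleDataRO(std::size_t i, real_t) const { return &re[i]; }
        const cplx_t* getSampleDataRO(std::size_t i, cplx_t) const { return &cx[i]; }
    };

Inside the templated assembler a call such as getSampleDataRO(e, zero) then resolves to the appropriate overload automatically, because zero was declared as static_cast<Scalar>(0).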
 
diff --git a/finley/src/Assemble_PDE_System_C.cpp b/finley/src/Assemble_PDE_System_C.cpp
index dd7f63e..bf0a6cc 100644
--- a/finley/src/Assemble_PDE_System_C.cpp
+++ b/finley/src/Assemble_PDE_System_C.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assembles the system of numEqu PDEs into the stiffness matrix S and right
@@ -32,59 +31,57 @@
 
 *****************************************************************************/
 
-/*  Author: Lutz Gross, l.gross at uq.edu.au */
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
+template<typename Scalar>
 void Assemble_PDE_System_C(const AssembleParameters& p, const escript::Data& D,
                            const escript::Data& Y)
 {
-    bool expandedD=D.actsExpanded();
-    bool expandedY=Y.actsExpanded();
-    double *F_p=NULL;
-    if(!p.F.isEmpty()) {
+    bool expandedD = D.actsExpanded();
+    bool expandedY = Y.actsExpanded();
+    const Scalar zero = static_cast<Scalar>(0);
+    Scalar* F_p = NULL;
+    if (!p.F.isEmpty()) {
         p.F.requireWrite();
-        F_p=p.F.getSampleDataRW(0);
+        F_p = p.F.getSampleDataRW(0, zero);
     }
     const std::vector<double>& S(p.row_jac->BasisFunctions->S);
 
 #pragma omp parallel
     {
-        std::vector<index_t> row_index(p.row_numShapesTotal);
-        std::vector<double> EM_S(p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp);
-        std::vector<double> EM_F(p.row_numShapesTotal*p.numEqu);
+        std::vector<Scalar> EM_S(p.row_numShapesTotal*p.col_numShapesTotal*p.numEqu*p.numComp);
+        std::vector<Scalar> EM_F(p.row_numShapesTotal*p.numEqu);
+        IndexVector row_index(p.row_numShapesTotal);
 
-        for (int color=p.elements->minColor; color<=p.elements->maxColor; color++) {
-            // loop over all elements:
+        for (index_t color = p.elements->minColor; color <= p.elements->maxColor; color++) {
+            // loop over all elements
 #pragma omp for
-            for (index_t e=0; e<p.elements->numElements; e++) {
-                if (p.elements->Color[e]==color) {
-                    for (int isub=0; isub<p.numSub; isub++) {
-                        const double *Vol=&(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
-                        bool add_EM_F=false;
-                        bool add_EM_S=false;
+            for (index_t e = 0; e < p.elements->numElements; e++) {
+                if (p.elements->Color[e] == color) {
+                    for (int isub = 0; isub < p.numSub; isub++) {
+                        const double* Vol = &(p.row_jac->volume[INDEX3(0,isub,e,p.numQuadSub,p.numSub)]);
+                        bool add_EM_F = false;
+                        bool add_EM_S = false;
                         ///////////////
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double *D_p=D.getSampleDataRO(e);
-                            add_EM_S=true;
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
+                            add_EM_S = true;
                             if (expandedD) {
-                                const double *D_q=&(D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp, p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                double val=0;
-                                                for (int q=0; q<p.numQuadSub; q++) {
-                                                    val+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
+                                const Scalar* D_q = &D_p[INDEX4(0,0,0,isub,p.numEqu,p.numComp, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                Scalar val = zero;
+                                                for (int q = 0; q < p.numQuadSub; q++) {
+                                                    val += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*D_q[INDEX3(k,m,q,p.numEqu,p.numComp)]*S[INDEX2(r,q,p.row_numShapes)];
                                                 }
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]= val;
                                                 EM_S[INDEX4(k,m,s,r+p.col_numShapes,p.numEqu,p.numComp,p.row_numShapesTotal)]=-val;
@@ -95,14 +92,15 @@ void Assemble_PDE_System_C(const AssembleParameters& p, const escript::Data& D,
                                     }
                                 }
                             } else { // constant D
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int r=0; r<p.col_numShapes; r++) {
-                                        double f=0;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
-                                        for (int k=0; k<p.numEqu; k++) {
-                                            for (int m=0; m<p.numComp; m++) {
-                                                const double fD=f*D_p[INDEX2(k,m,p.numEqu)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int r = 0; r < p.col_numShapes; r++) {
+                                        Scalar f = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++) {
+                                            f += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*S[INDEX2(r,q,p.row_numShapes)];
+                                        }
+                                        for (int k = 0; k < p.numEqu; k++) {
+                                            for (int m = 0; m < p.numComp; m++) {
+                                                const Scalar fD = f * D_p[INDEX2(k,m,p.numEqu)];
                                                 EM_S[INDEX4(k,m,s,r,p.numEqu,p.numComp,p.row_numShapesTotal)]= fD;
                                                 EM_S[INDEX4(k,m,s,r+p.col_numShapes,p.numEqu,p.numComp,p.row_numShapesTotal)]=-fD;
                                                 EM_S[INDEX4(k,m,s+p.row_numShapes,r,p.numEqu,p.numComp,p.row_numShapesTotal)]=-fD;
@@ -117,40 +115,42 @@ void Assemble_PDE_System_C(const AssembleParameters& p, const escript::Data& D,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double *Y_p=Y.getSampleDataRO(e);
-                            add_EM_F=true;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            add_EM_F = true;
                             if (expandedY) {
-                                const double *Y_q=&(Y_p[INDEX3(0,0,isub, p.numEqu,p.numQuadSub)]);
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        double val=0;
-                                        for (int q=0; q<p.numQuadSub; q++)
-                                            val+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
-                                        EM_F[INDEX2(k,s,p.numEqu)]=-val;
-                                        EM_F[INDEX2(k,s+p.row_numShapes,p.numEqu)]= val;
+                                const Scalar* Y_q = &Y_p[INDEX3(0, 0, isub, p.numEqu, p.numQuadSub)];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        Scalar val = zero;
+                                        for (int q = 0; q < p.numQuadSub; q++)
+                                            val += Vol[q]*S[INDEX2(s,q,p.row_numShapes)]*Y_q[INDEX2(k,q,p.numEqu)];
+                                        EM_F[INDEX2(k,s,p.numEqu)] = -val;
+                                        EM_F[INDEX2(k,s+p.row_numShapes,p.numEqu)] = val;
                                     }
                                 }
                             } else { // constant Y
-                                for (int s=0; s<p.row_numShapes; s++) {
-                                    double f=0;
-                                    for (int q=0; q<p.numQuadSub; q++)
-                                        f+=Vol[q]*S[INDEX2(s,q,p.row_numShapes)];
-                                    for (int k=0; k<p.numEqu; k++) {
-                                        EM_F[INDEX2(k,s,p.numEqu)]=-f*Y_p[k];
-                                        EM_F[INDEX2(k,s+p.row_numShapes,p.numEqu)]=f*Y_p[k];
+                                for (int s = 0; s < p.row_numShapes; s++) {
+                                    Scalar f = zero;
+                                    for (int q = 0; q < p.numQuadSub; q++) {
+                                        f += Vol[q] * S[INDEX2(s,q,p.row_numShapes)];
+                                    }
+                                    for (int k = 0; k < p.numEqu; k++) {
+                                        EM_F[INDEX2(k,s,p.numEqu)] = -f * Y_p[k];
+                                        EM_F[INDEX2(k,s+p.row_numShapes,p.numEqu)] = f * Y_p[k];
                                     }
                                 }
                             }
                         }
                         // add the element matrices onto the matrix and
                         // right hand side
-                        for (int q=0; q<p.row_numShapesTotal; q++)
-                            row_index[q]=p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
-               
-                        if (add_EM_F)
+                        for (int q = 0; q < p.row_numShapesTotal; q++) {
+                            row_index[q] = p.row_DOF[p.elements->Nodes[INDEX2(p.row_node[INDEX2(q,isub,p.row_numShapesTotal)],e,p.NN)]];
+                        }
+                        if (add_EM_F) {
                             util::addScatter(p.row_numShapesTotal,
                                     &row_index[0], p.numEqu, &EM_F[0], F_p,
                                     p.row_DOF_UpperBound);
+                        }
                         if (add_EM_S)
                             Assemble_addToSystemMatrix(p.S,
                                     p.row_numShapesTotal, &row_index[0],
@@ -163,5 +163,13 @@ void Assemble_PDE_System_C(const AssembleParameters& p, const escript::Data& D,
     } // end parallel region
 }
 
+// instantiate our two supported versions
+template void Assemble_PDE_System_C<escript::DataTypes::real_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& D, const escript::Data& Y);
+template void Assemble_PDE_System_C<escript::DataTypes::cplx_t>(
+                            const AssembleParameters& p,
+                            const escript::Data& D, const escript::Data& Y);
+
 } // namespace finley
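Beyond the type changes, all three assemblers keep the existing colour-based OpenMP strategy: the outer loop runs over element colours, and within each colour an omp for distributes the elements, so elements that update the same matrix rows are never processed concurrently. A small stand-alone sketch of that pattern (made-up data, not finley code):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const int numElements = 8;
        // colours chosen so that elements of one colour never write to the
        // same entry of F (each element e writes F[e] and F[e+1])
        const int colour[numElements] = {0, 1, 0, 1, 0, 1, 0, 1};
        std::vector<double> F(numElements + 1, 0.0);

    #pragma omp parallel
        {
            for (int c = 0; c <= 1; c++) {   // colours are processed one after another
    #pragma omp for
                for (int e = 0; e < numElements; e++) {
                    if (colour[e] == c) {
                        F[e]   += 1.0;       // no other element of colour c touches
                        F[e+1] += 0.5;       // these entries, so no locking is needed
                    }
                }   // implicit barrier: next colour starts only when this one is done
            }
        }
        std::printf("F[0]=%g, F[1]=%g\n", F[0], F[1]);
        return 0;
    }

The implicit barrier at the end of each omp for is what guarantees that one colour is completely assembled before the next starts, which is exactly the role of the "end color check" loop structure in the code above.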
 
diff --git a/finley/src/Assemble_addToSystemMatrix.cpp b/finley/src/Assemble_addToSystemMatrix.cpp
index 7b20009..5121593 100644
--- a/finley/src/Assemble_addToSystemMatrix.cpp
+++ b/finley/src/Assemble_addToSystemMatrix.cpp
@@ -14,121 +14,156 @@
 *
 *****************************************************************************/
 
+#include "Assemble.h"
 
-/****************************************************************************
-
-  Finley: SystemMatrix
-
-  Adds the matrix array[Equa,Sol,NN,NN] to the matrix in.
-  The rows/columns are given by
-    i_Equa+Equa*Nodes_Equa[Nodes[j_Equa]] (i_Equa=0:Equa; j_Equa=0:NN_Equa).
-
-  The routine has to be called from a parallel region.
-  This routine assumes that in->Equa=in->Sol=1, i.e. array is fully packed.
-  TODO: the case in->Equa!=1
-  WARNING: MATRIX_FORMAT_CSC is not supported under MPI!
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
 
-*****************************************************************************/
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/TrilinosMatrixAdapter.h>
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+using esys_trilinos::TrilinosMatrixAdapter;
+#endif
 
+namespace finley {
 
-#include "Assemble.h"
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
 
-namespace finley {
+#ifdef ESYS_HAVE_PASO
+static void addToSystemMatrixPasoCSC(paso::SystemMatrix* S, int NN_Equa,
+                                     const index_t* Nodes_Equa, int num_Equa,
+                                     int NN_Sol, const index_t* Nodes_Sol,
+                                     int num_Sol, const real_t* array);
 
-void Assemble_addToSystemMatrix_CSC(paso::SystemMatrix_ptr in, int NN_Equa,
+static void addToSystemMatrixPasoCSR(paso::SystemMatrix* S, int NN_Equa,
                                     const index_t* Nodes_Equa, int num_Equa,
                                     int NN_Sol, const index_t* Nodes_Sol,
-                                    int num_Sol, const double* array);
-
-void Assemble_addToSystemMatrix_Trilinos(paso::SystemMatrix_ptr in,
-                        int NN_Equa, const index_t* Nodes_Equa, int num_Equa,
-                        int NN_Sol, const index_t* Nodes_Sol, int num_Sol,
-                        const double* array);
+                                    int num_Sol, const real_t* array);
+#endif
 
-void Assemble_addToSystemMatrix_CSR(paso::SystemMatrix_ptr in, int NN_Equa,
+template<>
+void Assemble_addToSystemMatrix<real_t>(escript::ASM_ptr S, int NN_Equa,
                                     const index_t* Nodes_Equa, int num_Equa,
                                     int NN_Sol, const index_t* Nodes_Sol,
-                                    int num_Sol, const double* array);
-
-void Assemble_addToSystemMatrix(paso::SystemMatrix_ptr in, int NN_Equa,
-                                const index_t* Nodes_Equa, int num_Equa,
-                                int NN_Sol, const index_t* Nodes_Sol,
-                                int num_Sol, const double* array)
+                                    int num_Sol, const real_t* array)
 {
-    // call the right function depending on storage type
-    if (in->type & MATRIX_FORMAT_CSC) {
-        Assemble_addToSystemMatrix_CSC(in, NN_Equa, Nodes_Equa,
-                                  num_Equa, NN_Sol, Nodes_Sol, num_Sol, array);
-    } else if (in->type & MATRIX_FORMAT_TRILINOS_CRS) {
-        Assemble_addToSystemMatrix_Trilinos(in, NN_Equa, Nodes_Equa,
-                                  num_Equa, NN_Sol, Nodes_Sol, num_Sol, array);
-    } else { // type == CSR
-        Assemble_addToSystemMatrix_CSR(in, NN_Equa, Nodes_Equa,
-                                  num_Equa, NN_Sol, Nodes_Sol, num_Sol, array);
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrix* pmat = dynamic_cast<paso::SystemMatrix*>(S.get());
+    if (pmat) {
+        // call the right function depending on storage type
+        if (pmat->type & MATRIX_FORMAT_CSC) {
+            addToSystemMatrixPasoCSC(pmat, NN_Equa, Nodes_Equa,
+                                     num_Equa, NN_Sol, Nodes_Sol,
+                                     num_Sol, array);
+        } else { // type == CSR
+            addToSystemMatrixPasoCSR(pmat, NN_Equa, Nodes_Equa,
+                                     num_Equa, NN_Sol, Nodes_Sol,
+                                     num_Sol, array);
+        }
+        return;
     }
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tmat(dynamic_cast<TrilinosMatrixAdapter*>(S.get()));
+    if (tmat) {
+        IndexVector rowIdx(Nodes_Equa, Nodes_Equa+NN_Equa);
+        //IndexVector colIdx(Nodes_Sol, Nodes_Sol+NN_Sol);
+        std::vector<real_t> arr(array, array+(NN_Equa*NN_Sol*num_Sol*num_Equa));
+        tmat->add(rowIdx, arr);
+        return;
+    }
+#endif
+    throw FinleyException("Assemble_addToSystemMatrix: unknown system "
+                          "matrix type.");
 }
 
-void Assemble_addToSystemMatrix_CSC(paso::SystemMatrix_ptr in, int NN_Equa,
+template<>
+void Assemble_addToSystemMatrix<cplx_t>(escript::ASM_ptr S, int NN_Equa,
                                     const index_t* Nodes_Equa, int num_Equa,
                                     int NN_Sol, const index_t* Nodes_Sol,
-                                    int num_Sol, const double* array)
+                                    int num_Sol, const cplx_t* array)
 {
-    const int index_offset=(in->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
-    const int row_block_size=in->row_block_size;
-    const int col_block_size=in->col_block_size;
-    const int block_size=in->block_size;
-    const int num_subblocks_Equa=num_Equa/row_block_size;
-    const int num_subblocks_Sol=num_Sol/col_block_size;
-    const dim_t numMyCols=in->pattern->mainPattern->numInput;
-    const dim_t numMyRows=in->pattern->mainPattern->numOutput;
-    const index_t *mainBlock_ptr=in->mainBlock->pattern->ptr;
-    const index_t *mainBlock_index=in->mainBlock->pattern->index;
-    double *mainBlock_val=in->mainBlock->val;
-    const index_t *col_coupleBlock_ptr=in->col_coupleBlock->pattern->ptr;
-    const index_t *col_coupleBlock_index=in->col_coupleBlock->pattern->index;
-    double *col_coupleBlock_val=in->col_coupleBlock->val;
-    //const index_t *row_coupleBlock_ptr=in->row_coupleBlock->pattern->ptr;
-    const index_t *row_coupleBlock_index=in->row_coupleBlock->pattern->index;
-    double *row_coupleBlock_val=in->row_coupleBlock->val;
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tmat = dynamic_cast<TrilinosMatrixAdapter*>(S.get());
+    if (tmat) {
+        IndexVector rowIdx(Nodes_Equa, Nodes_Equa+NN_Equa);
+        //IndexVector colIdx(Nodes_Sol, Nodes_Sol+NN_Sol);
+        std::vector<cplx_t> arr(array, array+(NN_Equa*NN_Sol*num_Sol*num_Equa));
+        tmat->add(rowIdx, arr);
+        return;
+    }
+#endif
+    throw FinleyException("addToSystemMatrix: only Trilinos matrices support "
+                          "complex-valued assembly!");
+}
+
+#ifdef ESYS_HAVE_PASO
+void addToSystemMatrixPasoCSC(paso::SystemMatrix* in, int NN_Equa,
+                              const index_t* Nodes_Equa, int num_Equa,
+                              int NN_Sol, const index_t* Nodes_Sol,
+                              int num_Sol, const real_t* array)
+{
+    const int index_offset = (in->type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
+    const int row_block_size = in->row_block_size;
+    const int col_block_size = in->col_block_size;
+    const int block_size = in->block_size;
+    const int num_subblocks_Equa = num_Equa/row_block_size;
+    const int num_subblocks_Sol = num_Sol/col_block_size;
+    const dim_t numMyCols = in->pattern->mainPattern->numInput;
+    const dim_t numMyRows = in->pattern->mainPattern->numOutput;
 
-    for (int k_Sol=0; k_Sol<NN_Sol; ++k_Sol) {
+    const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
+    const index_t* mainBlock_index = in->mainBlock->pattern->index;
+    real_t* mainBlock_val = in->mainBlock->val;
+    const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
+    const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
+    real_t* col_coupleBlock_val = in->col_coupleBlock->val;
+    //const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
+    const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
+    real_t* row_coupleBlock_val = in->row_coupleBlock->val;
+
+    for (int k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
         // Down columns of array
-        const index_t j_Sol=Nodes_Sol[k_Sol];
-        for (int l_col=0; l_col<num_subblocks_Sol; ++l_col) {
-            const index_t i_col=j_Sol*num_subblocks_Sol+l_col;
+        const index_t j_Sol = Nodes_Sol[k_Sol];
+        for (int l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
+            const index_t i_col = j_Sol * num_subblocks_Sol + l_col;
             if (i_col < numMyCols) {
-                for (int k_Equa=0;k_Equa<NN_Equa;++k_Equa) {
+                for (int k_Equa = 0; k_Equa < NN_Equa; ++k_Equa) {
                     // Across cols of array
-                    const index_t j_Equa=Nodes_Equa[k_Equa];
-                    for (int l_row=0; l_row<num_subblocks_Equa; ++l_row) {
-                        const index_t i_row=j_Equa*num_subblocks_Equa+index_offset+l_row;
+                    const index_t j_Equa = Nodes_Equa[k_Equa];
+                    for (int l_row = 0; l_row < num_subblocks_Equa; ++l_row) {
+                        const index_t i_row = j_Equa*num_subblocks_Equa+index_offset+l_row;
                         if (i_row < numMyRows + index_offset ) {
-                            for (index_t k=mainBlock_ptr[i_col]-index_offset; k<mainBlock_ptr[i_col+1]-index_offset; ++k) {
-                                if (mainBlock_index[k]==i_row) {
-                                    // Entry array(k_Equa, j_Sol) is a block (col_block_size x col_block_size)
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const int i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const index_t i_Equa=ir+row_block_size*l_row;
-                                            mainBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                    array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                            for (index_t k = mainBlock_ptr[i_col]-index_offset;
+                                 k < mainBlock_ptr[i_col + 1]-index_offset; ++k) {
+                                if (mainBlock_index[k] == i_row) {
+                                    // Entry array(k_Equa, j_Sol) is a block
+                                    // (row_block_size x col_block_size)
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            mainBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                       num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
                                 }
                             }
                         } else {
-                            for (index_t k=col_coupleBlock_ptr[i_col]-index_offset; k<col_coupleBlock_ptr[i_col+1]-index_offset; ++k) {
-                                if (row_coupleBlock_index[k] == i_row-numMyRows) {
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const int i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const index_t i_Equa=ir+row_block_size*l_row;
-                                            row_coupleBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                            for (index_t k = col_coupleBlock_ptr[i_col]-index_offset;
+                                 k < col_coupleBlock_ptr[i_col + 1]-index_offset; ++k) {
+                                if (row_coupleBlock_index[k] == i_row - numMyRows) {
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            row_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                       num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
@@ -138,20 +173,22 @@ void Assemble_addToSystemMatrix_CSC(paso::SystemMatrix_ptr in, int NN_Equa,
                     }
                 }
             } else { // i_col >= numMyCols
-                for (int k_Equa=0;k_Equa<NN_Equa;++k_Equa) {
+                for (int k_Equa = 0; k_Equa < NN_Equa; ++k_Equa) {
                     // Across rows of array
-                    const index_t j_Equa=Nodes_Equa[k_Equa];
-                    for (int l_row=0; l_row<num_subblocks_Equa; ++l_row) {
-                        const index_t i_row=j_Equa*num_subblocks_Equa+index_offset+l_row;
-                        if (i_row < numMyRows + index_offset ) {
-                            for (index_t k=col_coupleBlock_ptr[i_col-numMyCols]-index_offset; k<col_coupleBlock_ptr[i_col-numMyCols+1]-index_offset; ++k) {
+                    const index_t j_Equa = Nodes_Equa[k_Equa];
+                    for (int l_row = 0; l_row < num_subblocks_Equa; ++l_row) {
+                        const index_t i_row = j_Equa * num_subblocks_Equa + index_offset + l_row;
+                        if (i_row < numMyRows + index_offset) {
+                            for (index_t k = col_coupleBlock_ptr[i_col-numMyCols]-index_offset;
+                                 k < col_coupleBlock_ptr[i_col - numMyCols + 1] - index_offset; ++k) {
                                 if (col_coupleBlock_index[k] == i_row) {
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const int i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const int i_Equa=ir+row_block_size*l_row;
-                                            col_coupleBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            col_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                       num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
@@ -165,66 +202,35 @@ void Assemble_addToSystemMatrix_CSC(paso::SystemMatrix_ptr in, int NN_Equa,
     }
 }
 
-void Assemble_addToSystemMatrix_Trilinos(paso::SystemMatrix_ptr in,
-                                         int NN_Equa, const index_t* Nodes_Equa,
-                                         int num_Equa, int NN_Sol,
-                                         const index_t* Nodes_Sol, int num_Sol,
-                                         const double* array)
+void addToSystemMatrixPasoCSR(paso::SystemMatrix* in, int NN_Equa,
+                              const index_t* Nodes_Equa, int num_Equa,
+                              int NN_Sol, const index_t* Nodes_Sol,
+                              int num_Sol, const real_t* array)
 {
-    // FIXME: this needs to be modified
-#ifdef TRILINOS
-    const int index_offset=(in->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
-    const int row_block_size=in->row_block_size;
-    const int col_block_size=in->col_block_size;
-    const int num_subblocks_Equa=num_Equa/row_block_size;
-    for (int k_Equa=0;k_Equa<NN_Equa;++k_Equa) { // Down columns of array
-        const int j_Equa=Nodes_Equa[k_Equa];
-        if (j_Equa < in->mainBlock->pattern->output_node_distribution->numLocal) {
-            for (int k_Sol=0; k_Sol<NN_Sol; ++k_Sol) {
-                // Across rows of array
-                const int j_Sol=Nodes_Sol[k_Sol];
-                for (int l_row=0; l_row<num_subblocks_Equa; ++l_row) {
-                    const int irow=j_Equa*row_block_size+l_row;
-                    for (int l_col=0; l_col<col_block_size; ++l_col) {
-                        const int icol=j_Sol*col_block_size+index_offset+l_col;
-                        // irow is local and icol is global
-                        Trilinos_SumIntoMyValues(in->trilinos_data, irow, icol, array[INDEX4(l_row,l_col,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)]);
-                    }
-                }
-            }
-        }
-    }
-#endif
-}
+    const int index_offset = (in->type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
+    const int row_block_size = in->row_block_size;
+    const int col_block_size = in->col_block_size;
+    const int block_size = in->block_size;
+    const int num_subblocks_Equa = num_Equa / row_block_size;
+    const int num_subblocks_Sol = num_Sol / col_block_size;
+    const dim_t numMyCols = in->pattern->mainPattern->numInput;
+    const dim_t numMyRows = in->pattern->mainPattern->numOutput;
 
-void Assemble_addToSystemMatrix_CSR(paso::SystemMatrix_ptr in, int NN_Equa,
-                                    const index_t* Nodes_Equa, int num_Equa,
-                                    int NN_Sol, const index_t* Nodes_Sol,
-                                    int num_Sol, const double* array)
-{
-    const int index_offset=(in->type & MATRIX_FORMAT_OFFSET1 ? 1:0);
-    const int row_block_size=in->row_block_size;
-    const int col_block_size=in->col_block_size;
-    const int block_size=in->block_size;
-    const int num_subblocks_Equa=num_Equa/row_block_size;
-    const int num_subblocks_Sol=num_Sol/col_block_size;
-    const dim_t numMyCols=in->pattern->mainPattern->numInput;
-    const dim_t numMyRows=in->pattern->mainPattern->numOutput;
-    const index_t *mainBlock_ptr=in->mainBlock->pattern->ptr;
-    const index_t *mainBlock_index=in->mainBlock->pattern->index;
-    double *mainBlock_val=in->mainBlock->val;
-    const index_t *col_coupleBlock_ptr=in->col_coupleBlock->pattern->ptr;
-    const index_t *col_coupleBlock_index=in->col_coupleBlock->pattern->index;
-    double *col_coupleBlock_val=in->col_coupleBlock->val;
-    const index_t *row_coupleBlock_ptr=in->row_coupleBlock->pattern->ptr;
-    const index_t *row_coupleBlock_index=in->row_coupleBlock->pattern->index;
-    double *row_coupleBlock_val=in->row_coupleBlock->val;
+    const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
+    const index_t* mainBlock_index = in->mainBlock->pattern->index;
+    real_t* mainBlock_val = in->mainBlock->val;
+    const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
+    const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
+    real_t* col_coupleBlock_val = in->col_coupleBlock->val;
+    const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
+    const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
+    real_t* row_coupleBlock_val = in->row_coupleBlock->val;
 
-    for (int k_Equa=0; k_Equa<NN_Equa; ++k_Equa) {
+    for (int k_Equa = 0; k_Equa < NN_Equa; ++k_Equa) {
         // Down columns of array
-        const index_t j_Equa=Nodes_Equa[k_Equa];
-        for (int l_row=0; l_row<num_subblocks_Equa; ++l_row) {
-            const index_t i_row=j_Equa*num_subblocks_Equa+l_row;
+        const index_t j_Equa = Nodes_Equa[k_Equa];
+        for (int l_row = 0; l_row<num_subblocks_Equa; ++l_row) {
+            const index_t i_row = j_Equa*num_subblocks_Equa+l_row;
             // only look at the matrix rows stored on this processor
             if (i_row < numMyRows) {
                 for (int k_Sol=0; k_Sol<NN_Sol; ++k_Sol) {
@@ -232,34 +238,38 @@ void Assemble_addToSystemMatrix_CSR(paso::SystemMatrix_ptr in, int NN_Equa,
                     const index_t j_Sol=Nodes_Sol[k_Sol];
                     for (int l_col=0; l_col<num_subblocks_Sol; ++l_col) {
                         // only look at the matrix rows stored on this processor
-                        const index_t i_col=j_Sol*num_subblocks_Sol+index_offset+l_col;
-                        if (i_col < numMyCols + index_offset ) {
-                            for (index_t k=mainBlock_ptr[i_row]-index_offset; k<mainBlock_ptr[i_row+1]-index_offset; ++k) {
-                                if (mainBlock_index[k]==i_col) {
+                        const index_t i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
+                        if (i_col < numMyCols + index_offset) {
+                            for (index_t k = mainBlock_ptr[i_row] - index_offset;
+                                 k < mainBlock_ptr[i_row + 1] - index_offset; ++k) {
+                                if (mainBlock_index[k] == i_col) {
                                     // Entry array(k_Sol, j_Equa) is a block
                                     // (row_block_size x col_block_size)
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const int i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const index_t i_Equa=ir+row_block_size*l_row;
-                                            mainBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                  array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            mainBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                  array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                         num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
                                 }
                             }
                         } else {
-                            for (index_t k=col_coupleBlock_ptr[i_row]-index_offset; k<col_coupleBlock_ptr[i_row+1]-index_offset; ++k) {
-                                if (col_coupleBlock_index[k] == i_col-numMyCols) {
+                            for (index_t k = col_coupleBlock_ptr[i_row] - index_offset;
+                                 k < col_coupleBlock_ptr[i_row + 1] - index_offset; ++k) {
+                                if (col_coupleBlock_index[k] == i_col - numMyCols) {
                                     // Entry array(k_Sol, j_Equa) is a block
                                     // (row_block_size x col_block_size)
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const index_t i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const index_t i_Equa=ir+row_block_size*l_row;
-                                            col_coupleBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                  array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size*l_row;
+                                            col_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                  array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                         num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
@@ -269,22 +279,24 @@ void Assemble_addToSystemMatrix_CSR(paso::SystemMatrix_ptr in, int NN_Equa,
                     }
                 }
             } else { // i_row >= numMyRows
-                for (int k_Sol=0; k_Sol<NN_Sol; ++k_Sol) {
+                for (int k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
                     // Across rows of array
-                    const index_t j_Sol=Nodes_Sol[k_Sol];
-                    for (int l_col=0; l_col<num_subblocks_Sol; ++l_col) {
-                        const index_t i_col=j_Sol*num_subblocks_Sol+index_offset+l_col;
-                        if (i_col < numMyCols + index_offset ) {
-                            for (index_t k=row_coupleBlock_ptr[i_row-numMyRows]-index_offset; k<row_coupleBlock_ptr[i_row-numMyRows+1]-index_offset; ++k) {
+                    const index_t j_Sol = Nodes_Sol[k_Sol];
+                    for (int l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
+                        const index_t i_col = j_Sol * num_subblocks_Sol + index_offset + l_col;
+                        if (i_col < numMyCols + index_offset) {
+                            for (index_t k = row_coupleBlock_ptr[i_row - numMyRows] - index_offset;
+                                 k < row_coupleBlock_ptr[i_row - numMyRows + 1] - index_offset; ++k) {
                                 if (row_coupleBlock_index[k] == i_col) {
                                     // Entry array(k_Sol, j_Equa) is a block
                                     // (row_block_size x col_block_size)
-                                    for (int ic=0; ic<col_block_size; ++ic) {
-                                        const int i_Sol=ic+col_block_size*l_col;
-                                        for (int ir=0; ir<row_block_size; ++ir) {
-                                            const index_t i_Equa=ir+row_block_size*l_row;
-                                            row_coupleBlock_val[k*block_size+ir+row_block_size*ic]+=
-                                                  array[INDEX4(i_Equa,i_Sol,k_Equa,k_Sol,num_Equa,num_Sol,NN_Equa)];
+                                    for (int ic = 0; ic < col_block_size; ++ic) {
+                                        const int i_Sol = ic + col_block_size * l_col;
+                                        for (int ir = 0; ir < row_block_size; ++ir) {
+                                            const int i_Eq = ir + row_block_size * l_row;
+                                            row_coupleBlock_val[k*block_size + ir + row_block_size*ic] +=
+                                                array[INDEX4(i_Eq, i_Sol, k_Equa, k_Sol,
+                                                       num_Equa, num_Sol, NN_Equa)];
                                         }
                                     }
                                     break;
@@ -297,6 +309,7 @@ void Assemble_addToSystemMatrix_CSR(paso::SystemMatrix_ptr in, int NN_Equa,
         }
     }
 }
+#endif // ESYS_HAVE_PASO
 
 } // namespace finley
 
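The rewritten Assemble_addToSystemMatrix above now accepts a generic escript::ASM_ptr and picks the backend at run time with dynamic_cast: Paso CSC/CSR if the pointer wraps a paso::SystemMatrix, the Trilinos adapter otherwise, and an exception if neither matches. A minimal sketch of that dispatch idiom, using hypothetical matrix classes rather than the real Paso/Trilinos types:

    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <vector>

    // stand-ins for escript's abstract matrix and the two concrete backends
    struct AbstractMatrix { virtual ~AbstractMatrix() {} };
    struct PasoLikeMatrix : AbstractMatrix { /* CSR/CSC storage */ };
    struct TrilinosLikeMatrix : AbstractMatrix { /* distributed CRS */ };

    void addBlock(std::shared_ptr<AbstractMatrix> S, const std::vector<double>& vals)
    {
        if (dynamic_cast<PasoLikeMatrix*>(S.get())) {
            std::cout << "Paso-style assembly of " << vals.size() << " entries\n";
            return;
        }
        if (dynamic_cast<TrilinosLikeMatrix*>(S.get())) {
            std::cout << "Trilinos-style assembly of " << vals.size() << " entries\n";
            return;
        }
        throw std::runtime_error("addBlock: unknown system matrix type");
    }

    int main()
    {
        addBlock(std::make_shared<PasoLikeMatrix>(), {1.0, 2.0});
        addBlock(std::make_shared<TrilinosLikeMatrix>(), {3.0});
        return 0;
    }
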
diff --git a/finley/src/Assemble_getAssembleParameters.cpp b/finley/src/Assemble_getAssembleParameters.cpp
index a035979..1bcf3b9 100644
--- a/finley/src/Assemble_getAssembleParameters.cpp
+++ b/finley/src/Assemble_getAssembleParameters.cpp
@@ -14,174 +14,179 @@
 *
 *****************************************************************************/
 
-
 /****************************************************************************
 
   Assemblage routines: prepares the assemble parameter set
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
+
+using escript::ValueError;
+
 namespace finley {
 
 AssembleParameters::AssembleParameters(const NodeFile* nodes,
                                        const ElementFile* ef,
-                                       paso::SystemMatrix_ptr sm,
+                                       escript::ASM_ptr sm,
                                        escript::Data& rhs,
-                                       bool reducedOrder)
-    : elements(ef),
-      S(sm),
-      F(rhs)
+                                       bool reducedOrder) :
+    elements(ef),
+    S(sm),
+    F(rhs)
 {
-    int numSub, numQuadSub;
-    resetError();
-
     if (!rhs.isEmpty() && !rhs.actsExpanded()) {
-        setError(TYPE_ERROR, "AssembleParameters: Right hand side is not expanded.");
-        return;
+        throw ValueError("AssembleParameters: Right hand side is not expanded.");
     }
-    // check the dimensions of S and rhs
-    if (sm.get()!=NULL && !rhs.isEmpty()) {
-        if (!rhs.numSamplesEqual(1, (sm->row_distribution->getMyNumComponents()
-                           * sm->row_block_size)/sm->logical_row_block_size)) {
-            setError(TYPE_ERROR, "AssembleParameters: number of rows of matrix and length of right hand side don't match.");
-            return;
+
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrix* pasoMat = sm ?
+        dynamic_cast<paso::SystemMatrix*>(sm.get()) : NULL;
+
+    // check the dimensions of matrix and rhs
+    if (pasoMat != NULL && !rhs.isEmpty()) {
+        const dim_t numRows = pasoMat->row_distribution->getMyNumComponents()*pasoMat->row_block_size;
+        if (!rhs.numSamplesEqual(1, numRows/pasoMat->logical_row_block_size)) {
+            throw ValueError("AssembleParameters: number of rows of matrix "
+                             "and length of right hand side don't match.");
         }
     }
+#endif
 
     // get the number of equations and components
-    if (sm.get()==NULL) {
+    if (sm == NULL) {
         if (rhs.isEmpty()) {
-            this->numEqu=1;
-            this->numComp=1;
+            numEqu = numComp = 1;
         } else {
-            this->numEqu=rhs.getDataPointSize();
-            this->numComp=this->numEqu;
+            numEqu = numComp = rhs.getDataPointSize();
         }
     } else {
-        if (rhs.isEmpty()) {
-            this->numEqu=sm->logical_row_block_size;
-            this->numComp=sm->logical_col_block_size;
-        } else {
-            if (rhs.getDataPointSize() != sm->logical_row_block_size) {
-                setError(TYPE_ERROR,"AssembleParameters: matrix row block size and number of components of right hand side don't match.");
-                return;
-            }
-            this->numEqu=sm->logical_row_block_size;
-            this->numComp=sm->logical_col_block_size;
+        if (!rhs.isEmpty() && rhs.getDataPointSize() != sm->getRowBlockSize()) {
+            throw ValueError("AssembleParameters: matrix row block size and "
+                      "number of components of right hand side don't match.");
         }
+        numEqu = sm->getRowBlockSize();
+        numComp = sm->getColumnBlockSize();
     }
-    this->col_DOF=nodes->borrowTargetDegreesOfFreedom();
-    this->row_DOF=nodes->borrowTargetDegreesOfFreedom();
+    // set some defaults
+    row_DOF = nodes->borrowTargetDegreesOfFreedom();
+    row_DOF_UpperBound = nodes->getNumDegreesOfFreedom();
+    row_jac = ef->borrowJacobians(nodes, false, reducedOrder);
+    col_DOF = row_DOF;
+    col_DOF_UpperBound = row_DOF_UpperBound;
+    col_jac = row_jac;
+
+#ifdef ESYS_HAVE_PASO
     // get the information for the labeling of the degrees of freedom from
     // the matrix
-    if (sm.get()) {
+    if (pasoMat) {
         // Make sure # rows in matrix == num DOF for one of:
         // full or reduced (use numLocalDOF for MPI)
-        if (sm->row_distribution->getMyNumComponents()*sm->row_block_size ==
-                this->numEqu*nodes->getNumDegreesOfFreedom()) {
-            this->row_DOF_UpperBound = nodes->getNumDegreesOfFreedom();
-            this->row_DOF=nodes->borrowTargetDegreesOfFreedom();
-            this->row_jac=ef->borrowJacobians(nodes, false, reducedOrder);
-        } else if (sm->row_distribution->getMyNumComponents()*sm->row_block_size ==
-                this->numEqu*nodes->getNumReducedDegreesOfFreedom()) {
-            this->row_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
-            this->row_DOF=nodes->borrowTargetReducedDegreesOfFreedom();
-            this->row_jac=ef->borrowJacobians(nodes, true, reducedOrder);
+        const index_t numRows = pasoMat->row_distribution->getMyNumComponents()*pasoMat->row_block_size;
+        const index_t numCols = pasoMat->col_distribution->getMyNumComponents()*pasoMat->col_block_size;
+
+        if (numRows == numEqu * nodes->getNumDegreesOfFreedom()) {
+        } else if (numRows == numEqu * nodes->getNumReducedDegreesOfFreedom()) {
+            row_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
+            row_DOF = nodes->borrowTargetReducedDegreesOfFreedom();
+            row_jac = ef->borrowJacobians(nodes, true, reducedOrder);
         } else {
-            setError(TYPE_ERROR, "AssembleParameters: number of rows in matrix does not match the number of degrees of freedom in mesh");
+            throw ValueError("AssembleParameters: number of rows in matrix "
+                   "does not match the number of degrees of freedom in mesh");
         }
         // Make sure # cols in matrix == num DOF for one of:
         // full or reduced (use numLocalDOF for MPI)
-        if (sm->col_distribution->getMyNumComponents()*sm->col_block_size ==
-                this->numComp*nodes->getNumDegreesOfFreedom()) {
-            this->col_DOF_UpperBound = nodes->getNumDegreesOfFreedom();
-            this->col_DOF=nodes->borrowTargetDegreesOfFreedom();
-            this->col_jac=ef->borrowJacobians(nodes, false, reducedOrder);
-        } else if (sm->col_distribution->getMyNumComponents()*sm->col_block_size==this->numComp*nodes->getNumReducedDegreesOfFreedom()) {
-            this->col_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
-            this->col_DOF=nodes->borrowTargetReducedDegreesOfFreedom();
-            this->col_jac=ef->borrowJacobians(nodes, true, reducedOrder);
+        if (numCols == this->numComp * nodes->getNumDegreesOfFreedom()) {
+        } else if (numCols == this->numComp * nodes->getNumReducedDegreesOfFreedom()) {
+            col_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
+            col_DOF = nodes->borrowTargetReducedDegreesOfFreedom();
+            col_jac = ef->borrowJacobians(nodes, true, reducedOrder);
         } else {
-            setError(TYPE_ERROR, "AssembleParameters: number of columns in matrix does not match the number of degrees of freedom in mesh");
+            throw ValueError("AssembleParameters: number of columns in matrix "
+                   "does not match the number of degrees of freedom in mesh");
         }
     }
-
-    if (!noError())
-        return;
+#endif
 
     // get the information from right hand side
     if (!rhs.isEmpty()) {
         if (rhs.numSamplesEqual(1, nodes->getNumDegreesOfFreedom())) {
-            this->row_DOF_UpperBound = nodes->getNumDegreesOfFreedom();
-            this->row_DOF=nodes->borrowTargetDegreesOfFreedom();
-            this->row_jac=ef->borrowJacobians(nodes, false, reducedOrder);
         } else if (rhs.numSamplesEqual(1, nodes->getNumReducedDegreesOfFreedom())) {
-            this->row_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
-            this->row_DOF=nodes->borrowTargetReducedDegreesOfFreedom();
-            this->row_jac=ef->borrowJacobians(nodes, true, reducedOrder);
+            row_DOF_UpperBound = nodes->getNumReducedDegreesOfFreedom();
+            row_DOF = nodes->borrowTargetReducedDegreesOfFreedom();
+            row_jac = ef->borrowJacobians(nodes, true, reducedOrder);
         } else {
-            setError(TYPE_ERROR, "AssembleParameters: length of RHS vector does not match the number of degrees of freedom in mesh");
+            throw ValueError("AssembleParameters: length of RHS vector does not match the number of degrees of freedom in mesh");
         }
-        if (sm.get()==NULL) {
-            this->col_DOF_UpperBound=this->row_DOF_UpperBound;
-            this->col_DOF=this->row_DOF;
-            this->col_jac=this->row_jac;
+#ifdef ESYS_HAVE_PASO
+        if (sm == NULL) {
+            col_DOF_UpperBound = this->row_DOF_UpperBound;
+            col_DOF = this->row_DOF;
+            col_jac = this->row_jac;
         }
+#else // trilinos case
+        col_DOF_UpperBound = this->row_DOF_UpperBound;
+        col_DOF = this->row_DOF;
+        col_jac = this->row_jac;
+#endif
     }
 
-    numSub=std::min(this->row_jac->numSub, this->col_jac->numSub);
-    numQuadSub=this->row_jac->numQuadTotal/numSub;
-    if (this->row_jac->numSides != this->col_jac->numSides) {
-        setError(TYPE_ERROR, "AssembleParameters: number of sides for row and column shape functions must match.");
+    numSub = std::min(row_jac->numSub, col_jac->numSub);
+    numQuadSub = row_jac->numQuadTotal / numSub;
+    if (row_jac->numSides != col_jac->numSides) {
+        throw ValueError("AssembleParameters: number of sides for row and "
+                         "column shape functions must match.");
     }
-    if (this->row_jac->numDim != this->col_jac->numDim) {
-        setError(TYPE_ERROR, "AssembleParameters: spatial dimension for row and column shape function must match.");
+    if (row_jac->numDim != col_jac->numDim) {
+        throw ValueError("AssembleParameters: spatial dimension for row and "
+                         "column shape function must match.");
     }
-    if (ef->numNodes < this->row_jac->numShapesTotal) {
-        setError(TYPE_ERROR, "AssembleParameters: too many nodes are expected by row.");
+    if (ef->numNodes < row_jac->numShapesTotal) {
+        throw ValueError("AssembleParameters: too many nodes are expected by row.");
     }
-    if (ef->numNodes < this->col_jac->numShapesTotal) {
-        setError(TYPE_ERROR, "AssembleParameters: too many nodes are expected by col.");
+    if (ef->numNodes < col_jac->numShapesTotal) {
+        throw ValueError("AssembleParameters: too many nodes are expected by col.");
     }
-    if (this->row_jac->numElements != ef->numElements) {
-        setError(TYPE_ERROR, "AssembleParameters: number of elements for row is wrong.");
+    if (row_jac->numElements != ef->numElements) {
+        throw ValueError("AssembleParameters: number of elements for row is wrong.");
     }
-    if (this->col_jac->numElements != ef->numElements) {
-        setError(TYPE_ERROR, "AssembleParameters: number of elements for column is wrong.");
+    if (col_jac->numElements != ef->numElements) {
+        throw ValueError("AssembleParameters: number of elements for column is wrong.");
     }
-    if (this->row_jac->numQuadTotal != this->col_jac->numQuadTotal) {
-        setError(TYPE_ERROR, "AssembleParameters: number of quadrature points for row and column shape functions must match.");
+    if (row_jac->numQuadTotal != col_jac->numQuadTotal) {
+        throw ValueError("AssembleParameters: number of quadrature points for "
+                         "row and column shape functions must match.");
     }
     // to consider different basis function for rows and columns this will
     // require some work:
-    if (numQuadSub*numSub != this->row_jac->numQuadTotal) {
-        setError(TYPE_ERROR, "AssembleParameters: number of quadrature points for row is not correct.");
+    if (numQuadSub * numSub != row_jac->numQuadTotal) {
+        throw ValueError("AssembleParameters: number of quadrature points "
+                         "for row is not correct.");
     }
-    if (numQuadSub != this->row_jac->BasisFunctions->numQuadNodes) {
-        setError(TYPE_ERROR, "AssembleParameters: Incorrect number of quadrature points for row.");
+    if (numQuadSub != row_jac->BasisFunctions->numQuadNodes) {
+        throw ValueError("AssembleParameters: Incorrect number of quadrature "
+                         "points for row.");
     }
-    if (numQuadSub != this->col_jac->BasisFunctions->numQuadNodes) {
-        setError(TYPE_ERROR, "AssembleParameters: Incorrect number of quadrature points for column.");
+    if (numQuadSub != col_jac->BasisFunctions->numQuadNodes) {
+        throw ValueError("AssembleParameters: Incorrect number of quadrature "
+                         "points for column.");
     }
 
-    this->numQuadSub=numQuadSub;
-    this->numSub=numSub;
-    this->numQuadTotal=this->row_jac->numQuadTotal;
-    this->NN=elements->numNodes;
-    this->numElements=elements->numElements;
-    this->numDim=this->row_jac->numDim;
-    this->col_node=this->col_jac->node_selection;
-    this->row_node=this->row_jac->node_selection;
-    this->numSides=this->row_jac->numSides;
-    this->row_numShapesTotal=this->row_jac->numShapesTotal;
-    this->row_numShapes=this->row_jac->BasisFunctions->Type->numShapes;
-    this->col_numShapesTotal=this->col_jac->numShapesTotal;
-    this->col_numShapes=this->col_jac->BasisFunctions->Type->numShapes;
+    NN = elements->numNodes;
+    numQuadTotal = row_jac->numQuadTotal;
+    numElements = elements->numElements;
+    numDim = row_jac->numDim;
+    col_node = col_jac->node_selection;
+    row_node = row_jac->node_selection;
+    numSides = row_jac->numSides;
+    row_numShapesTotal = row_jac->numShapesTotal;
+    row_numShapes = row_jac->BasisFunctions->Type->numShapes;
+    col_numShapesTotal = col_jac->numShapesTotal;
+    col_numShapes = col_jac->BasisFunctions->Type->numShapes;
 }
 
 } // namespace finley
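
The AssembleParameters changes above are part of a wider pattern in this release: the old setError()/noError() status flags are replaced by C++ exceptions (escript::ValueError and friends), so invalid input aborts assembly immediately instead of relying on callers to poll the error state. Sketched in isolation, with a hypothetical stand-in class rather than the actual escript exception hierarchy:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // hypothetical stand-in for escript::ValueError
    class ValueError : public std::runtime_error {
    public:
        explicit ValueError(const std::string& msg) : std::runtime_error(msg) {}
    };

    struct Params {
        int numEqu;
        Params(int rhsSize, int rowBlockSize)
        {
            // before: setError(TYPE_ERROR, "..."); return;  (caller had to call noError())
            // now: throw, so construction either succeeds fully or not at all
            if (rhsSize != rowBlockSize)
                throw ValueError("Params: matrix row block size and RHS size don't match.");
            numEqu = rowBlockSize;
        }
    };

    int main()
    {
        try {
            Params bad(3, 2);
        } catch (const ValueError& e) {
            std::cout << "caught: " << e.what() << std::endl;
        }
        return 0;
    }
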
diff --git a/finley/src/Assemble_getNormal.cpp b/finley/src/Assemble_getNormal.cpp
index 3284ab9..1cd625f 100644
--- a/finley/src/Assemble_getNormal.cpp
+++ b/finley/src/Assemble_getNormal.cpp
@@ -22,72 +22,67 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_getNormal(const NodeFile* nodes, const ElementFile* elements,
                         escript::Data& normal)
 {
-    resetError();
     if (!nodes || !elements)
         return;
 
     const_ReferenceElement_ptr refElement(elements->referenceElementSet->
             borrowReferenceElement(util::hasReducedIntegrationOrder(normal)));
-    const int NN=elements->numNodes;
-    const int numDim=nodes->numDim;
-    const int numQuad=refElement->Parametrization->numQuadNodes;
-    const int numDim_local=refElement->Parametrization->Type->numDim;
-    const int NS=refElement->Parametrization->Type->numShapes;
+    const int NN = elements->numNodes;
+    const int numDim = nodes->numDim;
+    const int numQuad = refElement->Parametrization->numQuadNodes;
+    const int numDim_local = refElement->Parametrization->Type->numDim;
+    const int NS = refElement->Parametrization->Type->numShapes;
   
     int sign, node_offset;
-    if (normal.getFunctionSpace().getTypeCode()==FINLEY_CONTACT_ELEMENTS_2) {
-        node_offset=refElement->Type->offsets[1];
-        sign=-1;
+    if (normal.getFunctionSpace().getTypeCode() == FINLEY_CONTACT_ELEMENTS_2) {
+        node_offset = refElement->Type->offsets[1];
+        sign = -1;
     } else {
-        node_offset=refElement->Type->offsets[0];
-        sign=1;
+        node_offset = refElement->Type->offsets[0];
+        sign = 1;
     }
 
     // check the dimensions of normal
-    if (!(numDim==numDim_local || numDim-1==numDim_local)) {
-        setError(TYPE_ERROR, "Assemble_setNormal: Cannot calculate normal vector");
+    if (!(numDim == numDim_local || numDim - 1 == numDim_local)) {
+        throw escript::ValueError("Assemble_setNormal: Cannot calculate normal vector");
     } else if (!normal.numSamplesEqual(numQuad, elements->numElements)) {
-        setError(TYPE_ERROR, "Assemble_setNormal: illegal number of samples of normal Data object");
+        throw escript::ValueError("Assemble_setNormal: illegal number of samples of normal Data object");
     } else if (!normal.isDataPointShapeEqual(1, &numDim)) {
-        setError(TYPE_ERROR, "Assemble_setNormal: illegal point data shape of normal Data object");
+        throw escript::ValueError("Assemble_setNormal: illegal point data shape of normal Data object");
     } else if (!normal.actsExpanded()) {
-        setError(TYPE_ERROR, "Assemble_setNormal: expanded Data object is expected for normal.");
+        throw escript::ValueError("Assemble_setNormal: expanded Data object is expected for normal.");
     }
-   
-    if (noError()) {
-        normal.requireWrite();
+
+    normal.requireWrite();
 #pragma omp parallel
-        {
-            std::vector<double> local_X(NS*numDim); 
-            std::vector<double> dVdv(numQuad*numDim*numDim_local);
-            // open the element loop
+    {
+        std::vector<double> local_X(NS * numDim); 
+        std::vector<double> dVdv(numQuad * numDim * numDim_local);
 #pragma omp for
-            for (index_t e=0; e<elements->numElements; e++) {
-                // gather local coordinates of nodes into local_X:
-                util::gather(NS, &(elements->Nodes[INDEX2(node_offset,e,NN)]),
-                             numDim, nodes->Coordinates, &local_X[0]);
-                // calculate dVdv(i,j,q)=local_X(i,n)*DSDv(n,j,q)
-                util::smallMatMult(numDim, numDim_local*numQuad, &dVdv[0], NS,
-                        local_X, refElement->Parametrization->dSdv);
-                double *normal_array=normal.getSampleDataRW(e);
-                util::normalVector(numQuad, numDim, numDim_local, &dVdv[0],
-                                   normal_array);
-                for (int q=0; q<numQuad*numDim; q++)
-                    normal_array[q]*=sign;
-            }
+        for (index_t e = 0; e < elements->numElements; e++) {
+            // gather local coordinates of nodes into local_X
+            util::gather(NS, &elements->Nodes[INDEX2(node_offset, e, NN)],
+                         numDim, nodes->Coordinates, &local_X[0]);
+            // calculate dVdv(i,j,q)=local_X(i,n)*DSDv(n,j,q)
+            util::smallMatMult(numDim, numDim_local * numQuad, &dVdv[0], NS,
+                    local_X, refElement->Parametrization->dSdv);
+            double* normal_array = normal.getSampleDataRW(e);
+            util::normalVector(numQuad, numDim, numDim_local, &dVdv[0],
+                               normal_array);
+            for (int q = 0; q < numQuad * numDim; q++)
+                normal_array[q] *= sign;
         }
-    }
+    } // parallel section
 }
 
 } // namespace finley
diff --git a/finley/src/Assemble_getSize.cpp b/finley/src/Assemble_getSize.cpp
index db45720..910da82 100644
--- a/finley/src/Assemble_getSize.cpp
+++ b/finley/src/Assemble_getSize.cpp
@@ -21,19 +21,17 @@
   of elements and assigns the value to each quadrature point in out.
 
 *****************************************************************************/
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
 
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_getSize(const NodeFile* nodes, const ElementFile* elements,
                       escript::Data& out)
 {
-    resetError();
-
     if (!nodes || !elements)
         return;
 
@@ -41,61 +39,57 @@ void Assemble_getSize(const NodeFile* nodes, const ElementFile* elements,
             borrowReferenceElement(
                 util::hasReducedIntegrationOrder(out)));
 
-    const int numDim=nodes->numDim;
-    const int numQuad=refElement->Parametrization->numQuadNodes;
-    const int NN=elements->numNodes;
-    const int NS=refElement->Parametrization->Type->numShapes;
-    const int NVertices=refElement->Parametrization->Type->numVertices;
+    const int numDim = nodes->numDim;
+    const int numQuad = refElement->Parametrization->numQuadNodes;
+    const int NN = elements->numNodes;
+    const int NS = refElement->Parametrization->Type->numShapes;
+    const int NVertices = refElement->Parametrization->Type->numVertices;
 
     // check the dimensions of out
     if (!out.numSamplesEqual(numQuad, elements->numElements)) {
-        setError(TYPE_ERROR, "Assemble_getSize: illegal number of samples of out Data object");
+        throw escript::ValueError("Assemble_getSize: illegal number of samples of out Data object");
     } else if (!out.isDataPointShapeEqual(0, &numDim)) {
-        setError(TYPE_ERROR, "Assemble_getSize: illegal data point shape of out Data object");
+        throw escript::ValueError("Assemble_getSize: illegal data point shape of out Data object");
     } else if (!out.actsExpanded()) {
-        setError(TYPE_ERROR, "Assemble_getSize: expanded Data object is expected for element size.");
+        throw escript::ValueError("Assemble_getSize: expanded Data object is expected for element size.");
     }
 
-    if (!noError())
-        return;
-
     // now we can start
     int node_offset;
-    if (out.getFunctionSpace().getTypeCode()==FINLEY_CONTACT_ELEMENTS_2) {
-        node_offset=refElement->Type->offsets[1];
+    if (out.getFunctionSpace().getTypeCode() == FINLEY_CONTACT_ELEMENTS_2) {
+        node_offset = refElement->Type->offsets[1];
     } else {
-        node_offset=refElement->Type->offsets[0];
+        node_offset = refElement->Type->offsets[0];
     }
-    const double f=pow(0.5, pow((double)(refElement->Type->numSubElements),
+    const double f = pow(0.5, pow((double)(refElement->Type->numSubElements),
                 1./(double)(numDim))-1);
 
     out.requireWrite();
 #pragma omp parallel
     {
-        std::vector<double> local_X(NN*numDim);
-#pragma omp parallel for
-        for (index_t e=0; e<elements->numElements; e++) {
-            // gather local coordinates of nodes into
-            // local_X(numDim,NN):
-            util::gather(NS, &(elements->Nodes[INDEX2(node_offset,e,NN)]),
+        std::vector<double> local_X(NN * numDim);
+#pragma omp for
+        for (index_t e = 0; e < elements->numElements; e++) {
+            // gather local coordinates of nodes into local_X(numDim,NN)
+            util::gather(NS, &elements->Nodes[INDEX2(node_offset, e, NN)],
                          numDim, nodes->Coordinates, &local_X[0]);
             // calculate minimal differences:
-            double max_diff=0.;
-            for (int n0=0; n0<NVertices; n0++) {
-                for (int n1=n0+1; n1<NVertices; n1++) {
-                    double diff=0;
-                    for (int i=0; i<numDim; i++) {
-                        const double d=local_X[INDEX2(i,n0,numDim)]-local_X[INDEX2(i,n1,numDim)];
-                        diff += d*d;
+            double max_diff = 0.;
+            for (int n0 = 0; n0 < NVertices; n0++) {
+                for (int n1 = n0 + 1; n1 < NVertices; n1++) {
+                    double diff = 0;
+                    for (int i = 0; i < numDim; i++) {
+                        const double d = local_X[INDEX2(i,n0,numDim)] - local_X[INDEX2(i,n1,numDim)];
+                        diff += d * d;
                     }
-                    max_diff=std::max(max_diff,diff);
+                    max_diff = std::max(max_diff, diff);
                 }
             }
-            max_diff=sqrt(max_diff)*f;
+            max_diff = sqrt(max_diff) * f;
             // set all values to max_diff
-            double *out_array=out.getSampleDataRW(e);
-            for (int q=0; q<numQuad; q++)
-                out_array[q]=max_diff;
+            double* out_array = out.getSampleDataRW(e);
+            for (int q = 0; q < numQuad; q++)
+                out_array[q] = max_diff;
         }
     } // end of parallel region
 }
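
Note the OpenMP correction in Assemble_getSize above: the inner "#pragma omp parallel for" sat inside an already-open "#pragma omp parallel" region, so each thread would have run its own copy of the element loop instead of sharing the iterations; the patch replaces it with a plain "#pragma omp for" work-sharing directive. A minimal, illustrative-only sketch of the corrected structure:

    #include <cstdio>
    #include <vector>

    int main()
    {
        const int numElements = 1000;
        std::vector<double> out(numElements, 0.0);

    #pragma omp parallel
        {
            // per-thread scratch space, allocated once per thread
            std::vector<double> local_X(8);

            // work-sharing construct: splits the loop over the threads of the
            // enclosing parallel region ("omp parallel for" here would open a
            // second, nested region instead of sharing the iterations)
    #pragma omp for
            for (int e = 0; e < numElements; e++) {
                local_X[0] = e * 0.5;
                out[e] = local_X[0];
            }
        }

        std::printf("out[10] = %g\n", out[10]);
        return 0;
    }
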
diff --git a/finley/src/Assemble_gradient.cpp b/finley/src/Assemble_gradient.cpp
index 4a506ec..351145c 100644
--- a/finley/src/Assemble_gradient.cpp
+++ b/finley/src/Assemble_gradient.cpp
@@ -22,133 +22,123 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
                        escript::Data& grad_data, const escript::Data& data)
 {
-    resetError();
     if (!nodes || !elements)
         return;
 
-    const int numComps=data.getDataPointSize();
-    const int NN=elements->numNodes;
-    const bool reducedOrder=util::hasReducedIntegrationOrder(grad_data);
-    const int data_type=data.getFunctionSpace().getTypeCode();
+    const int numComps = data.getDataPointSize();
+    const int NN = elements->numNodes;
+    const bool reducedOrder = util::hasReducedIntegrationOrder(grad_data);
+    const int data_type = data.getFunctionSpace().getTypeCode();
 
     bool reducedShapefunction = false;
-    int numNodes = 0;
+    dim_t numNodes = 0;
     if (data_type == FINLEY_NODES) {
         numNodes = nodes->getNumNodes();
     } else if (data_type==FINLEY_REDUCED_NODES) { 
         reducedShapefunction = true;
         numNodes = nodes->getNumReducedNodes();
-    } else if (data_type==FINLEY_DEGREES_OF_FREEDOM) {
+    } else if (data_type == FINLEY_DEGREES_OF_FREEDOM) {
         if (elements->MPIInfo->size > 1) {
-            setError(TYPE_ERROR, "Assemble_gradient: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
-            return;
+            throw escript::ValueError("Assemble_gradient: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
         }
         numNodes = nodes->getNumDegreesOfFreedom();
-    } else if (data_type==FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
+    } else if (data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
         if (elements->MPIInfo->size > 1) {
-            setError(TYPE_ERROR, "Assemble_gradient: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
-            return;
+            throw escript::ValueError("Assemble_gradient: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
         }
         reducedShapefunction = true;
         numNodes = nodes->getNumReducedDegreesOfFreedom();
     } else {
-        setError(TYPE_ERROR, "Assemble_gradient: Cannot calculate gradient of data because of unsuitable input data representation.");
-        return;
+        throw escript::ValueError("Assemble_gradient: Cannot calculate gradient of data because of unsuitable input data representation.");
     }
 
-    ElementFile_Jacobians *jac = elements->borrowJacobians(nodes,
-            reducedShapefunction, reducedOrder);
+    ElementFile_Jacobians* jac = elements->borrowJacobians(nodes,
+                                         reducedShapefunction, reducedOrder);
     const_ReferenceElement_ptr refElement(elements->referenceElementSet->
             borrowReferenceElement(reducedOrder));
-    const int numDim=jac->numDim;
-    const int numShapes=jac->BasisFunctions->Type->numShapes;
-    const int numShapesTotal=jac->numShapesTotal;
-    const int numSub=jac->numSub;
-    const int numQuad=jac->numQuadTotal/numSub;
-    int numShapesTotal2=0;
-    int s_offset=0;
-    const int *nodes_selector=NULL;
+    const int numDim = jac->numDim;
+    const int numShapes = jac->BasisFunctions->Type->numShapes;
+    const int numShapesTotal = jac->numShapesTotal;
+    const int numSub = jac->numSub;
+    const int numQuad = jac->numQuadTotal/numSub;
+    int numShapesTotal2 = 0;
+    int s_offset = 0;
+    const int* nodes_selector = NULL;
   
-    if (noError()) {
-        const int grad_data_type=grad_data.getFunctionSpace().getTypeCode();
-        if (grad_data_type==FINLEY_CONTACT_ELEMENTS_2 || grad_data_type==FINLEY_REDUCED_CONTACT_ELEMENTS_2)  {
-            s_offset=jac->offsets[1];
-            s_offset=jac->offsets[1];
-        } else {
-            s_offset=jac->offsets[0];
-            s_offset=jac->offsets[0];
-        }
-        if (data_type==FINLEY_REDUCED_NODES || data_type==FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
-            nodes_selector=refElement->Type->linearNodes;
-            numShapesTotal2=refElement->LinearBasisFunctions->Type->numShapes * refElement->Type->numSides;
-        } else { 
-            nodes_selector=refElement->Type->subElementNodes;
-            numShapesTotal2=refElement->BasisFunctions->Type->numShapes * refElement->Type->numSides;
-        }
-
-        // check the dimensions of data
-        if (!grad_data.numSamplesEqual(numQuad*numSub, elements->numElements)) {
-            setError(TYPE_ERROR, "Assemble_gradient: illegal number of samples in gradient Data object");
-        } else if (!data.numSamplesEqual(1, numNodes)) {
-            setError(TYPE_ERROR, "Assemble_gradient: illegal number of samples of input Data object");
-        } else if (numDim*numComps != grad_data.getDataPointSize()) {
-            setError(TYPE_ERROR, "Assemble_gradient: illegal number of components in gradient data object.");
-        } else if (!grad_data.actsExpanded()) {
-            setError(TYPE_ERROR, "Assemble_gradient: expanded Data object is expected for output data.");
-        } else if (!(s_offset+numShapes <= numShapesTotal)) {
-            setError(SYSTEM_ERROR, "Assemble_gradient: nodes per element is inconsistent with number of jacobians.");
-        }
+    const int grad_data_type = grad_data.getFunctionSpace().getTypeCode();
+    if (grad_data_type==FINLEY_CONTACT_ELEMENTS_2 || grad_data_type==FINLEY_REDUCED_CONTACT_ELEMENTS_2)  {
+        s_offset = jac->offsets[1];
+    } else {
+        s_offset = jac->offsets[0];
+    }
+    if (data_type==FINLEY_REDUCED_NODES || data_type==FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
+        nodes_selector = refElement->Type->linearNodes;
+        numShapesTotal2 = refElement->LinearBasisFunctions->Type->numShapes * refElement->Type->numSides;
+    } else { 
+        nodes_selector = refElement->Type->subElementNodes;
+        numShapesTotal2 = refElement->BasisFunctions->Type->numShapes * refElement->Type->numSides;
     }
 
-    if (!noError())
-        return;
+    // check the dimensions of data
+    if (!grad_data.numSamplesEqual(numQuad*numSub, elements->numElements)) {
+        throw escript::ValueError("Assemble_gradient: illegal number of samples in gradient Data object");
+    } else if (!data.numSamplesEqual(1, numNodes)) {
+        throw escript::ValueError("Assemble_gradient: illegal number of samples of input Data object");
+    } else if (numDim * numComps != grad_data.getDataPointSize()) {
+        throw escript::ValueError("Assemble_gradient: illegal number of components in gradient data object.");
+    } else if (!grad_data.actsExpanded()) {
+        throw escript::ValueError("Assemble_gradient: expanded Data object is expected for output data.");
+    } else if (!(s_offset+numShapes <= numShapesTotal)) {
+        throw escript::ValueError("Assemble_gradient: nodes per element is inconsistent with number of jacobians.");
+    }
 
-    const size_t localGradSize=sizeof(double)*numDim*numQuad*numSub*numComps;
+    const size_t localGradSize = sizeof(double)*numDim*numQuad*numSub*numComps;
     grad_data.requireWrite();
 #pragma omp parallel
     {
-        if (data_type==FINLEY_NODES) {
-            if (numDim==1) {
+        if (data_type == FINLEY_NODES) {
+            if (numDim == 1) {
 #pragma omp for
-                for (index_t e=0; e<elements->numElements; e++) {
-                    double *grad_data_e=grad_data.getSampleDataRW(e);
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
                     memset(grad_data_e, 0, localGradSize);
-                    for (int isub=0; isub<numSub; isub++) {
-                        for (int s=0; s<numShapes; s++) {
-                            const index_t n=elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
-                            const double *data_array=data.getSampleDataRO(n);
-                            for (int q=0; q<numQuad; q++) {
+                    for (int isub = 0; isub < numSub; isub++) {
+                        for (int s = 0; s < numShapes; s++) {
+                            const index_t n = elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
+                            const double* data_array = data.getSampleDataRO(n);
+                            for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-                                for (int l=0; l<numComps; l++) {
-                                    grad_data_e[INDEX4(l,0,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,0,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
+                                for (int l = 0; l < numComps; l++) {
+                                    grad_data_e[INDEX4(l, 0, q, isub, numComps, numDim, numQuad)] +=
+                                        data_array[l] * jac->DSDX[
+                                            INDEX5(s_offset+s, 0, q, isub, e, numShapesTotal, numDim, numQuad, numSub)];
                                 }
                             }
                         }
                     }
                 }
-            } else if (numDim==2) {
+            } else if (numDim == 2) {
 #pragma omp for
-                for (index_t e=0; e<elements->numElements; e++) {
-                    double *grad_data_e=grad_data.getSampleDataRW(e);
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
                     memset(grad_data_e, 0, localGradSize);
-                    for (int isub=0; isub<numSub; isub++) {
-                        for (int s=0; s<numShapes; s++) {
-                            const index_t n=elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
-                            const double *data_array=data.getSampleDataRO(n);
-                            for (int q=0; q<numQuad; q++) {
+                    for (int isub = 0; isub < numSub; isub++) {
+                        for (int s = 0; s < numShapes; s++) {
+                            const index_t n = elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
+                            const double* data_array = data.getSampleDataRO(n);
+                            for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-                                for (int l=0; l<numComps; l++) {
+                                for (int l = 0; l < numComps; l++) {
                                     grad_data_e[INDEX4(l,0,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,0,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
                                     grad_data_e[INDEX4(l,1,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,1,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
                                 }
@@ -156,18 +146,18 @@ void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
                         }
                     }
                 }
-            } else if (numDim==3) {
+            } else if (numDim == 3) {
 #pragma omp for
-                for (index_t e=0; e<elements->numElements; e++) {
-                    double *grad_data_e=grad_data.getSampleDataRW(e); 
-                    memset(grad_data_e,0, localGradSize);
-                    for (int isub=0; isub<numSub; isub++) {
-                        for (int s=0; s<numShapes; s++) {
-                            const index_t n=elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
-                            const double *data_array=data.getSampleDataRO(n);
-                            for (int q=0; q<numQuad; q++) {
+                for (index_t e = 0; e < elements->numElements; e++) {
+                    double* grad_data_e = grad_data.getSampleDataRW(e);
+                    memset(grad_data_e, 0, localGradSize);
+                    for (int isub = 0; isub < numSub; isub++) {
+                        for (int s = 0; s < numShapes; s++) {
+                            const index_t n = elements->Nodes[INDEX2(nodes_selector[INDEX2(s_offset+s,isub,numShapesTotal2)],e, NN)];
+                            const double* data_array = data.getSampleDataRO(n);
+                            for (int q = 0; q < numQuad; q++) {
 #pragma ivdep
-                                for (int l=0; l<numComps; l++) {
+                                for (int l = 0; l < numComps; l++) {
                                     grad_data_e[INDEX4(l,0,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,0,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
                                     grad_data_e[INDEX4(l,1,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,1,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
                                     grad_data_e[INDEX4(l,2,q,isub,numComps,numDim,numQuad)]+=data_array[l]*jac->DSDX[INDEX5(s_offset+s,2,q,isub,e,numShapesTotal,numDim,numQuad,numSub)];
@@ -177,9 +167,9 @@ void Assemble_gradient(const NodeFile* nodes, const ElementFile* elements,
                     }
                 }
             }
-        } else if (data_type==FINLEY_REDUCED_NODES) {
+        } else if (data_type == FINLEY_REDUCED_NODES) {
             const index_t* target = nodes->borrowTargetReducedNodes();
-            if (numDim==1) {
+            if (numDim == 1) {
 #pragma omp for
                 for (index_t e=0; e<elements->numElements; e++) {
                     double *grad_data_e=grad_data.getSampleDataRW(e);
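
Each of the gradient loops above accumulates, per element and quadrature point, grad(l,d) += data(l at node s) * dN_s/dx_d over the element's shape functions. A minimal stand-alone sketch of that accumulation, using hypothetical names and a plain row-major layout instead of the INDEX4/INDEX5 macros:

    #include <vector>

    // Hypothetical helper, not a library function: adds one quadrature point's
    // contribution grad(l,d) += nodalData(l,s) * DSDX(d,s) for all shapes s.
    void addGradientContribution(int numComps, int numDim, int numShapes,
                                 const std::vector<double>& nodalData, // numShapes blocks of numComps values
                                 const std::vector<double>& DSDX,      // numShapes blocks of numDim derivatives
                                 std::vector<double>& grad)            // numDim blocks of numComps values
    {
        for (int s = 0; s < numShapes; s++)
            for (int d = 0; d < numDim; d++)
                for (int l = 0; l < numComps; l++)
                    grad[l + numComps*d] += nodalData[l + numComps*s] * DSDX[d + numDim*s];
    }
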
diff --git a/finley/src/Assemble_integrate.cpp b/finley/src/Assemble_integrate.cpp
index 9207bb3..580fd94 100644
--- a/finley/src/Assemble_integrate.cpp
+++ b/finley/src/Assemble_integrate.cpp
@@ -21,72 +21,67 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_integrate(const NodeFile* nodes, const ElementFile* elements,
                         const escript::Data& data, double* out)
 {
-    resetError();
     if (!nodes || !elements)
         return;
 
-    Esys_MPI_rank my_mpi_rank = nodes->MPIInfo->rank;
-    ElementFile_Jacobians *jac = elements->borrowJacobians(nodes, FALSE,
+    const int my_mpi_rank = nodes->MPIInfo->rank;
+    ElementFile_Jacobians* jac = elements->borrowJacobians(nodes, false,
                                     util::hasReducedIntegrationOrder(data));
-    if (noError()) {
-        const int numQuadTotal = jac->numQuadTotal;
-        // check the shape of the data
-        if (!data.numSamplesEqual(numQuadTotal, elements->numElements)) {
-            setError(TYPE_ERROR, "Assemble_integrate: illegal number of samples of integrant kernel Data object");
-            return;
-        }
 
-        const int numComps = data.getDataPointSize();
+    const int numQuadTotal = jac->numQuadTotal;
+    // check the shape of the data
+    if (!data.numSamplesEqual(numQuadTotal, elements->numElements)) {
+        throw escript::ValueError("Assemble_integrate: illegal number of samples of integrand kernel Data object");
+    }
+
+    const int numComps = data.getDataPointSize();
 
-        for (int q=0; q<numComps; q++)
-            out[q]=0;
+    for (int q = 0; q < numComps; q++)
+        out[q] = 0;
 
 #pragma omp parallel
-        {
-            std::vector<double> out_local(numComps);
+    {
+        std::vector<double> out_local(numComps);
 
-            if (data.actsExpanded()) {
+        if (data.actsExpanded()) {
 #pragma omp for
-                for (int e=0; e<elements->numElements; e++) {
-                    if (elements->Owner[e] == my_mpi_rank) {
-                        const double *data_array=data.getSampleDataRO(e);
-                        for (int q=0; q<numQuadTotal; q++) {
-                            for (int i=0; i<numComps; i++)
-                                out_local[i]+=data_array[INDEX2(i,q,numComps)]*jac->volume[INDEX2(q,e,numQuadTotal)];
-                        }
+            for (index_t e = 0; e < elements->numElements; e++) {
+                if (elements->Owner[e] == my_mpi_rank) {
+                    const double* data_array = data.getSampleDataRO(e);
+                    for (int q = 0; q < numQuadTotal; q++) {
+                        for (int i = 0; i < numComps; i++)
+                            out_local[i] += data_array[INDEX2(i,q,numComps)]*jac->volume[INDEX2(q,e,numQuadTotal)];
                     }
                 }
-            } else {
+            }
+        } else {
 #pragma omp for
-                for (int e=0; e<elements->numElements; e++) {
-                    if (elements->Owner[e] == my_mpi_rank) {
-                        const double *data_array=data.getSampleDataRO(e);
-                        double rtmp=0.;
-                        for (int q=0; q<numQuadTotal; q++)
-                            rtmp+=jac->volume[INDEX2(q,e,numQuadTotal)];
-                        for (int i=0; i<numComps; i++)
-                            out_local[i]+=data_array[i]*rtmp;
-                    }
+            for (index_t e = 0; e < elements->numElements; e++) {
+                if (elements->Owner[e] == my_mpi_rank) {
+                    const double* data_array = data.getSampleDataRO(e);
+                    double rtmp = 0.;
+                    for (int q = 0; q < numQuadTotal; q++)
+                        rtmp += jac->volume[INDEX2(q, e, numQuadTotal)];
+                    for (int i = 0; i < numComps; i++)
+                        out_local[i] += data_array[i] * rtmp;
                 }
             }
-            // add local results to global result
-#pragma omp critical
-            for (int i=0; i<numComps; i++)
-                out[i]+=out_local[i];
         }
-    }
+        // add local results to global result
+#pragma omp critical
+        for (int i = 0; i < numComps; i++)
+            out[i] += out_local[i];
+    } // parallel section
 }
 
 } // namespace finley
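
The change applied here, as in the other assembly files of this commit, replaces the setError()/noError() flag protocol with exceptions thrown at the point of failure. A minimal sketch of the new style, with std::runtime_error standing in for escript::ValueError and a hypothetical checkSampleCount() helper:

    #include <stdexcept>

    // Hypothetical helper; std::runtime_error stands in for escript::ValueError.
    void checkSampleCount(bool samplesOk)
    {
        // Old style (removed): setError(TYPE_ERROR, "..."); return; and every
        // caller had to test noError() before trusting the result.
        // New style (added): throw immediately and let stack unwinding skip
        // the rest of the assembly.
        if (!samplesOk)
            throw std::runtime_error("Assemble_integrate: illegal number of samples");
    }
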
diff --git a/finley/src/Assemble_interpolate.cpp b/finley/src/Assemble_interpolate.cpp
index b6a53e0..2d3878c 100644
--- a/finley/src/Assemble_interpolate.cpp
+++ b/finley/src/Assemble_interpolate.cpp
@@ -22,37 +22,34 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "Assemble.h"
 #include "Util.h"
 
+#include <escript/index.h>
+
 namespace finley {
 
 void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
                           const escript::Data& data,
                           escript::Data& interpolated_data)
 {
-    resetError();
     if (!nodes || !elements)
         return;
 
-    const int data_type=data.getFunctionSpace().getTypeCode();
+    const int data_type = data.getFunctionSpace().getTypeCode();
     const bool reducedOrder = util::hasReducedIntegrationOrder(interpolated_data);
     const_ReferenceElement_ptr refElement(elements->referenceElementSet->
                                         borrowReferenceElement(reducedOrder));
 
-    const int *resort_nodes = NULL;
+    const int* resort_nodes = NULL;
     const index_t* map = NULL;
     int numSub = 0;
     dim_t numNodes = 0;
     const_ShapeFunction_ptr basis;
     int dof_offset = 0;
 
-    if (data_type==FINLEY_NODES) {
-        numSub=refElement->Type->numSubElements;
+    if (data_type == FINLEY_NODES) {
+        numSub = refElement->Type->numSubElements;
         resort_nodes=refElement->Type->subElementNodes;
         basis=refElement->BasisFunctions;
         numNodes=nodes->getNumNodes();
@@ -73,25 +70,23 @@ void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
         } else {
             dof_offset=refElement->LinearType->offsets[0];
         }
-    } else if (data_type==FINLEY_DEGREES_OF_FREEDOM) {
+    } else if (data_type == FINLEY_DEGREES_OF_FREEDOM) {
         if (elements->MPIInfo->size > 1) {
-            setError(TYPE_ERROR,"Assemble_interpolate: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
-            return;
+            throw escript::ValueError("Assemble_interpolate: for more than one processor DEGREES_OF_FREEDOM data are not accepted as input.");
         }
-        numSub=refElement->Type->numSubElements;
-        resort_nodes=refElement->Type->subElementNodes;
-        basis=refElement->BasisFunctions;
-        numNodes=nodes->getNumDegreesOfFreedom();
-        map=nodes->borrowTargetDegreesOfFreedom();
+        numSub = refElement->Type->numSubElements;
+        resort_nodes = refElement->Type->subElementNodes;
+        basis = refElement->BasisFunctions;
+        numNodes = nodes->getNumDegreesOfFreedom();
+        map = nodes->borrowTargetDegreesOfFreedom();
         if (interpolated_data.getFunctionSpace().getTypeCode()==FINLEY_CONTACT_ELEMENTS_2) {
-            dof_offset=refElement->Type->offsets[1];
+            dof_offset = refElement->Type->offsets[1];
         } else {
-            dof_offset=refElement->Type->offsets[0];
+            dof_offset = refElement->Type->offsets[0];
         }
-    } else if (data_type==FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
+    } else if (data_type == FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
         if (elements->MPIInfo->size > 1) {
-            setError(TYPE_ERROR, "Assemble_interpolate: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
-            return;
+            throw escript::ValueError("Assemble_interpolate: for more than one processor REDUCED_DEGREES_OF_FREEDOM data are not accepted as input.");
         }
         numSub=1;
         resort_nodes=refElement->Type->linearNodes;
@@ -104,51 +99,47 @@ void Assemble_interpolate(const NodeFile* nodes, const ElementFile* elements,
             dof_offset=refElement->LinearType->offsets[0];
         }
     } else {
-        setError(TYPE_ERROR,"Assemble_interpolate: Cannot interpolate data");
-        return;
+        throw escript::ValueError("Assemble_interpolate: invalid functionspace");
     }
 
-    const int numComps=data.getDataPointSize();
-    const int numQuad=basis->numQuadNodes;
-    const int numShapesTotal=basis->Type->numShapes*refElement->Type->numSides;
-    const int NN=elements->numNodes;
-    const int NS_DOF=basis->Type->numShapes;
+    const int numComps = data.getDataPointSize();
+    const int numQuad = basis->numQuadNodes;
+    const int numShapesTotal = basis->Type->numShapes*refElement->Type->numSides;
+    const int NN = elements->numNodes;
+    const int NS_DOF = basis->Type->numShapes;
 
     // check the dimensions of interpolated_data and data
     if (!interpolated_data.numSamplesEqual(numQuad*numSub, elements->numElements)) {
-        setError(TYPE_ERROR, "Assemble_interpolate: illegal number of samples of output Data object");
+        throw escript::ValueError("Assemble_interpolate: illegal number of samples of output Data object");
     } else if (!data.numSamplesEqual(1,numNodes)) {
-        setError(TYPE_ERROR, "Assemble_interpolate: illegal number of samples of input Data object");
+        throw escript::ValueError("Assemble_interpolate: illegal number of samples of input Data object");
     } else if (numComps != interpolated_data.getDataPointSize()) {
-        setError(TYPE_ERROR, "Assemble_interpolate: number of components of input and interpolated Data do not match.");
+        throw escript::ValueError("Assemble_interpolate: number of components of input and interpolated Data do not match.");
     }  else if (!interpolated_data.actsExpanded()) {
-        setError(TYPE_ERROR, "Assemble_interpolate: expanded Data object is expected for output data.");
+        throw escript::ValueError("Assemble_interpolate: expanded Data object is expected for output data.");
     }
 
-    if (noError()) {
-        interpolated_data.requireWrite();
+    interpolated_data.requireWrite();
 #pragma omp parallel
-        {
-            // allocation of work array
-            std::vector<double> local_data(NS_DOF*numComps*numSub);
-            const size_t numComps_size=numComps*sizeof(double);
-            // open the element loop
+    {
+        std::vector<double> local_data(NS_DOF * numComps * numSub);
+        const size_t numComps_size = numComps * sizeof(double);
+        // open the element loop
 #pragma omp for
-            for (index_t e=0; e<elements->numElements; e++) {
-                for (int isub=0; isub<numSub; isub++) {
-                    for (int q=0; q<NS_DOF; q++) {
-                        const index_t i=elements->Nodes[INDEX2(resort_nodes[INDEX2(dof_offset+q,isub,numShapesTotal)],e,NN)];
-                        const double *data_array=data.getSampleDataRO(map[i]);
-                        memcpy(&local_data[INDEX3(0,q,isub, numComps,NS_DOF)], data_array, numComps_size);
-                    }
+        for (index_t e = 0; e < elements->numElements; e++) {
+            for (int isub = 0; isub < numSub; isub++) {
+                for (int q = 0; q < NS_DOF; q++) {
+                    const index_t i = elements->Nodes[INDEX2(resort_nodes[INDEX2(dof_offset+q,isub,numShapesTotal)],e,NN)];
+                    const double* data_array = data.getSampleDataRO(map[i]);
+                    memcpy(&local_data[INDEX3(0, q, isub, numComps,NS_DOF)], data_array, numComps_size);
                 }
-                // calculate interpolated_data=local_data*S
-                util::smallMatSetMult1(numSub, numComps, numQuad,
-                      interpolated_data.getSampleDataRW(e), NS_DOF,
-                      local_data, basis->S);
-            } // end of element loop
-        } // end of parallel region
-    } // no error
+            }
+            // calculate interpolated_data=local_data*S
+            util::smallMatSetMult1(numSub, numComps, numQuad,
+                  interpolated_data.getSampleDataRW(e), NS_DOF,
+                  local_data, basis->S);
+        } // end of element loop
+    } // end of parallel region
 }
 
 } // namespace finley
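
The element loop above gathers nodal values into local_data and multiplies them by the shape-function table S via util::smallMatSetMult1. A stand-alone sketch of that product, with hypothetical names and a plain layout rather than the library's INDEX macros:

    #include <vector>

    // interpolated(l,q) = sum over s of local(l,s) * S(s,q); illustrative layout only.
    void interpolateElement(int numComps, int numShapes, int numQuad,
                            const std::vector<double>& local, // numShapes blocks of numComps values
                            const std::vector<double>& S,     // numQuad blocks of numShapes weights
                            std::vector<double>& out)         // numQuad blocks of numComps values
    {
        for (int q = 0; q < numQuad; q++)
            for (int l = 0; l < numComps; l++) {
                double v = 0.;
                for (int s = 0; s < numShapes; s++)
                    v += local[l + numComps*s] * S[s + numShapes*q];
                out[l + numComps*q] = v;
            }
    }
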
diff --git a/finley/src/Assemble_jacobians.cpp b/finley/src/Assemble_jacobians.cpp
index a8be9b0..7df3975 100644
--- a/finley/src/Assemble_jacobians.cpp
+++ b/finley/src/Assemble_jacobians.cpp
@@ -14,6 +14,13 @@
 *
 *****************************************************************************/
 
+#include "Assemble.h"
+#include "Util.h"
+
+#include <escript/index.h>
+
+#include <sstream>
+
 /*
   input: 
     const double* coordinates[DIM*(*)]
@@ -21,7 +28,7 @@
     const double* QuadWeights[numQuad]
     int numShape
     dim_t numElements
-    dim_t numNodes
+    int numNodes
     const index_t* nodes[numNodes*numElements]  where NUMSIDES*numShape<=numNodes
     const double* DSDv[numShape*DIM*numQuad]
     int numTest
@@ -33,15 +40,6 @@
     double* volume[numQuad*numElements]
 */
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Assemble.h"
-#include "Util.h"
-
-#include <sstream>
-
 namespace finley {
 
 /****************************************************************************/
@@ -50,7 +48,7 @@ namespace finley {
 //
 void Assemble_jacobians_1D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -69,14 +67,14 @@ void Assemble_jacobians_1D(const double* coordinates, int numQuad,
                 ss << "Assemble_jacobians_1D: element " << e
                     << " (id " << elementId[e] << ") has length zero.";
                 std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 for (int s=0; s<numTest; s++)
                      dTdX[INDEX4(s,0,q,e,numTest,DIM,numQuad)] =
                          DTDv[INDEX3(s,0,q,numTest,LOCDIM)]*invD;
             }
-            volume[INDEX2(q,e,numQuad)]=ABS(D)*QuadWeights[q];
+            volume[INDEX2(q,e,numQuad)]=std::abs(D)*QuadWeights[q];
         }
     }
 }
@@ -87,20 +85,20 @@ void Assemble_jacobians_1D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
-    const int DIM=2;
-    const int LOCDIM=2;
+    const int DIM = 2;
+    const int LOCDIM = 2;
 #pragma omp parallel for
-    for (index_t e=0; e<numElements; e++) {
-        for (int q=0; q<numQuad; q++) {
-            double dXdv00=0.;
-            double dXdv10=0.;
-            double dXdv01=0.;
-            double dXdv11=0.;
-            for (int s=0; s<numShape; s++) {
+    for (index_t e = 0; e < numElements; e++) {
+        for (int q = 0; q < numQuad; q++) {
+            double dXdv00 = 0.;
+            double dXdv10 = 0.;
+            double dXdv01 = 0.;
+            double dXdv11 = 0.;
+            for (int s = 0; s < numShape; s++) {
                 const double X0_loc=coordinates[INDEX2(0,nodes[INDEX2(s,e,numNodes)],DIM)];
                 const double X1_loc=coordinates[INDEX2(1,nodes[INDEX2(s,e,numNodes)],DIM)];
                 dXdv00+=X0_loc*DSDv[INDEX3(s,0,q,numShape,LOCDIM)];
@@ -113,8 +111,7 @@ void Assemble_jacobians_2D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_2D: element " << e
                     << " (id " << elementId[e] << ") has length zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                throw FinleyException(ss.str());
             } else {
                 const double invD = 1./D;
                 const double dvdX00 = dXdv11*invD;
@@ -130,7 +127,7 @@ void Assemble_jacobians_2D(const double* coordinates, int numQuad,
                         DTDv[INDEX3(s,1,q,numTest,LOCDIM)]*dvdX11;
                 }
             }
-            volume[INDEX2(q,e,numQuad)]=ABS(D)*QuadWeights[q];
+            volume[INDEX2(q,e,numQuad)] = std::abs(D)*QuadWeights[q];
         }
     }
 }
@@ -141,7 +138,7 @@ void Assemble_jacobians_2D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -163,8 +160,8 @@ void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_2D_M1D_E1D: element " << e
                    << " (id " << elementId[e] << ") has length zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 const double dvdX00 = dXdv00*invD;
@@ -187,7 +184,7 @@ void Assemble_jacobians_2D_M1D_E1D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_2D_M1D_E1D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -216,8 +213,8 @@ void Assemble_jacobians_2D_M1D_E1D_C(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_2D_M1D_E1D_C: element " << e
                     << " (id " << elementId[e] << ") has length zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD_0 = 1./D_0;
                 const double dvdX00_0=dXdv00_0*invD_0;
@@ -247,7 +244,7 @@ void Assemble_jacobians_2D_M1D_E1D_C(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_2D_M1D_E2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -273,8 +270,8 @@ void Assemble_jacobians_2D_M1D_E2D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_2D_M1D_E2D: element " << e
                     << " (id " << elementId[e] << ") has area zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 const double dvdX00 = dXdv11*invD;
@@ -301,7 +298,7 @@ void Assemble_jacobians_2D_M1D_E2D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_2D_M1D_E2D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -338,8 +335,8 @@ void Assemble_jacobians_2D_M1D_E2D_C(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_2D_M1D_E2D_C: element " << e
                     << " (id " << elementId[e] << ") has area zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD_0=1./D_0;
                 const double dvdX00_0= dXdv11_0*invD_0;
@@ -377,7 +374,7 @@ void Assemble_jacobians_2D_M1D_E2D_C(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_3D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -414,8 +411,8 @@ void Assemble_jacobians_3D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_3D: element " << e
                     << " (id " << elementId[e] << ") has volume zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 const double dvdX00=(dXdv11*dXdv22-dXdv12*dXdv21)*invD;
@@ -441,7 +438,7 @@ void Assemble_jacobians_3D(const double* coordinates, int numQuad,
                         DTDv[INDEX3(s,1,q,numTest,LOCDIM)]*dvdX12 +
                         DTDv[INDEX3(s,2,q,numTest,LOCDIM)]*dvdX22;
                 }
-                volume[INDEX2(q,e,numQuad)]=ABS(D)*QuadWeights[q];
+                volume[INDEX2(q,e,numQuad)]=std::abs(D)*QuadWeights[q];
             }
         }
     }
@@ -453,7 +450,7 @@ void Assemble_jacobians_3D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_3D_M2D_E3D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -492,8 +489,8 @@ void Assemble_jacobians_3D_M2D_E3D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_M2D_E3D: element " << e
                     << " (id " << elementId[e] << ") has volume zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 const double dvdX00=(dXdv11*dXdv22-dXdv12*dXdv21)*invD;
@@ -534,7 +531,7 @@ void Assemble_jacobians_3D_M2D_E3D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_3D_M2D_E3D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -598,8 +595,8 @@ void Assemble_jacobians_3D_M2D_E3D_C(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_M2D_E3D_C: element " << e
                     << " (id " << elementId[e] << ") has volume zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD_0=1./D_0;
                 const double dvdX00_0=(dXdv11_0*dXdv22_0-dXdv12_0*dXdv21_0)*invD_0;
@@ -667,7 +664,7 @@ void Assemble_jacobians_3D_M2D_E3D_C(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -701,8 +698,8 @@ void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_3D_M2D_E2D: element " << e
                     << " (id " << elementId[e] << ") has area zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD = 1./D;
                 const double dvdX00=( m00*dXdv00-m01*dXdv01)*invD;
@@ -734,7 +731,7 @@ void Assemble_jacobians_3D_M2D_E2D(const double* coordinates, int numQuad,
 //
 void Assemble_jacobians_3D_M2D_E2D_C(const double* coordinates, int numQuad,
                            const double* QuadWeights, int numShape,
-                           dim_t numElements, dim_t numNodes, const index_t* nodes,
+                           dim_t numElements, int numNodes, const index_t* nodes,
                            const double* DSDv, int numTest, const double* DTDv,
                            double* dTdX, double* volume, const index_t* elementId)
 {
@@ -787,8 +784,8 @@ void Assemble_jacobians_3D_M2D_E2D_C(const double* coordinates, int numQuad,
                 std::stringstream ss;
                 ss << "Assemble_jacobians_3D_M2D_E2D_C: element " << e
                     << " (id " << elementId[e] << ") has area zero.";
-                std::string errorMsg = ss.str();
-                setError(ZERO_DIVISION_ERROR, errorMsg.c_str());
+                const std::string errorMsg = ss.str();
+                throw FinleyException(errorMsg);
             } else {
                 const double invD_0=1./D_0;
                 const double dvdX00_0=( m00_0*dXdv00_0-m01_0*dXdv01_0)*invD_0;
diff --git a/finley/src/CPPAdapter/FinleyAdapterException.cpp b/finley/src/CPPAdapter/FinleyAdapterException.cpp
deleted file mode 100644
index 6cd6851..0000000
--- a/finley/src/CPPAdapter/FinleyAdapterException.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "FinleyAdapterException.h"
-
-
-using namespace finley;
-
-
-const std::string 
-FinleyAdapterException::exceptionNameValue("FinleyAdapterException");
-
-
-const std::string &
-FinleyAdapterException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-
diff --git a/finley/src/CPPAdapter/FinleyAdapterException.h b/finley/src/CPPAdapter/FinleyAdapterException.h
deleted file mode 100644
index 0890205..0000000
--- a/finley/src/CPPAdapter/FinleyAdapterException.h
+++ /dev/null
@@ -1,106 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  finley_FinleyAdapterException_20040526_H
-#define finley_FinleyAdapterException_20040526_H
-#include "system_dep.h"
-
-#include "esysUtils/EsysException.h"
-
-namespace finley
-{
-
-  /**
-  \brief
-  FinleyAdapterException exception class.
-
-  Description:
-  FinleyAdapterException exception class.
-  The class provides a public function returning the exception name
-  */
-  class FinleyAdapterException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    FINLEY_DLL_API
-    FinleyAdapterException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    FINLEY_DLL_API
-    FinleyAdapterException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    FINLEY_DLL_API
-    FinleyAdapterException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    FINLEY_DLL_API
-    FinleyAdapterException(const FinleyAdapterException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    /// Destructor
-    FINLEY_DLL_API
-    virtual ~FinleyAdapterException() THROW(NO_ARG) {}
-
-    /**
-    \brief
-    Assignment operator.
-    */
-    FINLEY_DLL_API
-    inline FinleyAdapterException &
-    operator=(const FinleyAdapterException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    FINLEY_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
-
-} // end of namespace
-#endif
diff --git a/finley/src/CPPAdapter/MeshAdapter.cpp b/finley/src/CPPAdapter/MeshAdapter.cpp
deleted file mode 100644
index 4043b00..0000000
--- a/finley/src/CPPAdapter/MeshAdapter.cpp
+++ /dev/null
@@ -1,2277 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-
-#include <pasowrap/PasoException.h>
-#include <pasowrap/TransportProblemAdapter.h>
-#include "MeshAdapter.h"
-#include "escript/Data.h"
-#include "escript/DataFactory.h"
-#include "esysUtils/blocktimer.h"
-#include "esysUtils/EsysRandom.h"
-
-#include <boost/python/import.hpp>
-#ifdef USE_NETCDF
-#include <netcdfcpp.h>
-#endif
-
-using namespace std;
-using namespace paso;
-namespace bp = boost::python;
-
-namespace finley {
-
-// define the static constants
-MeshAdapter::FunctionSpaceNamesMapType MeshAdapter::m_functionSpaceTypeNames;
-const int MeshAdapter::DegreesOfFreedom=FINLEY_DEGREES_OF_FREEDOM;
-const int MeshAdapter::ReducedDegreesOfFreedom=FINLEY_REDUCED_DEGREES_OF_FREEDOM;
-const int MeshAdapter::Nodes=FINLEY_NODES;
-const int MeshAdapter::ReducedNodes=FINLEY_REDUCED_NODES;
-const int MeshAdapter::Elements=FINLEY_ELEMENTS;
-const int MeshAdapter::ReducedElements=FINLEY_REDUCED_ELEMENTS;
-const int MeshAdapter::FaceElements=FINLEY_FACE_ELEMENTS;
-const int MeshAdapter::ReducedFaceElements=FINLEY_REDUCED_FACE_ELEMENTS;
-const int MeshAdapter::Points=FINLEY_POINTS;
-const int MeshAdapter::ContactElementsZero=FINLEY_CONTACT_ELEMENTS_1;
-const int MeshAdapter::ReducedContactElementsZero=FINLEY_REDUCED_CONTACT_ELEMENTS_1;
-const int MeshAdapter::ContactElementsOne=FINLEY_CONTACT_ELEMENTS_2;
-const int MeshAdapter::ReducedContactElementsOne=FINLEY_REDUCED_CONTACT_ELEMENTS_2;
-
-MeshAdapter::MeshAdapter(Mesh* finleyMesh)
-{
-    setFunctionSpaceTypeNames();
-    // need to use a null_deleter as Finley_Mesh_free deletes the pointer
-    // for us.
-    m_finleyMesh.reset(finleyMesh, null_deleter());
-}
-
-//
-// The copy constructor should just increment the use count
-MeshAdapter::MeshAdapter(const MeshAdapter& in) :
-    m_finleyMesh(in.m_finleyMesh)
-{
-    setFunctionSpaceTypeNames();
-}
-
-MeshAdapter::~MeshAdapter()
-{
-    // I hope the case for the pointer being zero has been taken care of
-    if (m_finleyMesh.unique()) {
-        delete m_finleyMesh.get();
-    }
-}
-
-int MeshAdapter::getMPISize() const
-{
-    return m_finleyMesh.get()->MPIInfo->size;
-}
-
-int MeshAdapter::getMPIRank() const
-{
-    return m_finleyMesh.get()->MPIInfo->rank;
-}
-
-void MeshAdapter::MPIBarrier() const
-{
-#ifdef ESYS_MPI
-    MPI_Barrier(m_finleyMesh.get()->MPIInfo->comm);
-#endif
-}
-
-bool MeshAdapter::onMasterProcessor() const
-{
-    return m_finleyMesh.get()->MPIInfo->rank == 0;
-}
-
-MPI_Comm MeshAdapter::getMPIComm() const
-{
-    return m_finleyMesh->MPIInfo->comm;
-}
-
-Mesh* MeshAdapter::getFinley_Mesh() const
-{
-    return m_finleyMesh.get();
-}
-
-void MeshAdapter::write(const string& fileName) const
-{
-    m_finleyMesh.get()->write(fileName);
-    checkFinleyError();
-}
-
-void MeshAdapter::Print_Mesh_Info(const bool full) const
-{
-    m_finleyMesh.get()->printInfo(full);
-}
-
-void MeshAdapter::dump(const string& fileName) const
-{
-#ifdef USE_NETCDF
-    const NcDim* ncdims[12] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
-    NcVar *ids;
-    int *int_ptr;
-    index_t *index_ptr;
-#ifdef ESYS_INDEXTYPE_LONG
-    NcType ncIdxType = ncLong;
-#else
-    NcType ncIdxType = ncInt;
-#endif
-    Mesh *mesh = m_finleyMesh.get();
-    int num_Tags = 0;
-    int mpi_size                         = mesh->MPIInfo->size;
-    int mpi_rank                         = mesh->MPIInfo->rank;
-    int numDim                           = mesh->Nodes->numDim;
-    dim_t numNodes                       = mesh->Nodes->numNodes;
-    dim_t num_Elements                   = mesh->Elements->numElements;
-    dim_t num_FaceElements               = mesh->FaceElements->numElements;
-    dim_t num_ContactElements            = mesh->ContactElements->numElements;
-    dim_t num_Points                     = mesh->Points->numElements;
-    int num_Elements_numNodes            = mesh->Elements->numNodes;
-    int num_FaceElements_numNodes        = mesh->FaceElements->numNodes;
-    int num_ContactElements_numNodes     = mesh->ContactElements->numNodes;
-#ifdef ESYS_MPI
-    MPI_Status status;
-#endif
-
-/* Incoming token indicates it's my turn to write */
-#ifdef ESYS_MPI
-    if (mpi_rank>0) {
-        MPI_Recv(&num_Tags, 0, MPI_INT, mpi_rank-1, 81800, mesh->MPIInfo->comm, &status);
-    }
-#endif
-
-    const std::string newFileName(esysUtils::appendRankToFileName(fileName,
-                                                     mpi_size, mpi_rank));
-
-    // Figure out how much storage is required for tags
-    num_Tags = mesh->tagMap.size();
-
-    // NetCDF error handler
-    NcError err(NcError::verbose_nonfatal);
-    // Create the file.
-    NcFile dataFile(newFileName.c_str(), NcFile::Replace);
-    string msgPrefix("Error in MeshAdapter::dump: NetCDF operation failed - ");
-    // check if writing was successful
-    if (!dataFile.is_valid())
-        throw FinleyAdapterException(msgPrefix+"Open file for output");
-
-    // Define dimensions (num_Elements and dim_Elements are identical,
-    // dim_Elements only appears if > 0)
-    if (! (ncdims[0] = dataFile.add_dim("numNodes", numNodes)) )
-        throw FinleyAdapterException(msgPrefix+"add_dim(numNodes)");
-    if (! (ncdims[1] = dataFile.add_dim("numDim", numDim)) )
-        throw FinleyAdapterException(msgPrefix+"add_dim(numDim)");
-    if (! (ncdims[2] = dataFile.add_dim("mpi_size_plus_1", mpi_size+1)) )
-        throw FinleyAdapterException(msgPrefix+"add_dim(mpi_size)");
-    if (num_Elements>0)
-        if (! (ncdims[3] = dataFile.add_dim("dim_Elements", num_Elements)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_Elements)");
-    if (num_FaceElements>0)
-        if (! (ncdims[4] = dataFile.add_dim("dim_FaceElements", num_FaceElements)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_FaceElements)");
-    if (num_ContactElements>0)
-        if (! (ncdims[5] = dataFile.add_dim("dim_ContactElements", num_ContactElements)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_ContactElements)");
-    if (num_Points>0)
-        if (! (ncdims[6] = dataFile.add_dim("dim_Points", num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_Points)");
-    if (num_Elements>0)
-        if (! (ncdims[7] = dataFile.add_dim("dim_Elements_Nodes", num_Elements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_Elements_Nodes)");
-    if (num_FaceElements>0)
-        if (! (ncdims[8] = dataFile.add_dim("dim_FaceElements_numNodes", num_FaceElements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_FaceElements_numNodes)");
-    if (num_ContactElements>0)
-        if (! (ncdims[9] = dataFile.add_dim("dim_ContactElements_numNodes", num_ContactElements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_ContactElements_numNodes)");
-    if (num_Tags>0)
-        if (! (ncdims[10] = dataFile.add_dim("dim_Tags", num_Tags)) )
-            throw FinleyAdapterException(msgPrefix+"add_dim(dim_Tags)");
-
-    // Attributes: MPI size, MPI rank, Name, order, reduced_order
-    if (!dataFile.add_att("index_size", (int)sizeof(index_t)))
-        throw FinleyAdapterException(msgPrefix+"add_att(index_size)");
-    if (!dataFile.add_att("mpi_size", mpi_size))
-        throw FinleyAdapterException(msgPrefix+"add_att(mpi_size)");
-    if (!dataFile.add_att("mpi_rank", mpi_rank))
-        throw FinleyAdapterException(msgPrefix+"add_att(mpi_rank)");
-    if (!dataFile.add_att("Name",mesh->m_name.c_str()))
-        throw FinleyAdapterException(msgPrefix+"add_att(Name)");
-    if (!dataFile.add_att("numDim",numDim))
-        throw FinleyAdapterException(msgPrefix+"add_att(order)");
-    if (!dataFile.add_att("order",mesh->integrationOrder))
-        throw FinleyAdapterException(msgPrefix+"add_att(order)");
-    if (!dataFile.add_att("reduced_order",mesh->reducedIntegrationOrder))
-        throw FinleyAdapterException(msgPrefix+"add_att(reduced_order)");
-    if (!dataFile.add_att("numNodes",numNodes))
-        throw FinleyAdapterException(msgPrefix+"add_att(numNodes)");
-    if (!dataFile.add_att("num_Elements",num_Elements))
-        throw FinleyAdapterException(msgPrefix+"add_att(num_Elements)");
-    if (!dataFile.add_att("num_FaceElements",num_FaceElements))
-        throw FinleyAdapterException(msgPrefix+"add_att(num_FaceElements)");
-    if (!dataFile.add_att("num_ContactElements",num_ContactElements))
-        throw FinleyAdapterException(msgPrefix+"add_att(num_ContactElements)");
-    if (!dataFile.add_att("num_Points",num_Points))
-        throw FinleyAdapterException(msgPrefix+"add_att(num_Points)");
-    if (!dataFile.add_att("num_Elements_numNodes",num_Elements_numNodes))
-        throw FinleyAdapterException(msgPrefix+"add_att(num_Elements_numNodes)");
-    if (!dataFile.add_att("num_FaceElements_numNodes",num_FaceElements_numNodes) )
-        throw FinleyAdapterException(msgPrefix+"add_att(num_FaceElements_numNodes)");
-    if (!dataFile.add_att("num_ContactElements_numNodes",num_ContactElements_numNodes) )
-        throw FinleyAdapterException(msgPrefix+"add_att(num_ContactElements_numNodes)");
-    if (!dataFile.add_att("Elements_TypeId", mesh->Elements->referenceElementSet->referenceElement->Type->TypeId) )
-        throw FinleyAdapterException(msgPrefix+"add_att(Elements_TypeId)");
-    if (!dataFile.add_att("FaceElements_TypeId", mesh->FaceElements->referenceElementSet->referenceElement->Type->TypeId) )
-        throw FinleyAdapterException(msgPrefix+"add_att(FaceElements_TypeId)");
-    if (!dataFile.add_att("ContactElements_TypeId", mesh->ContactElements->referenceElementSet->referenceElement->Type->TypeId) )
-        throw FinleyAdapterException(msgPrefix+"add_att(ContactElements_TypeId)");
-    if (!dataFile.add_att("Points_TypeId", mesh->Points->referenceElementSet->referenceElement->Type->TypeId) )
-        throw FinleyAdapterException(msgPrefix+"add_att(Points_TypeId)");
-    if (!dataFile.add_att("num_Tags", num_Tags) )
-        throw FinleyAdapterException(msgPrefix+"add_att(num_Tags)");
-
-    // // // // // Nodes // // // // //
-
-    // Nodes nodeDistribution
-    if (! (ids = dataFile.add_var("Nodes_NodeDistribution", ncIdxType, ncdims[2])) )
-        throw FinleyAdapterException(msgPrefix+"add_var(Nodes_NodeDistribution)");
-    index_ptr = &mesh->Nodes->nodesDistribution->first_component[0];
-    if (! (ids->put(index_ptr, mpi_size+1)) )
-        throw FinleyAdapterException(msgPrefix+"put(Nodes_NodeDistribution)");
-
-    // Nodes degreesOfFreedomDistribution
-    if (! ( ids = dataFile.add_var("Nodes_DofDistribution", ncIdxType, ncdims[2])) )
-        throw FinleyAdapterException(msgPrefix+"add_var(Nodes_DofDistribution)");
-    index_ptr = &mesh->Nodes->degreesOfFreedomDistribution->first_component[0];
-    if (! (ids->put(index_ptr, mpi_size+1)) )
-        throw FinleyAdapterException(msgPrefix+"put(Nodes_DofDistribution)");
-
-    // Only write nodes if non-empty because NetCDF doesn't like empty arrays
-    // (it treats them as NC_UNLIMITED)
-    if (numNodes > 0) {
-        // Nodes Id
-        if (! ( ids = dataFile.add_var("Nodes_Id", ncIdxType, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_Id)");
-        if (! (ids->put(&mesh->Nodes->Id[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_Id)");
-
-        // Nodes Tag
-        if (! ( ids = dataFile.add_var("Nodes_Tag", ncInt, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_Tag)");
-        if (! (ids->put(&mesh->Nodes->Tag[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_Tag)");
-
-        // Nodes gDOF
-        if (! ( ids = dataFile.add_var("Nodes_gDOF", ncIdxType, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_gDOF)");
-        if (! (ids->put(&mesh->Nodes->globalDegreesOfFreedom[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_gDOF)");
-
-        // Nodes global node index
-        if (! ( ids = dataFile.add_var("Nodes_gNI", ncIdxType, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_gNI)");
-        if (! (ids->put(&mesh->Nodes->globalNodesIndex[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_gNI)");
-
-        // Nodes grDof
-        if (! ( ids = dataFile.add_var("Nodes_grDfI", ncIdxType, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_grDfI)");
-        if (! (ids->put(&mesh->Nodes->globalReducedDOFIndex[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_grDfI)");
-
-        // Nodes grNI
-        if (! ( ids = dataFile.add_var("Nodes_grNI", ncIdxType, ncdims[0])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_grNI)");
-        if (! (ids->put(&mesh->Nodes->globalReducedNodesIndex[0], numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_grNI)");
-
-        // Nodes Coordinates
-        if (! ( ids = dataFile.add_var("Nodes_Coordinates", ncDouble, ncdims[0], ncdims[1]) ) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Nodes_Coordinates)");
-        if (! (ids->put(&mesh->Nodes->Coordinates[INDEX2(0,0,numDim)], numNodes, numDim)) )
-            throw FinleyAdapterException(msgPrefix+"put(Nodes_Coordinates)");
-    }
-
-    // // // // // Elements // // // // //
-    if (num_Elements > 0) {
-        // Elements_Id
-        if (! ( ids = dataFile.add_var("Elements_Id", ncIdxType, ncdims[3])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Elements_Id)");
-        if (! (ids->put(&mesh->Elements->Id[0], num_Elements)) )
-            throw FinleyAdapterException(msgPrefix+"put(Elements_Id)");
-
-        // Elements_Tag
-        if (! ( ids = dataFile.add_var("Elements_Tag", ncInt, ncdims[3])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Elements_Tag)");
-        if (! (ids->put(&mesh->Elements->Tag[0], num_Elements)) )
-            throw FinleyAdapterException(msgPrefix+"put(Elements_Tag)");
-
-        // Elements_Owner
-        if (! ( ids = dataFile.add_var("Elements_Owner", ncInt, ncdims[3])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Elements_Owner)");
-        if (! (ids->put(&mesh->Elements->Owner[0], num_Elements)) )
-            throw FinleyAdapterException(msgPrefix+"put(Elements_Owner)");
-
-        // Elements_Color
-        if (! ( ids = dataFile.add_var("Elements_Color", ncInt, ncdims[3])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Elements_Color)");
-        if (! (ids->put(&mesh->Elements->Color[0], num_Elements)) )
-            throw FinleyAdapterException(msgPrefix+"put(Elements_Color)");
-
-        // Elements_Nodes
-        if (! ( ids = dataFile.add_var("Elements_Nodes", ncIdxType, ncdims[3], ncdims[7]) ) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Elements_Nodes)");
-        if (! (ids->put(&mesh->Elements->Nodes[0], num_Elements, num_Elements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(Elements_Nodes)");
-    }
-
-    // // // // // Face_Elements // // // // //
-    if (num_FaceElements > 0) {
-        // FaceElements_Id
-        if (! ( ids = dataFile.add_var("FaceElements_Id", ncIdxType, ncdims[4])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(FaceElements_Id)");
-        if (! (ids->put(&mesh->FaceElements->Id[0], num_FaceElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(FaceElements_Id)");
-
-        // FaceElements_Tag
-        if (! ( ids = dataFile.add_var("FaceElements_Tag", ncInt, ncdims[4])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(FaceElements_Tag)");
-        if (! (ids->put(&mesh->FaceElements->Tag[0], num_FaceElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(FaceElements_Tag)");
-
-        // FaceElements_Owner
-        if (! ( ids = dataFile.add_var("FaceElements_Owner", ncInt, ncdims[4])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(FaceElements_Owner)");
-        if (! (ids->put(&mesh->FaceElements->Owner[0], num_FaceElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(FaceElements_Owner)");
-
-        // FaceElements_Color
-        if (! ( ids = dataFile.add_var("FaceElements_Color", ncInt, ncdims[4])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(FaceElements_Color)");
-        if (! (ids->put(&mesh->FaceElements->Color[0], num_FaceElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(FaceElements_Color)");
-
-        // FaceElements_Nodes
-        if (! ( ids = dataFile.add_var("FaceElements_Nodes", ncIdxType, ncdims[4], ncdims[8]) ) )
-            throw FinleyAdapterException(msgPrefix+"add_var(FaceElements_Nodes)");
-        if (! (ids->put(&mesh->FaceElements->Nodes[0], num_FaceElements, num_FaceElements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(FaceElements_Nodes)");
-    }
-
-    // // // // // Contact_Elements // // // // //
-    if (num_ContactElements > 0) {
-
-        // ContactElements_Id
-        if (! ( ids = dataFile.add_var("ContactElements_Id", ncIdxType, ncdims[5])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(ContactElements_Id)");
-        if (! (ids->put(&mesh->ContactElements->Id[0], num_ContactElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(ContactElements_Id)");
-
-        // ContactElements_Tag
-        if (! ( ids = dataFile.add_var("ContactElements_Tag", ncInt, ncdims[5])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(ContactElements_Tag)");
-        if (! (ids->put(&mesh->ContactElements->Tag[0], num_ContactElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(ContactElements_Tag)");
-
-        // ContactElements_Owner
-        if (! ( ids = dataFile.add_var("ContactElements_Owner", ncInt, ncdims[5])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(ContactElements_Owner)");
-        if (! (ids->put(&mesh->ContactElements->Owner[0], num_ContactElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(ContactElements_Owner)");
-
-        // ContactElements_Color
-        if (! ( ids = dataFile.add_var("ContactElements_Color", ncInt, ncdims[5])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(ContactElements_Color)");
-        if (! (ids->put(&mesh->ContactElements->Color[0], num_ContactElements)) )
-            throw FinleyAdapterException(msgPrefix+"put(ContactElements_Color)");
-
-        // ContactElements_Nodes
-        if (! ( ids = dataFile.add_var("ContactElements_Nodes", ncIdxType, ncdims[5], ncdims[9]) ) )
-            throw FinleyAdapterException(msgPrefix+"add_var(ContactElements_Nodes)");
-        if (! (ids->put(&mesh->ContactElements->Nodes[0], num_ContactElements, num_ContactElements_numNodes)) )
-            throw FinleyAdapterException(msgPrefix+"put(ContactElements_Nodes)");
-    }
-
-    // // // // // Points // // // // //
-    if (num_Points > 0) {
-        // Points_Id
-        if (! ( ids = dataFile.add_var("Points_Id", ncIdxType, ncdims[6])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Points_Id)");
-        if (! (ids->put(&mesh->Points->Id[0], num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"put(Points_Id)");
-
-        // Points_Tag
-        if (! ( ids = dataFile.add_var("Points_Tag", ncInt, ncdims[6])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Points_Tag)");
-        if (! (ids->put(&mesh->Points->Tag[0], num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"put(Points_Tag)");
-
-        // Points_Owner
-        if (! ( ids = dataFile.add_var("Points_Owner", ncInt, ncdims[6])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Points_Owner)");
-        if (! (ids->put(&mesh->Points->Owner[0], num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"put(Points_Owner)");
-
-        // Points_Color
-        if (! ( ids = dataFile.add_var("Points_Color", ncInt, ncdims[6])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Points_Color)");
-        if (! (ids->put(&mesh->Points->Color[0], num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"put(Points_Color)");
-
-        // Points_Nodes
-        // mesh->Nodes->Id[mesh->Points->Nodes[INDEX2(0,i,1)]]
-        if (! ( ids = dataFile.add_var("Points_Nodes", ncIdxType, ncdims[6]) ) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Points_Nodes)");
-        if (! (ids->put(&(mesh->Points->Nodes[0]), num_Points)) )
-            throw FinleyAdapterException(msgPrefix+"put(Points_Nodes)");
-    }
-
-    // // // // // TagMap // // // // //
-    if (num_Tags>0) {
-        // Temp storage to gather node IDs
-        vector<int> Tags_keys;
-
-        // Copy tag data into temp arrays
-        TagMap::const_iterator it;
-        for (it=mesh->tagMap.begin(); it!=mesh->tagMap.end(); it++) {
-            Tags_keys.push_back(it->second);
-        }
-
-        // Tags_keys
-        if (! (ids = dataFile.add_var("Tags_keys", ncInt, ncdims[10])) )
-            throw FinleyAdapterException(msgPrefix+"add_var(Tags_keys)");
-        int_ptr = &Tags_keys[0];
-        if (! (ids->put(int_ptr, num_Tags)) )
-            throw FinleyAdapterException(msgPrefix+"put(Tags_keys)");
-
-        // Tags_names_*
-        // These are the tag names. Ideally they would be stored as an array of
-        // strings, but the NetCDF manual does not explain how to do that, so
-        // one attribute per string is written instead.
-        int i = 0;
-        for (it=mesh->tagMap.begin(); it!=mesh->tagMap.end(); it++, i++) {
-            stringstream tagnamestream;
-            tagnamestream << "Tags_name_" << i;
-            const string tagname = tagnamestream.str();
-            if (!dataFile.add_att(tagname.c_str(), it->first.c_str()))
-                throw FinleyAdapterException(msgPrefix+"add_att(Tags_names_X)");
-        }
-    }
-
-    // Send the token to the next MPI process so it can take its turn
-#ifdef ESYS_MPI
-    if (mpi_rank<mpi_size-1) {
-        MPI_Send(&num_Tags, 0, MPI_INT, mpi_rank+1, 81800, mesh->MPIInfo->comm);
-    }
-#endif
-
-    // The NetCDF file is closed by the destructor of the NcFile object
-
-#else // USE_NETCDF
-    setError(IO_ERROR, "MeshAdapter::dump: not configured with netCDF. Please contact your installation manager.");
-#endif // USE_NETCDF
-    checkFinleyError();
-}
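The #ifdef ESYS_MPI fragment above forwards a zero-length token to the next rank once this rank has finished writing, presumably paired with a matching receive earlier in dump() so that ranks touch the NetCDF file one at a time. A minimal, self-contained sketch of that token-passing pattern, with purely illustrative names:

    #include <mpi.h>

    // Serialise a critical section (e.g. per-rank NetCDF writes) across ranks.
    void serialisedSection(MPI_Comm comm)
    {
        int rank = 0, size = 1, token = 0;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        if (rank > 0)         // wait until the previous rank has finished
            MPI_Recv(&token, 0, MPI_INT, rank-1, 81800, comm, MPI_STATUS_IGNORE);

        // ... this rank's portion of the file I/O goes here ...

        if (rank < size-1)    // hand the token to the next rank
            MPI_Send(&token, 0, MPI_INT, rank+1, 81800, comm);
    }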
-
-string MeshAdapter::getDescription() const
-{
-    return "FinleyMesh";
-}
-
-string MeshAdapter::functionSpaceTypeAsString(int functionSpaceType) const
-{
-    FunctionSpaceNamesMapType::iterator loc;
-    loc=m_functionSpaceTypeNames.find(functionSpaceType);
-    if (loc==m_functionSpaceTypeNames.end()) {
-        return "Invalid function space type code.";
-    } else {
-        return loc->second;
-    }
-}
-
-bool MeshAdapter::isValidFunctionSpaceType(int functionSpaceType) const
-{
-    FunctionSpaceNamesMapType::iterator loc;
-    loc=m_functionSpaceTypeNames.find(functionSpaceType);
-    return (loc!=m_functionSpaceTypeNames.end());
-}
-
-void MeshAdapter::setFunctionSpaceTypeNames()
-{
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                DegreesOfFreedom,"Finley_DegreesOfFreedom [Solution(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedDegreesOfFreedom,"Finley_ReducedDegreesOfFreedom [ReducedSolution(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                Nodes,"Finley_Nodes [ContinuousFunction(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedNodes,"Finley_Reduced_Nodes [ReducedContinuousFunction(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                Elements,"Finley_Elements [Function(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedElements,"Finley_Reduced_Elements [ReducedFunction(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                FaceElements,"Finley_Face_Elements [FunctionOnBoundary(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedFaceElements,"Finley_Reduced_Face_Elements [ReducedFunctionOnBoundary(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                Points,"Finley_Points [DiracDeltaFunctions(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ContactElementsZero,"Finley_Contact_Elements_0 [FunctionOnContactZero(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedContactElementsZero,"Finley_Reduced_Contact_Elements_0 [ReducedFunctionOnContactZero(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ContactElementsOne,"Finley_Contact_Elements_1 [FunctionOnContactOne(domain)]"));
-    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
-                ReducedContactElementsOne,"Finley_Reduced_Contact_Elements_1 [ReducedFunctionOnContactOne(domain)]"));
-}
-
-int MeshAdapter::getContinuousFunctionCode() const
-{
-    return Nodes;
-}
-
-int MeshAdapter::getReducedContinuousFunctionCode() const
-{
-    return ReducedNodes;
-}
-
-int MeshAdapter::getFunctionCode() const
-{
-    return Elements;
-}
-
-int MeshAdapter::getReducedFunctionCode() const
-{
-    return ReducedElements;
-}
-
-int MeshAdapter::getFunctionOnBoundaryCode() const
-{
-    return FaceElements;
-}
-
-int MeshAdapter::getReducedFunctionOnBoundaryCode() const
-{
-    return ReducedFaceElements;
-}
-
-int MeshAdapter::getFunctionOnContactZeroCode() const
-{
-    return ContactElementsZero;
-}
-
-int MeshAdapter::getReducedFunctionOnContactZeroCode() const
-{
-    return ReducedContactElementsZero;
-}
-
-int MeshAdapter::getFunctionOnContactOneCode() const
-{
-    return ContactElementsOne;
-}
-
-int MeshAdapter::getReducedFunctionOnContactOneCode() const
-{
-    return ReducedContactElementsOne;
-}
-
-int MeshAdapter::getSolutionCode() const
-{
-    return DegreesOfFreedom;
-}
-
-int MeshAdapter::getReducedSolutionCode() const
-{
-    return ReducedDegreesOfFreedom;
-}
-
-int MeshAdapter::getDiracDeltaFunctionsCode() const
-{
-    return Points;
-}
-
-//
-// return the spatial dimension of the Mesh:
-//
-int MeshAdapter::getDim() const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    return mesh->getDim();
-}
-
-//
-// Return the number of data points summed across all MPI processes
-//
-dim_t MeshAdapter::getNumDataPointsGlobal() const
-{
-    return m_finleyMesh.get()->Nodes->getGlobalNumNodes();
-}
-
-//
-// return the number of data points per sample and the number of samples
-// needed to represent data on parts of the mesh.
-//
-pair<int,dim_t> MeshAdapter::getDataShape(int functionSpaceCode) const
-{
-    int numDataPointsPerSample=0;
-    dim_t numSamples=0;
-    Mesh* mesh=m_finleyMesh.get();
-    switch (functionSpaceCode) {
-        case Nodes:
-            numDataPointsPerSample=1;
-            numSamples=mesh->Nodes->getNumNodes();
-            break;
-        case ReducedNodes:
-            numDataPointsPerSample=1;
-            numSamples=mesh->Nodes->getNumReducedNodes();
-            break;
-        case Elements:
-            if (mesh->Elements!=NULL) {
-                numSamples=mesh->Elements->numElements;
-                numDataPointsPerSample=mesh->Elements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
-            }
-            break;
-        case ReducedElements:
-            if (mesh->Elements!=NULL) {
-                numSamples=mesh->Elements->numElements;
-                numDataPointsPerSample=mesh->Elements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
-            }
-            break;
-        case FaceElements:
-            if (mesh->FaceElements!=NULL) {
-                numDataPointsPerSample=mesh->FaceElements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
-                numSamples=mesh->FaceElements->numElements;
-            }
-            break;
-        case ReducedFaceElements:
-            if (mesh->FaceElements!=NULL) {
-                numDataPointsPerSample=mesh->FaceElements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
-                numSamples=mesh->FaceElements->numElements;
-            }
-            break;
-        case Points:
-            if (mesh->Points!=NULL) {
-                numDataPointsPerSample=1;
-                numSamples=mesh->Points->numElements;
-            }
-            break;
-        case ContactElementsZero:
-            if (mesh->ContactElements!=NULL) {
-                numDataPointsPerSample=mesh->ContactElements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
-                numSamples=mesh->ContactElements->numElements;
-            }
-            break;
-        case ReducedContactElementsZero:
-            if (mesh->ContactElements!=NULL) {
-                numDataPointsPerSample=mesh->ContactElements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
-                numSamples=mesh->ContactElements->numElements;
-            }
-            break;
-        case ContactElementsOne:
-            if (mesh->ContactElements!=NULL) {
-                numDataPointsPerSample=mesh->ContactElements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
-                numSamples=mesh->ContactElements->numElements;
-            }
-            break;
-        case ReducedContactElementsOne:
-            if (mesh->ContactElements!=NULL) {
-                numDataPointsPerSample=mesh->ContactElements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
-                numSamples=mesh->ContactElements->numElements;
-            }
-            break;
-        case DegreesOfFreedom:
-            if (mesh->Nodes!=NULL) {
-                numDataPointsPerSample=1;
-                numSamples=mesh->Nodes->getNumDegreesOfFreedom();
-            }
-            break;
-        case ReducedDegreesOfFreedom:
-            if (mesh->Nodes!=NULL) {
-                numDataPointsPerSample=1;
-                numSamples=mesh->Nodes->getNumReducedDegreesOfFreedom();
-            }
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Invalid function space type: " << functionSpaceCode << " for domain: " << getDescription();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    return pair<int,dim_t>(numDataPointsPerSample, numSamples);
-}
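getDataShape() describes how data on a given function space is laid out: the first member of the returned pair is the number of data points per sample, the second the number of samples. A minimal usage sketch, assuming a MeshAdapter instance named domain (hypothetical):

    // How is data on the Elements function space organised?
    std::pair<int, dim_t> shape = domain.getDataShape(Elements);
    const int   pointsPerSample = shape.first;   // quadrature points per element
    const dim_t numSamples      = shape.second;  // number of elements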
-
-//
-// adds a second-order linear PDE into a given stiffness matrix and
-// right-hand side:
-//
-void MeshAdapter::addPDEToSystem(
-        escript::AbstractSystemMatrix& mat, escript::Data& rhs,
-        const escript::Data& A, const escript::Data& B, const escript::Data& C,
-        const escript::Data& D, const escript::Data& X, const escript::Data& Y,
-        const escript::Data& d, const escript::Data& y,
-        const escript::Data& d_contact, const escript::Data& y_contact,
-        const escript::Data& d_dirac, const escript::Data& y_dirac) const
-{
-    SystemMatrixAdapter* smat=dynamic_cast<SystemMatrixAdapter*>(&mat);
-    if (!smat)
-        throw FinleyAdapterException("finley only supports Paso system matrices.");
-
-    Mesh* mesh=m_finleyMesh.get();
-    paso::SystemMatrix_ptr S(smat->getPaso_SystemMatrix());
-    Assemble_PDE(mesh->Nodes, mesh->Elements, S, rhs, A, B, C, D, X, Y);
-    checkFinleyError();
-
-
-    Assemble_PDE(mesh->Nodes, mesh->FaceElements, S, rhs,
-            escript::Data(), escript::Data(), escript::Data(), d,
-            escript::Data(), y);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->ContactElements, S, rhs,
-            escript::Data(), escript::Data(), escript::Data(), d_contact,
-            escript::Data(), y_contact);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->Points, S, rhs, escript::Data(),
-            escript::Data(), escript::Data(), d_dirac, escript::Data(), y_dirac);
-    checkFinleyError();
-}
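The coefficient names in addPDEToSystem() follow the escript linear PDE template. As a hedged paraphrase of the scalar form (see the escript user's guide for the authoritative, tensor-valued statement), the volume and boundary contributions assembled above correspond to

    -\frac{\partial}{\partial x_j}\Big(A_{jl}\,\frac{\partial u}{\partial x_l} + B_j\,u\Big)
        + C_l\,\frac{\partial u}{\partial x_l} + D\,u
        = -\frac{\partial X_j}{\partial x_j} + Y  \quad \text{in the domain,}

    n_j\Big(A_{jl}\,\frac{\partial u}{\partial x_l} + B_j\,u\Big) + d\,u = n_j X_j + y
        \quad \text{on the boundary,}

with d_contact/y_contact and d_dirac/y_dirac supplying the analogous contributions on contact elements and at Dirac delta points, matching the four Assemble_PDE calls above.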
-
-void MeshAdapter::addPDEToLumpedSystem(escript::Data& mat,
-        const escript::Data& D, const escript::Data& d,
-        const escript::Data& d_dirac, const bool useHRZ) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    Assemble_LumpedSystem(mesh->Nodes, mesh->Elements, mat, D, useHRZ);
-    checkFinleyError();
-
-    Assemble_LumpedSystem(mesh->Nodes, mesh->FaceElements, mat, d, useHRZ);
-    checkFinleyError();
-
-    Assemble_LumpedSystem(mesh->Nodes, mesh->Points, mat, d_dirac, useHRZ);
-    checkFinleyError();
-}
-
-//
-// adds a second-order linear PDE into the right-hand side only
-//
-void MeshAdapter::addPDEToRHS(escript::Data& rhs, const escript::Data& X,
-        const escript::Data& Y, const escript::Data& y,
-        const escript::Data& y_contact, const escript::Data& y_dirac) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    Assemble_PDE(mesh->Nodes, mesh->Elements, paso::SystemMatrix_ptr(), rhs,
-            escript::Data(), escript::Data(), escript::Data(), escript::Data(),
-            X, Y);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->FaceElements, paso::SystemMatrix_ptr(),
-            rhs, escript::Data(), escript::Data(), escript::Data(),
-            escript::Data(), escript::Data(), y);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->ContactElements, paso::SystemMatrix_ptr(),
-            rhs, escript::Data(), escript::Data(), escript::Data(),
-            escript::Data(), escript::Data(), y_contact);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->Points, paso::SystemMatrix_ptr(), rhs,
-            escript::Data(), escript::Data(), escript::Data(), escript::Data(),
-            escript::Data(), y_dirac);
-    checkFinleyError();
-}
-
-//
-// adds a second-order PDE into a transport problem
-//
-void MeshAdapter::addPDEToTransportProblem(
-        escript::AbstractTransportProblem& tp, escript::Data& source,
-        const escript::Data& M, const escript::Data& A, const escript::Data& B,
-        const escript::Data& C, const escript::Data& D, const escript::Data& X,
-        const escript::Data& Y, const escript::Data& d, const escript::Data& y,
-        const escript::Data& d_contact, const escript::Data& y_contact,
-        const escript::Data& d_dirac, const escript::Data& y_dirac) const
-{
-    TransportProblemAdapter* tpa=dynamic_cast<TransportProblemAdapter*>(&tp);
-    if (!tpa)
-        throw FinleyAdapterException("finley only supports Paso transport problems.");
-
-    source.expand();
-
-    Mesh* mesh=m_finleyMesh.get();
-    paso::TransportProblem_ptr _tp(tpa->getPaso_TransportProblem());
-
-    Assemble_PDE(mesh->Nodes, mesh->Elements, _tp->mass_matrix, source,
-                        escript::Data(), escript::Data(), escript::Data(),
-                        M, escript::Data(), escript::Data());
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->Elements, _tp->transport_matrix,
-                        source, A, B, C, D, X, Y);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->FaceElements, _tp->transport_matrix,
-                        source, escript::Data(), escript::Data(),
-                        escript::Data(), d, escript::Data(), y);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->ContactElements,
-                        _tp->transport_matrix, source, escript::Data(),
-                        escript::Data(), escript::Data(), d_contact,
-                        escript::Data(), y_contact);
-    checkFinleyError();
-
-    Assemble_PDE(mesh->Nodes, mesh->Points, _tp->transport_matrix,
-                        source, escript::Data(), escript::Data(),
-                        escript::Data(), d_dirac, escript::Data(), y_dirac);
-    checkFinleyError();
-}
-
-//
-// interpolates data between different function spaces
-//
-void MeshAdapter::interpolateOnDomain(escript::Data& target, const escript::Data& in) const
-{
-    const MeshAdapter& inDomain=dynamic_cast<const MeshAdapter&>(*(in.getFunctionSpace().getDomain()));
-    const MeshAdapter& targetDomain=dynamic_cast<const MeshAdapter&>(*(target.getFunctionSpace().getDomain()));
-    if (inDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of interpolant.");
-    if (targetDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of interpolation target.");
-
-    Mesh* mesh=m_finleyMesh.get();
-    switch(in.getFunctionSpace().getTypeCode()) {
-        case Nodes:
-            switch(target.getFunctionSpace().getTypeCode()) {
-                case Nodes:
-                case ReducedNodes:
-                case DegreesOfFreedom:
-                case ReducedDegreesOfFreedom:
-                    Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    break;
-                case Elements:
-                case ReducedElements:
-                    Assemble_interpolate(mesh->Nodes, mesh->Elements, in,target);
-                    break;
-                case FaceElements:
-                case ReducedFaceElements:
-                    Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in, target);
-                    break;
-                case Points:
-                    Assemble_interpolate(mesh->Nodes, mesh->Points, in, target);
-                    break;
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in, target);
-                    break;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation on Domain: Finley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-                    throw FinleyAdapterException(temp.str());
-                    break;
-            }
-            break;
-        case ReducedNodes:
-            switch(target.getFunctionSpace().getTypeCode()) {
-                case Nodes:
-                case ReducedNodes:
-                case DegreesOfFreedom:
-                case ReducedDegreesOfFreedom:
-                    Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    break;
-                case Elements:
-                case ReducedElements:
-                    Assemble_interpolate(mesh->Nodes, mesh->Elements, in, target);
-                    break;
-                case FaceElements:
-                case ReducedFaceElements:
-                    Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in, target);
-                    break;
-                case Points:
-                    Assemble_interpolate(mesh->Nodes, mesh->Points, in, target);
-                    break;
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in, target);
-                    break;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation on Domain: Finley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-                    throw FinleyAdapterException(temp.str());
-                    break;
-            }
-            break;
-        case Elements:
-            if (target.getFunctionSpace().getTypeCode()==Elements) {
-                Assemble_CopyElementData(mesh->Elements, target, in);
-            } else if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
-                Assemble_AverageElementData(mesh->Elements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on elements possible.");
-            }
-            break;
-        case ReducedElements:
-            if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
-                Assemble_CopyElementData(mesh->Elements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on elements with reduced integration order possible.");
-            }
-            break;
-        case FaceElements:
-            if (target.getFunctionSpace().getTypeCode()==FaceElements) {
-                Assemble_CopyElementData(mesh->FaceElements, target, in);
-            } else if (target.getFunctionSpace().getTypeCode()==ReducedFaceElements) {
-                Assemble_AverageElementData(mesh->FaceElements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on face elements possible.");
-            }
-            break;
-        case ReducedFaceElements:
-            if (target.getFunctionSpace().getTypeCode()==ReducedFaceElements) {
-                Assemble_CopyElementData(mesh->FaceElements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on face elements with reduced integration order possible.");
-            }
-            break;
-        case Points:
-            if (target.getFunctionSpace().getTypeCode()==Points) {
-                Assemble_CopyElementData(mesh->Points, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on points possible.");
-            }
-            break;
-        case ContactElementsZero:
-        case ContactElementsOne:
-            if (target.getFunctionSpace().getTypeCode()==ContactElementsZero || target.getFunctionSpace().getTypeCode()==ContactElementsOne) {
-                Assemble_CopyElementData(mesh->ContactElements, target, in);
-            } else if (target.getFunctionSpace().getTypeCode()==ReducedContactElementsZero || target.getFunctionSpace().getTypeCode()==ReducedContactElementsOne) {
-                Assemble_AverageElementData(mesh->ContactElements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on contact elements possible.");
-            }
-            break;
-        case ReducedContactElementsZero:
-        case ReducedContactElementsOne:
-            if (target.getFunctionSpace().getTypeCode()==ReducedContactElementsZero || target.getFunctionSpace().getTypeCode()==ReducedContactElementsOne) {
-                Assemble_CopyElementData(mesh->ContactElements, target, in);
-            } else {
-                throw FinleyAdapterException("Error - No interpolation with data on contact elements with reduced integration order possible.");
-            }
-            break;
-        case DegreesOfFreedom:
-            switch(target.getFunctionSpace().getTypeCode()) {
-                case ReducedDegreesOfFreedom:
-                case DegreesOfFreedom:
-                    Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    break;
-
-                case Nodes:
-                case ReducedNodes:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in);
-                        in2.expand();
-                        Assemble_CopyNodalData(mesh->Nodes, target, in2);
-                    } else {
-                        Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    }
-                    break;
-                case Elements:
-                case ReducedElements:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, continuousFunction(*this));
-                        Assemble_interpolate(mesh->Nodes, mesh->Elements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->Elements, in, target);
-                    }
-                    break;
-                case FaceElements:
-                case ReducedFaceElements:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, continuousFunction(*this));
-                        Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in, target);
-                    }
-                    break;
-                case Points:
-                    if (getMPISize()>1) {
-                        //escript::Data in2=escript::Data(in, continuousFunction(*this) );
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->Points, in, target);
-                    }
-                    break;
-                case ContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsZero:
-                case ReducedContactElementsOne:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, continuousFunction(*this));
-                        Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in, target);
-                    }
-                    break;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-                    throw FinleyAdapterException(temp.str());
-            }
-            break;
-        case ReducedDegreesOfFreedom:
-            switch(target.getFunctionSpace().getTypeCode()) {
-                case Nodes:
-                    throw FinleyAdapterException("Error - Finley does not support interpolation from reduced degrees of freedom to mesh nodes.");
-                case ReducedNodes:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in);
-                        in2.expand();
-                        Assemble_CopyNodalData(mesh->Nodes, target, in2);
-                    } else {
-                        Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    }
-                    break;
-                case DegreesOfFreedom:
-                    throw FinleyAdapterException("Error - Finley does not support interpolation from reduced degrees of freedom to degrees of freedom");
-                    break;
-                case ReducedDegreesOfFreedom:
-                    Assemble_CopyNodalData(mesh->Nodes, target, in);
-                    break;
-                case Elements:
-                case ReducedElements:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, reducedContinuousFunction(*this) );
-                        Assemble_interpolate(mesh->Nodes, mesh->Elements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->Elements, in, target);
-                    }
-                    break;
-                case FaceElements:
-                case ReducedFaceElements:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, reducedContinuousFunction(*this) );
-                        Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->FaceElements, in, target);
-                    }
-                    break;
-                case Points:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, reducedContinuousFunction(*this));
-                        Assemble_interpolate(mesh->Nodes, mesh->Points, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->Points, in, target);
-                    }
-                    break;
-                case ContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsZero:
-                case ReducedContactElementsOne:
-                    if (getMPISize()>1) {
-                        escript::Data in2=escript::Data(in, reducedContinuousFunction(*this));
-                        Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in2, target);
-                    } else {
-                        Assemble_interpolate(mesh->Nodes, mesh->ContactElements, in, target);
-                    }
-                    break;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
-                    throw FinleyAdapterException(temp.str());
-                    break;
-            }
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Interpolation On Domain: Finley does not know anything about function space type %d" << in.getFunctionSpace().getTypeCode();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    checkFinleyError();
-}
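From user code the branches above are normally reached indirectly: constructing an escript::Data object on a different function space triggers the domain's interpolation, using the same Data(arg, functionspace) constructor that appears inside this method. A minimal sketch, assuming a MeshAdapter dom and a nodal Data object u (both hypothetical):

    // Interpolate nodal data onto element quadrature points (Nodes -> Elements);
    // the constructor delegates to the domain's interpolation machinery.
    escript::Data uOnElements(u, escript::function(dom));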
-
-//
-// copies the locations of sample points into x
-//
-void MeshAdapter::setToX(escript::Data& arg) const
-{
-    const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-    if (argDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of data point locations");
-    Mesh* mesh=m_finleyMesh.get();
-    // in case of appropriate function space we can do the job directly:
-    if (arg.getFunctionSpace().getTypeCode()==Nodes) {
-        Assemble_NodeCoordinates(mesh->Nodes, arg);
-    } else {
-        escript::Data tmp_data=Vector(0., continuousFunction(*this), true);
-        Assemble_NodeCoordinates(mesh->Nodes, tmp_data);
-        // this is then interpolated onto arg:
-        interpolateOnDomain(arg, tmp_data);
-    }
-    checkFinleyError();
-}
-
-//
-// return the normal vectors at the location of data points as a Data object
-//
-void MeshAdapter::setToNormal(escript::Data& normal) const
-{
-    const MeshAdapter& normalDomain=dynamic_cast<const MeshAdapter&>(*(normal.getFunctionSpace().getDomain()));
-    if (normalDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of normal locations");
-    Mesh* mesh=m_finleyMesh.get();
-    switch(normal.getFunctionSpace().getTypeCode()) {
-        case Nodes:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for nodes");
-            break;
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for reduced nodes");
-            break;
-        case Elements:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for elements");
-            break;
-        case ReducedElements:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for elements with reduced integration order");
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            Assemble_getNormal(mesh->Nodes, mesh->FaceElements, normal);
-            break;
-        case Points:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for point elements");
-            break;
-        case ContactElementsOne:
-        case ContactElementsZero:
-        case ReducedContactElementsOne:
-        case ReducedContactElementsZero:
-            Assemble_getNormal(mesh->Nodes, mesh->ContactElements, normal);
-            break;
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for degrees of freedom.");
-            break;
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - Finley does not support surface normal vectors for reduced degrees of freedom.");
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Normal Vectors: Finley does not know anything about function space type " << normal.getFunctionSpace().getTypeCode();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    checkFinleyError();
-}
-
-//
-// interpolates data to another domain
-//
-void MeshAdapter::interpolateAcross(escript::Data& target, const escript::Data& source) const
-{
-    throw FinleyAdapterException("Error - Finley does not allow interpolation across domains.");
-}
-
-//
-// calculates the integral of a function defined on arg
-//
-void MeshAdapter::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
-{
-    const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-    if (argDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of integration kernel");
-
-    double blocktimer_start = blocktimer_time();
-    Mesh* mesh=m_finleyMesh.get();
-    switch(arg.getFunctionSpace().getTypeCode()) {
-        case Nodes:
-            {
-                escript::Data temp(arg, escript::function(*this));
-                Assemble_integrate(mesh->Nodes, mesh->Elements, temp, &integrals[0]);
-            }
-            break;
-        case ReducedNodes:
-            {
-                escript::Data temp(arg, escript::function(*this));
-                Assemble_integrate(mesh->Nodes, mesh->Elements, temp, &integrals[0]);
-            }
-            break;
-        case Elements:
-        case ReducedElements:
-            Assemble_integrate(mesh->Nodes, mesh->Elements, arg, &integrals[0]);
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            Assemble_integrate(mesh->Nodes, mesh->FaceElements, arg, &integrals[0]);
-            break;
-        case Points:
-            throw FinleyAdapterException("Error - Integral of data on points is not supported.");
-            break;
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            Assemble_integrate(mesh->Nodes, mesh->ContactElements, arg, &integrals[0]);
-            break;
-        case DegreesOfFreedom:
-        case ReducedDegreesOfFreedom:
-            {
-                escript::Data temp(arg, escript::function(*this));
-                Assemble_integrate(mesh->Nodes, mesh->Elements, temp, &integrals[0]);
-            }
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Integrals: Finley does not know anything about function space type " << arg.getFunctionSpace().getTypeCode();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    checkFinleyError();
-    blocktimer_increment("integrate()", blocktimer_start);
-}
-
-//
-// calculates the gradient of arg
-//
-void MeshAdapter::setToGradient(escript::Data& grad, const escript::Data& arg) const
-{
-    const MeshAdapter& argDomain=dynamic_cast<const MeshAdapter&>(*(arg.getFunctionSpace().getDomain()));
-    if (argDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of gradient argument");
-    const MeshAdapter& gradDomain=dynamic_cast<const MeshAdapter&>(*(grad.getFunctionSpace().getDomain()));
-    if (gradDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of gradient");
-
-    Mesh* mesh=m_finleyMesh.get();
-    escript::Data nodeData;
-    if (getMPISize()>1) {
-        if (arg.getFunctionSpace().getTypeCode() == DegreesOfFreedom) {
-            nodeData=escript::Data(arg, continuousFunction(*this));
-        } else if(arg.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
-            nodeData=escript::Data(arg, reducedContinuousFunction(*this));
-        } else {
-            nodeData = arg;
-        }
-    } else {
-        nodeData = arg;
-    }
-    switch(grad.getFunctionSpace().getTypeCode()) {
-        case Nodes:
-            throw FinleyAdapterException("Error - Gradient at nodes is not supported.");
-            break;
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - Gradient at reduced nodes is not supported.");
-            break;
-        case Elements:
-        case ReducedElements:
-            Assemble_gradient(mesh->Nodes, mesh->Elements, grad, nodeData);
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            Assemble_gradient(mesh->Nodes, mesh->FaceElements, grad, nodeData);
-            break;
-        case Points:
-            throw FinleyAdapterException("Error - Gradient at points is not supported.");
-            break;
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            Assemble_gradient(mesh->Nodes, mesh->ContactElements, grad, nodeData);
-            break;
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - Gradient at degrees of freedom is not supported.");
-            break;
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - Gradient at reduced degrees of freedom is not supported.");
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Gradient: Finley does not know anything about function space type " << arg.getFunctionSpace().getTypeCode();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    checkFinleyError();
-}
-
-//
-// returns the size of elements
-//
-void MeshAdapter::setToSize(escript::Data& size) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    switch(size.getFunctionSpace().getTypeCode()) {
-        case Nodes:
-            throw FinleyAdapterException("Error - Size of nodes is not supported.");
-            break;
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - Size of reduced nodes is not supported.");
-            break;
-        case Elements:
-        case ReducedElements:
-            Assemble_getSize(mesh->Nodes, mesh->Elements, size);
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            Assemble_getSize(mesh->Nodes, mesh->FaceElements, size);
-            break;
-        case Points:
-            throw FinleyAdapterException("Error - Size of point elements is not supported.");
-            break;
-        case ContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsZero:
-        case ReducedContactElementsOne:
-            Assemble_getSize(mesh->Nodes,mesh->ContactElements,size);
-            break;
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - Size of degrees of freedom is not supported.");
-            break;
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - Size of reduced degrees of freedom is not supported.");
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Element size: Finley does not know anything about function space type " << size.getFunctionSpace().getTypeCode();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    checkFinleyError();
-}
-
-//
-// sets the location of nodes
-//
-void MeshAdapter::setNewX(const escript::Data& new_x)
-{
-    Mesh* mesh=m_finleyMesh.get();
-    const MeshAdapter& newDomain=dynamic_cast<const MeshAdapter&>(*(new_x.getFunctionSpace().getDomain()));
-    if (newDomain!=*this)
-        throw FinleyAdapterException("Error - Illegal domain of new point locations");
-    if (new_x.getFunctionSpace() == continuousFunction(*this)) {
-        mesh->setCoordinates(new_x);
-    } else {
-        throw FinleyAdapterException("As of escript version 3.3 SetX() only accepts ContinuousFunction arguments. Please interpolate.");
-    }
-    checkFinleyError();
-}
-
-bool MeshAdapter::ownSample(int fs_code, index_t id) const
-{
-    if (getMPISize() > 1 && fs_code != FINLEY_DEGREES_OF_FREEDOM &&
-            fs_code != FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
-#ifdef ESYS_MPI
-        index_t myFirstNode=0, myLastNode=0;
-        index_t* globalNodeIndex = NULL;
-        Mesh* mesh_p=m_finleyMesh.get();
-        /*
-         * this method is only used by saveDataCSV, which would use the returned
-         * values for reduced nodes incorrectly, so this case is disabled for now
-        if (fs_code == FINLEY_REDUCED_NODES) {
-            myFirstNode = NodeFile_getFirstReducedNode(mesh_p->Nodes);
-            myLastNode = NodeFile_getLastReducedNode(mesh_p->Nodes);
-            globalNodeIndex = NodeFile_borrowGlobalReducedNodesIndex(mesh_p->Nodes);
-        } else
-        */
-        if (fs_code == FINLEY_NODES) {
-            myFirstNode = mesh_p->Nodes->getFirstNode();
-            myLastNode = mesh_p->Nodes->getLastNode();
-            globalNodeIndex = mesh_p->Nodes->borrowGlobalNodesIndex();
-        } else {
-            throw FinleyAdapterException("Unsupported function space type for ownSample()");
-        }
-
-        const index_t k = globalNodeIndex[id];
-        return (myFirstNode <= k && k < myLastNode);
-#endif
-    }
-    return true;
-}
-
-
-//
-// creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros
-//
-escript::ASM_ptr MeshAdapter::newSystemMatrix(const int row_blocksize,
-                            const escript::FunctionSpace& row_functionspace,
-                            const int column_blocksize,
-                            const escript::FunctionSpace& column_functionspace,
-                            const int type) const
-{
-    // is the domain right?
-    const MeshAdapter& row_domain=dynamic_cast<const MeshAdapter&>(*(row_functionspace.getDomain()));
-    if (row_domain!=*this)
-        throw FinleyAdapterException("Error - domain of row function space does not match the domain of matrix generator.");
-    const MeshAdapter& col_domain=dynamic_cast<const MeshAdapter&>(*(column_functionspace.getDomain()));
-    if (col_domain!=*this)
-        throw FinleyAdapterException("Error - domain of column function space does not match the domain of matrix generator.");
-
-    int reduceRowOrder=0;
-    int reduceColOrder=0;
-    // is the function space type right?
-    if (row_functionspace.getTypeCode() == ReducedDegreesOfFreedom) {
-        reduceRowOrder=1;
-    } else if (row_functionspace.getTypeCode() != DegreesOfFreedom) {
-        throw FinleyAdapterException("Error - illegal function space type for system matrix rows.");
-    }
-    if (column_functionspace.getTypeCode() == ReducedDegreesOfFreedom) {
-        reduceColOrder=1;
-    } else if (column_functionspace.getTypeCode() != DegreesOfFreedom) {
-        throw FinleyAdapterException("Error - illegal function space type for system matrix columns.");
-    }
-
-    // generate matrix:
-    paso::SystemMatrixPattern_ptr pattern = getFinley_Mesh()->getPattern(
-            reduceRowOrder, reduceColOrder);
-    checkFinleyError();
-    paso::SystemMatrix_ptr fsystemMatrix;
-    const int trilinos = 0;
-    if (trilinos) {
-#ifdef TRILINOS
-        // FIXME: Allocate an Epetra_VbrMatrix here...
-#endif
-    } else {
-        fsystemMatrix.reset(new paso::SystemMatrix(type, pattern,
-                            row_blocksize, column_blocksize, false));
-    }
-    checkPasoError();
-    SystemMatrixAdapter* sma=new SystemMatrixAdapter(fsystemMatrix, row_blocksize, row_functionspace, column_blocksize, column_functionspace);
-    return escript::ASM_ptr(sma);
-}
-
-//
-// creates a TransportProblemAdapter
-//
-escript::ATP_ptr MeshAdapter::newTransportProblem(const int blocksize,
-        const escript::FunctionSpace& functionspace, const int type) const
-{
-    // is the domain right?
-    const MeshAdapter& domain=dynamic_cast<const MeshAdapter&>(*(functionspace.getDomain()));
-    if (domain!=*this)
-        throw FinleyAdapterException("Error - domain of function space does not match the domain of transport problem generator.");
-
-    // is the function space type right?
-    int reduceOrder=0;
-    if (functionspace.getTypeCode() == ReducedDegreesOfFreedom) {
-        reduceOrder=1;
-    } else if (functionspace.getTypeCode() != DegreesOfFreedom) {
-        throw FinleyAdapterException("Error - illegal function space type for transport problem.");
-    }
-
-    // generate transport problem:
-    paso::SystemMatrixPattern_ptr pattern = getFinley_Mesh()->getPattern(
-            reduceOrder, reduceOrder);
-    checkFinleyError();
-    paso::TransportProblem_ptr transportProblem(new paso::TransportProblem(
-                                                pattern, blocksize));
-    checkPasoError();
-    TransportProblemAdapter* tpa=new TransportProblemAdapter(
-            transportProblem, blocksize, functionspace);
-    return escript::ATP_ptr(tpa);
-}
-
-//
-// returns true if data on functionSpaceCode is considered to be cell-centered
-//
-bool MeshAdapter::isCellOriented(int functionSpaceCode) const
-{
-    switch(functionSpaceCode) {
-        case Nodes:
-        case DegreesOfFreedom:
-        case ReducedDegreesOfFreedom:
-            return false;
-        case Elements:
-        case FaceElements:
-        case Points:
-        case ContactElementsZero:
-        case ContactElementsOne:
-        case ReducedElements:
-        case ReducedFaceElements:
-        case ReducedContactElementsZero:
-        case ReducedContactElementsOne:
-            return true;
-        default:
-            stringstream temp;
-            temp << "Error - Cell: Finley does not know anything about function space type " << functionSpaceCode;
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    return false;
-}
-
-bool
-MeshAdapter::commonFunctionSpace(const vector<int>& fs, int& resultcode) const
-{
-    /* The idea is to use equivalence classes, i.e. types that can be
-       interpolated back and forth:
-        class 1: DOF <-> Nodes
-        class 2: ReducedDOF <-> ReducedNodes
-        class 3: Points
-        class 4: Elements
-        class 5: ReducedElements
-        class 6: FaceElements
-        class 7: ReducedFaceElements
-        class 8: ContactElementZero <-> ContactElementOne
-        class 9: ReducedContactElementZero <-> ReducedContactElementOne
-
-    There is also a set of lines. Interpolation is possible down a line but
-    not between lines.
-    Classes 1 and 2 belong to all lines, so they are not considered here.
-        line 0: class 3
-        line 1: classes 4,5
-        line 2: classes 6,7
-        line 3: classes 8,9
-
-    For classes with multiple members (e.g. class 2) we keep flags recording
-    whether at least one instance is present, e.g. hasnodes is true if we have
-    at least one instance of Nodes.
-    */
-    if (fs.empty())
-        return false;
-
-    vector<int> hasclass(10);
-    vector<int> hasline(4);
-    bool hasnodes=false;
-    bool hasrednodes=false;
-    bool hascez=false;
-    bool hasrcez=false;
-    for (int i=0;i<fs.size();++i) {
-        switch(fs[i]) {
-            case Nodes:
-                hasnodes=true; // fall through
-            case DegreesOfFreedom:
-                hasclass[1]=1;
-                break;
-            case ReducedNodes:
-                hasrednodes=true; // fall through
-            case ReducedDegreesOfFreedom:
-                hasclass[2]=1;
-                break;
-            case Points:
-                hasline[0]=1;
-                hasclass[3]=1;
-                break;
-            case Elements:
-                hasclass[4]=1;
-                hasline[1]=1;
-                break;
-            case ReducedElements:
-                hasclass[5]=1;
-                hasline[1]=1;
-                break;
-            case FaceElements:
-                hasclass[6]=1;
-                hasline[2]=1;
-                break;
-            case ReducedFaceElements:
-                hasclass[7]=1;
-                hasline[2]=1;
-                break;
-            case ContactElementsZero:
-                hascez=true; // fall through
-            case ContactElementsOne:
-                hasclass[8]=1;
-                hasline[3]=1;
-                break;
-            case ReducedContactElementsZero:
-                hasrcez=true; // fall through
-            case ReducedContactElementsOne:
-                hasclass[9]=1;
-                hasline[3]=1;
-                break;
-            default:
-                return false;
-        }
-    }
-    int totlines=hasline[0]+hasline[1]+hasline[2]+hasline[3];
-
-    // fail if we have more than one leaf group
-    if (totlines>1)
-        return false; // there are at least two branches we can't interpolate between
-    else if (totlines==1) {
-        if (hasline[0]==1)              // we have points
-            resultcode=Points;
-        else if (hasline[1]==1) {
-            if (hasclass[5]==1)
-                resultcode=ReducedElements;
-            else
-                resultcode=Elements;
-        } else if (hasline[2]==1) {
-            if (hasclass[7]==1)
-                resultcode=ReducedFaceElements;
-            else
-                resultcode=FaceElements;
-        } else {   // so we must be in line 3
-            if (hasclass[9]==1) {
-                // need something from class 9
-                resultcode=(hasrcez ? ReducedContactElementsZero : ReducedContactElementsOne);
-            } else {
-                // something from class 8
-                resultcode=(hascez?ContactElementsZero:ContactElementsOne);
-            }
-        }
-    } else { // totlines==0
-        if (hasclass[2]==1) {
-            // something from class 2
-            resultcode=(hasrednodes ? ReducedNodes : ReducedDegreesOfFreedom);
-        } else { 
-            // something from class 1
-            resultcode=(hasnodes ? Nodes : DegreesOfFreedom);
-        }
-    }
-    return true;
-}
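-
-/* Editor's note: a minimal illustrative sketch, not part of the original
-   source, of how the equivalence-class scheme above resolves a common
-   function space (assuming a MeshAdapter instance `dom`):
-
-       int rc = -1;
-       std::vector<int> fs;
-       fs.push_back(MeshAdapter::Elements);
-       fs.push_back(MeshAdapter::ReducedElements);
-       dom.commonFunctionSpace(fs, rc);  // true, rc == ReducedElements (line 1)
-
-       fs.clear();
-       fs.push_back(MeshAdapter::Nodes);
-       fs.push_back(MeshAdapter::DegreesOfFreedom);
-       dom.commonFunctionSpace(fs, rc);  // true, rc == Nodes (class 1, no line)
-
-       fs.clear();
-       fs.push_back(MeshAdapter::Elements);
-       fs.push_back(MeshAdapter::FaceElements);
-       dom.commonFunctionSpace(fs, rc);  // false: lines 1 and 2 cannot mix
-*/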
-
-bool MeshAdapter::probeInterpolationOnDomain(int functionSpaceType_source,
-                                             int functionSpaceType_target) const
-{
-    switch(functionSpaceType_source) {
-        case Nodes:
-            switch(functionSpaceType_target) {
-                case Nodes:
-                case ReducedNodes:
-                case ReducedDegreesOfFreedom:
-                case DegreesOfFreedom:
-                case Elements:
-                case ReducedElements:
-                case FaceElements:
-                case ReducedFaceElements:
-                case Points:
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    return true;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << functionSpaceType_target;
-                    throw FinleyAdapterException(temp.str());
-            }
-            break;
-        case ReducedNodes:
-            switch(functionSpaceType_target) {
-                case ReducedNodes:
-                case ReducedDegreesOfFreedom:
-                case Elements:
-                case ReducedElements:
-                case FaceElements:
-                case ReducedFaceElements:
-                case Points:
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    return true;
-                case Nodes:
-                case DegreesOfFreedom:
-                    return false;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << functionSpaceType_target;
-                    throw FinleyAdapterException(temp.str());
-            }
-            break;
-        case Elements:
-            if (functionSpaceType_target==Elements) {
-                return true;
-            } else if (functionSpaceType_target==ReducedElements) {
-                return true;
-            } else {
-                return false;
-            }
-        case ReducedElements:
-            if (functionSpaceType_target==ReducedElements) {
-                return true;
-            } else {
-                return false;
-            }
-        case FaceElements:
-            if (functionSpaceType_target==FaceElements) {
-                return true;
-            } else if (functionSpaceType_target==ReducedFaceElements) {
-                return true;
-            } else {
-                return false;
-            }
-        case ReducedFaceElements:
-            if (functionSpaceType_target==ReducedFaceElements) {
-                return true;
-            } else {
-                return false;
-            }
-        case Points:
-            if (functionSpaceType_target==Points) {
-                return true;
-            } else {
-                return false;
-            }
-        case ContactElementsZero:
-        case ContactElementsOne:
-            if (functionSpaceType_target==ContactElementsZero || functionSpaceType_target==ContactElementsOne) {
-                return true;
-            } else if (functionSpaceType_target==ReducedContactElementsZero || functionSpaceType_target==ReducedContactElementsOne) {
-                return true;
-            } else {
-                return false;
-            }
-        case ReducedContactElementsZero:
-        case ReducedContactElementsOne:
-            if (functionSpaceType_target==ReducedContactElementsZero || functionSpaceType_target==ReducedContactElementsOne) {
-                return true;
-            } else {
-                return false;
-            }
-        case DegreesOfFreedom:
-            switch(functionSpaceType_target) {
-                case ReducedDegreesOfFreedom:
-                case DegreesOfFreedom:
-                case Nodes:
-                case ReducedNodes:
-                case Elements:
-                case ReducedElements:
-                case Points:
-                case FaceElements:
-                case ReducedFaceElements:
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    return true;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << functionSpaceType_target;
-                    throw FinleyAdapterException(temp.str());
-            }
-            break;
-        case ReducedDegreesOfFreedom:
-            switch(functionSpaceType_target) {
-                case ReducedDegreesOfFreedom:
-                case ReducedNodes:
-                case Elements:
-                case ReducedElements:
-                case FaceElements:
-                case ReducedFaceElements:
-                case Points:
-                case ContactElementsZero:
-                case ReducedContactElementsZero:
-                case ContactElementsOne:
-                case ReducedContactElementsOne:
-                    return true;
-                case Nodes:
-                case DegreesOfFreedom:
-                    return false;
-                default:
-                    stringstream temp;
-                    temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << functionSpaceType_target;
-                    throw FinleyAdapterException(temp.str());
-            }
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Interpolation On Domain: Finley does not know anything about function space type " << functionSpaceType_source;
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    return false;
-}
-
-signed char MeshAdapter::preferredInterpolationOnDomain(int functionSpaceType_source, int functionSpaceType_target) const
-{
-    if (probeInterpolationOnDomain(functionSpaceType_source, functionSpaceType_target))
-        return 1;
-
-    if (probeInterpolationOnDomain(functionSpaceType_target, functionSpaceType_source))
-        return -1;
-
-    return 0;
-}
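-
-/* Editor's note: an illustrative sketch, not part of the original source, of
-   the return convention used by preferredInterpolationOnDomain() above
-   (assuming a MeshAdapter instance `dom`):
-
-       dom.preferredInterpolationOnDomain(MeshAdapter::Nodes,
-                                          MeshAdapter::Elements);      //  1: Nodes -> Elements is possible
-       dom.preferredInterpolationOnDomain(MeshAdapter::Elements,
-                                          MeshAdapter::Nodes);         // -1: only the reverse direction works
-       dom.preferredInterpolationOnDomain(MeshAdapter::Elements,
-                                          MeshAdapter::FaceElements);  //  0: neither direction is possible
-*/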
-
-bool MeshAdapter::probeInterpolationAcross(int functionSpaceType_source,
-        const escript::AbstractDomain& targetDomain,
-        int functionSpaceType_target) const
-{
-    return false;
-}
-
-bool MeshAdapter::operator==(const escript::AbstractDomain& other) const
-{
-    const MeshAdapter* temp=dynamic_cast<const MeshAdapter*>(&other);
-    if (temp) {
-        return (m_finleyMesh==temp->m_finleyMesh);
-    } else {
-        return false;
-    }
-}
-
-bool MeshAdapter::operator!=(const escript::AbstractDomain& other) const
-{
-    return !(operator==(other));
-}
-
-int MeshAdapter::getSystemMatrixTypeId(const boost::python::object& options) const
-{
-    const escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy>(options);
-
-    return SystemMatrixAdapter::getSystemMatrixTypeId(sb.getSolverMethod(),
-                sb.getPreconditioner(), sb.getPackage(), sb.isSymmetric(),
-                m_finleyMesh->MPIInfo);
-}
-
-int MeshAdapter::getTransportTypeId(int solver, int preconditioner, int package, bool symmetry) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    return TransportProblemAdapter::getTransportTypeId(solver, preconditioner,
-                package, symmetry, mesh->MPIInfo);
-}
-
-escript::Data MeshAdapter::getX() const
-{
-    return continuousFunction(*this).getX();
-}
-
-escript::Data MeshAdapter::getNormal() const
-{
-    return functionOnBoundary(*this).getNormal();
-}
-
-escript::Data MeshAdapter::getSize() const
-{
-    return escript::function(*this).getSize();
-}
-
-const index_t* MeshAdapter::borrowSampleReferenceIDs(int functionSpaceType) const
-{
-    index_t *out = NULL;
-    Mesh* mesh=m_finleyMesh.get();
-    switch (functionSpaceType) {
-        case Nodes:
-            out=mesh->Nodes->Id;
-            break;
-        case ReducedNodes:
-            out=mesh->Nodes->reducedNodesId;
-            break;
-        case Elements:
-        case ReducedElements:
-            out=mesh->Elements->Id;
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            out=mesh->FaceElements->Id;
-            break;
-        case Points:
-            out=mesh->Points->Id;
-            break;
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            out=mesh->ContactElements->Id;
-            break;
-        case DegreesOfFreedom:
-            out=mesh->Nodes->degreesOfFreedomId;
-            break;
-        case ReducedDegreesOfFreedom:
-            out=mesh->Nodes->reducedDegreesOfFreedomId;
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Invalid function space type: " << functionSpaceType << " for domain: " << getDescription();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    return out;
-}
-
-int MeshAdapter::getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const
-{
-    int out=0;
-    Mesh* mesh=m_finleyMesh.get();
-    switch (functionSpaceType) {
-        case Nodes:
-            out=mesh->Nodes->Tag[sampleNo];
-            break;
-        case ReducedNodes:
-            throw FinleyAdapterException(" Error - ReducedNodes does not support tags.");
-            break;
-        case Elements:
-        case ReducedElements:
-            out=mesh->Elements->Tag[sampleNo];
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            out=mesh->FaceElements->Tag[sampleNo];
-            break;
-        case Points:
-            out=mesh->Points->Tag[sampleNo];
-            break;
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            out=mesh->ContactElements->Tag[sampleNo];
-            break;
-        case DegreesOfFreedom:
-            throw FinleyAdapterException(" Error - DegreesOfFreedom does not support tags.");
-            break;
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException(" Error - ReducedDegreesOfFreedom does not support tags.");
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Invalid function space type: " << functionSpaceType << " for domain: " << getDescription();
-            throw FinleyAdapterException(temp.str());
-            break;
-    }
-    return out;
-}
-
-
-void MeshAdapter::setTags(const int functionSpaceType, const int newTag, const escript::Data& mask) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    switch(functionSpaceType) {
-        case Nodes:
-            mesh->Nodes->setTags(newTag, mask);
-            break;
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - ReducedNodes does not support tags");
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - DegreesOfFreedom does not support tags");
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-        case Elements:
-        case ReducedElements:
-            mesh->Elements->setTags(newTag, mask);
-            break;
-        case FaceElements:
-        case ReducedFaceElements:
-            mesh->FaceElements->setTags(newTag, mask);
-            break;
-        case Points:
-            mesh->Points->setTags(newTag, mask);
-            break;
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            mesh->ContactElements->setTags(newTag, mask);
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Finley does not know anything about function space type " << functionSpaceType;
-            throw FinleyAdapterException(temp.str());
-    }
-    checkFinleyError();
-}
-
-void MeshAdapter::setTagMap(const string& name, int tag)
-{
-    Mesh* mesh=m_finleyMesh.get();
-    mesh->addTagMap(name.c_str(), tag);
-    checkFinleyError();
-}
-
-int MeshAdapter::getTag(const string& name) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    int tag = mesh->getTag(name.c_str());
-    checkFinleyError();
-    return tag;
-}
-
-bool MeshAdapter::isValidTagName(const string& name) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    return mesh->isValidTagName(name.c_str());
-}
-
-string MeshAdapter::showTagNames() const
-{
-    stringstream temp;
-    Mesh* mesh=m_finleyMesh.get();
-    TagMap::const_iterator it = mesh->tagMap.begin();
-    while (it != mesh->tagMap.end()) {
-        temp << it->first;
-        ++it;
-        if (it != mesh->tagMap.end())
-            temp << ", ";
-    }
-    return temp.str();
-}
-
-int MeshAdapter::getNumberOfTagsInUse(int functionSpaceCode) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    switch(functionSpaceCode) {
-        case Nodes:
-            return mesh->Nodes->tagsInUse.size();
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - ReducedNodes does not support tags");
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - DegreesOfFreedom does not support tags");
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-        case Elements:
-        case ReducedElements:
-            return mesh->Elements->tagsInUse.size();
-        case FaceElements:
-        case ReducedFaceElements:
-            return mesh->FaceElements->tagsInUse.size();
-        case Points:
-            return mesh->Points->tagsInUse.size();
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            return mesh->ContactElements->tagsInUse.size();
-        default:
-            stringstream ss;
-            ss << "Finley does not know anything about function space type "
-                 << functionSpaceCode;
-            throw FinleyAdapterException(ss.str());
-    }
-    return 0;
-}
-
-const int* MeshAdapter::borrowListOfTagsInUse(int functionSpaceCode) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    switch(functionSpaceCode) {
-        case Nodes:
-            if (mesh->Nodes->tagsInUse.empty())
-                return NULL;
-            else
-                return &mesh->Nodes->tagsInUse[0];
-        case ReducedNodes:
-            throw FinleyAdapterException("Error - ReducedNodes does not support tags");
-        case DegreesOfFreedom:
-            throw FinleyAdapterException("Error - DegreesOfFreedom does not support tags");
-        case ReducedDegreesOfFreedom:
-            throw FinleyAdapterException("Error - ReducedDegreesOfFreedom does not support tags");
-        case Elements:
-        case ReducedElements:
-            if (mesh->Elements->tagsInUse.empty())
-                return NULL;
-            else
-                return &mesh->Elements->tagsInUse[0];
-        case FaceElements:
-        case ReducedFaceElements:
-            if (mesh->FaceElements->tagsInUse.empty())
-                return NULL;
-            else
-                return &mesh->FaceElements->tagsInUse[0];
-        case Points:
-            if (mesh->Points->tagsInUse.empty())
-                return NULL;
-            else
-                return &mesh->Points->tagsInUse[0];
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            if (mesh->ContactElements->tagsInUse.empty())
-                return NULL;
-            else
-                return &mesh->ContactElements->tagsInUse[0];
-        default:
-            stringstream temp;
-            temp << "Error - Finley does not know anything about function space type " << functionSpaceCode;
-            throw FinleyAdapterException(temp.str());
-    }
-    return NULL;
-}
-
-
-bool MeshAdapter::canTag(int functionSpaceCode) const
-{
-    switch(functionSpaceCode) {
-        case Nodes:
-        case Elements:
-        case ReducedElements:
-        case FaceElements:
-        case ReducedFaceElements:
-        case Points:
-        case ContactElementsZero:
-        case ReducedContactElementsZero:
-        case ContactElementsOne:
-        case ReducedContactElementsOne:
-            return true;
-        default:
-            return false;
-    }
-}
-
-escript::AbstractDomain::StatusType MeshAdapter::getStatus() const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    return mesh->getStatus();
-}
-
-int MeshAdapter::getApproximationOrder(const int functionSpaceCode) const
-{
-    Mesh* mesh=m_finleyMesh.get();
-    int order =-1;
-    switch(functionSpaceCode) {
-        case Nodes:
-        case DegreesOfFreedom:
-            order=mesh->approximationOrder;
-            break;
-        case ReducedNodes:
-        case ReducedDegreesOfFreedom:
-            order=mesh->reducedApproximationOrder;
-            break;
-        case Elements:
-        case FaceElements:
-        case Points:
-        case ContactElementsZero:
-        case ContactElementsOne:
-            order=mesh->integrationOrder;
-            break;
-        case ReducedElements:
-        case ReducedFaceElements:
-        case ReducedContactElementsZero:
-        case ReducedContactElementsOne:
-            order=mesh->reducedIntegrationOrder;
-            break;
-        default:
-            stringstream temp;
-            temp << "Error - Finley does not know anything about function space type " << functionSpaceCode;
-            throw FinleyAdapterException(temp.str());
-    }
-    return order;
-}
-
-bool MeshAdapter::supportsContactElements() const
-{
-    return true;
-}
-
-escript::Data MeshAdapter::randomFill(const escript::DataTypes::ShapeType& shape,
-       const escript::FunctionSpace& what, long seed,
-       const boost::python::tuple& filter) const
-{
-    escript::Data towipe(0, shape, what, true);
-    // since we just made this object, no sharing is possible and we don't
-    // need to check for exclusive write
-    escript::DataTypes::ValueType& dv=towipe.getExpandedVectorReference();
-    const size_t dvsize=dv.size();
-    esysUtils::randomFillArray(seed, &(dv[0]), dvsize);
-    return towipe;	 
-}
-
-
-void MeshAdapter::addDiracPoints(const vector<double>& points,
-                                 const vector<int>& tags) const
-{
-    // points will be flattened
-    const int dim = getDim();
-    int numPoints=points.size()/dim;
-    int numTags=tags.size();
-    Mesh* mesh=m_finleyMesh.get();
-
-    if ( points.size() % dim != 0 ) {
-        char err[200];
-        unsigned long size = points.size();
-        sprintf(err,"Error - number of coords in diracPoints is %lu; this should be a multiple of the specified dimension: %d.",size,dim);
-        throw FinleyAdapterException(err);
-    }
-
-    if (numPoints != numTags)
-    {
-        throw FinleyAdapterException("Error - number of diractags must match number of diracpoints.");
-    }
-
-    if (numPoints > 0) {
-        mesh->addPoints(numPoints, &points[0], &tags[0]);
-        checkFinleyError();
-    }
-}
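-
-/* Editor's note: a minimal usage sketch, not part of the original source.
-   addDiracPoints() is a private helper invoked during domain construction
-   (e.g. from the brick()/rectangle() factories); coordinates arrive
-   flattened, dim values per point, with exactly one tag per point.
-   For a hypothetical 2-dimensional domain:
-
-       std::vector<double> pts;
-       pts.push_back(0.25); pts.push_back(0.5);   // point 0: (0.25, 0.5)
-       pts.push_back(0.75); pts.push_back(0.5);   // point 1: (0.75, 0.5)
-       std::vector<int> tags;
-       tags.push_back(1); tags.push_back(2);      // one tag per point
-       addDiracPoints(pts, tags);                 // called on the new MeshAdapter
-*/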
-
-// void MeshAdapter::addDiracPoints(const bp::list& points, const bp::list& tags) const
-// {
-//       const int dim = getDim();
-//       int numPoints=bp::extract<int>(points.attr("__len__")());
-//       int numTags=bp::extract<int>(tags.attr("__len__")());
-//       Mesh* mesh=m_finleyMesh.get();
-//
-//       if  ( (numTags > 0) && ( numPoints !=  numTags ) )
-//       throw FinleyAdapterException("Error - if tags are given number of tags and points must match.");
-//
-//       double* points_ptr=TMPMEMALLOC(numPoints * dim, double);
-//       int*    tags_ptr= TMPMEMALLOC(numPoints, int);
-//
-//       for (int i=0;i<numPoints;++i) {
-//         int tag_id=-1;
-//         int numComps=bp::extract<int>(points[i].attr("__len__")());
-//         if  ( numComps !=   dim ) {
-//                stringstream temp;
-//                temp << "Error - illegal number of components " << numComps << " for point " << i;
-//                throw FinleyAdapterException(temp.str());
-//         }
-//         points_ptr[ i * dim     ] = bp::extract<double>(points[i][0]);
-//         if ( dim > 1 ) points_ptr[ i * dim + 1 ] = bp::extract<double>(points[i][1]);
-//         if ( dim > 2 ) points_ptr[ i * dim + 2 ] = bp::extract<double>(points[i][2]);
-//
-//         if ( numTags > 0) {
-//                bp::extract<string> ex_str(tags[i]);
-//                if  ( ex_str.check() ) {
-//                    tag_id=getTag( ex_str());
-//                } else {
-//                     bp::extract<int> ex_int(tags[i]);
-//                     if ( ex_int.check() ) {
-//                         tag_id=ex_int();
-//                     } else {
-//                          stringstream temp;
-//                          temp << "Error - unable to extract tag for point " << i;
-//                          throw FinleyAdapterException(temp.str());
-//                    }
-//                }
-//         }
-//            tags_ptr[i]=tag_id;
-//       }
-//
-//       Finley_Mesh_addPoints(mesh, numPoints, points_ptr, tags_ptr);
-//       checkPasoError();
-//
-//       TMPMEMFREE(points_ptr);
-//       TMPMEMFREE(tags_ptr);
-// }
-
-/*
-void MeshAdapter::addDiracPoint( const bp::list& point, const int tag) const
-{
-    bp::list points;
-    bp::list tags;
-    points.append(point);
-    tags.append(tag);
-    addDiracPoints(points, tags);
-}
-*/
-
-/*
-void MeshAdapter::addDiracPointWithTagName( const bp::list& point, const string& tag) const
-{
-    bp::list points;
-    bp::list tags;
-    points.append(point);
-    tags.append(tag);
-    addDiracPoints(points, tags);
-}
-*/
-}  // end of namespace
-
diff --git a/finley/src/CPPAdapter/MeshAdapter.h b/finley/src/CPPAdapter/MeshAdapter.h
deleted file mode 100644
index 0120b02..0000000
--- a/finley/src/CPPAdapter/MeshAdapter.h
+++ /dev/null
@@ -1,668 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined finley_MeshAdapter_20040526_H
-#define finley_MeshAdapter_20040526_H
-#include "system_dep.h"
-
-#include "finley/Mesh.h"
-#include "finley/Finley.h"
-#include "finley/Assemble.h"
-#include "FinleyAdapterException.h"
-
-#include <pasowrap/SystemMatrixAdapter.h>
-#include <pasowrap/TransportProblemAdapter.h>
-#include "escript/AbstractContinuousDomain.h"
-#include "escript/FunctionSpace.h"
-#include "escript/FunctionSpaceFactory.h"
-
-#include <boost/shared_ptr.hpp>
-#include <boost/python/dict.hpp>
-#include <boost/python/extract.hpp>
-
-#include <map>
-#include <vector>
-#include <string>
-#include <sstream>
-
-namespace finley {
-  
-// These are friends implemented in MeshAdapterFactory.cpp.
-// They are only forward-declared here so that Visual Studio will accept the
-// friend declarations.
-FINLEY_DLL_API
-escript::Domain_ptr brick(esysUtils::JMPI& p, dim_t n0, dim_t n1, dim_t n2,
-                          int order, double l0, double l1, double l2,
-                          bool periodic0, bool periodic1, bool periodic2,
-                          int integrationOrder, int reducedIntegrationOrder,
-                          bool useElementsOnFace, bool useFullElementOrder,
-                          bool optimize, const std::vector<double>& points,
-                          const std::vector<int>& tags,
-                          const std::map<std::string, int>& tagNamesToNums
-                    );
-
-FINLEY_DLL_API              
-escript::Domain_ptr rectangle(esysUtils::JMPI& p, dim_t n0, dim_t n1,
-                              int order, double l0, double l1,
-                              bool periodic0, bool periodic1,
-                              int integrationOrder, int reducedIntegrationOrder,
-                              bool useElementsOnFace, bool useFullElementOrder,
-                              bool optimize, const std::vector<double>& points,
-                              const std::vector<int>& tags,
-                              const std::map<std::string, int>& tagNamesToNums
-                    );        
-  
-struct null_deleter { void operator()(void const *ptr) const {} };
-
-
-/**
-   \brief implements the AbstractContinuousDomain interface for the Finley
-          library.
-*/
-class FINLEY_DLL_API MeshAdapter : public escript::AbstractContinuousDomain
-{
-public:
-  //
-  // Codes for function space types supported
-  static const int DegreesOfFreedom;
-  static const int ReducedDegreesOfFreedom;
-  static const int Nodes;
-  static const int ReducedNodes;
-  static const int Elements;
-  static const int ReducedElements;
-  static const int FaceElements;
-  static const int ReducedFaceElements;
-  static const int Points;
-  static const int ContactElementsZero;
-  static const int ReducedContactElementsZero;
-  static const int ContactElementsOne;
-  static const int ReducedContactElementsOne;
-
-  /**
-     \brief
-     Constructor for MeshAdapter
-
-     Description:
-     Constructor for MeshAdapter. The pointer passed to MeshAdapter
-     is deleted using a call to Finley_Mesh_free in the
-     MeshAdapter destructor.
-
-     Throws:
-     May throw an exception derived from EsysException
-
-     \param finleyMesh Input - A pointer to the externally constructed
-                               finley mesh. The pointer passed to MeshAdapter
-                               is deleted using a call to Finley_Mesh_free
-                               in the MeshAdapter destructor.
-  */
-  MeshAdapter(Mesh* finleyMesh=NULL);
-
-  /**
-     \brief
-     Copy constructor.
-  */
-  MeshAdapter(const MeshAdapter& in);
-
-  /**
-     \brief
-     Destructor for MeshAdapter. As specified in the constructor
-     this calls Finley_Mesh_free for the pointer given to the 
-     constructor.
-  */
-  ~MeshAdapter();
-
-  /**
-     \brief
-     return the number of processors used for this domain
-  */
-  virtual int getMPISize() const;
-  /**
-     \brief
-     return the MPI rank of this processor
-  */
-
-  virtual int getMPIRank() const;
-
-  /**
-     \brief
-     If compiled for MPI then execute an MPI_Barrier, else do nothing
-  */
-
-  virtual void MPIBarrier() const;
-
-  /**
-     \brief
-     Return true if on MPI processor 0, else false
-  */
-
-  virtual bool onMasterProcessor() const;
-
-  MPI_Comm getMPIComm() const;
-
-  /**
-     \brief
-     Write the current mesh to a file with the given name.
-     \param fileName Input - The name of the file to write to.
-  */
-  void write(const std::string& fileName) const;
-
-  /**
-     \brief prints information about the mesh to standard output.
-     \param full Input - if true, more detailed information is printed.
-  */
-  void Print_Mesh_Info(bool full=false) const;
-
-  /**
-     \brief
-     dumps the mesh to a file with the given name.
-     \param fileName Input - The name of the file
-  */
-  void dump(const std::string& fileName) const;
-
-  /**
-     \brief
-     return the pointer to the underlying finley mesh structure
-  */
-  Mesh* getFinley_Mesh() const;
-
-   /**
-     \brief
-     Return the tag key for the given sample number.
-     \param functionSpaceType Input - The function space type.
-     \param sampleNo Input - The sample number.
-  */
-  int getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const;
-
-  /**
-     \brief
-     Return the reference numbers of the samples of the given function space type.
-     \param functionSpaceType Input - The function space type.
-  */
-  const index_t* borrowSampleReferenceIDs(int functionSpaceType) const;
-
-  /**
-     \brief
-     Returns true if the given integer is a valid function space type
-     for this domain.
-  */
-  virtual bool isValidFunctionSpaceType(int functionSpaceType) const;
-
-  /**
-     \brief
-     Return a description for this domain
-  */
-  virtual std::string getDescription() const;
-
-  /**
-     \brief
-     Return a description for the given function space type code
-  */
-  virtual std::string functionSpaceTypeAsString(int functionSpaceType) const;
-
-  /**
-     \brief
-     Build the table of function space type names
-  */
-  void setFunctionSpaceTypeNames();
-
-  /**
-     \brief
-     Return a continuous FunctionSpace code
-  */
-  virtual int getContinuousFunctionCode() const;
-
-  /**
-     \brief
-     Return a continuous on reduced order nodes FunctionSpace code
-  */
-  virtual int getReducedContinuousFunctionCode() const;
-
-  /**
-     \brief
-     Return a function FunctionSpace code
-  */
-  virtual int getFunctionCode() const;
-
-  /**
-     \brief
-     Return a function with reduced integration order FunctionSpace code
-  */
-  virtual int getReducedFunctionCode() const;
-
-  /**
-     \brief
-     Return a function on boundary FunctionSpace code
-  */
-  virtual int getFunctionOnBoundaryCode() const;
-
-  /**
-     \brief
-     Return a function on boundary with reduced integration order FunctionSpace code
-  */
-  virtual int getReducedFunctionOnBoundaryCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactZero code
-  */
-  virtual int getFunctionOnContactZeroCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactZero code  with reduced integration order
-  */
-  virtual int getReducedFunctionOnContactZeroCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactOne code
-  */
-  virtual int getFunctionOnContactOneCode() const;
-
-  /**
-     \brief
-     Return a FunctionOnContactOne code  with reduced integration order
-  */
-  virtual int getReducedFunctionOnContactOneCode() const;
-
-  /**
-     \brief
-     Return a Solution code
-  */
-  virtual int getSolutionCode() const;
-
-  /**
-     \brief
-     Return a ReducedSolution code
-  */
-  virtual int getReducedSolutionCode() const;
-
-  /**
-     \brief
-     Return a DiracDeltaFunctions code
-  */
-  virtual int getDiracDeltaFunctionsCode() const;
-
-  /**
-     \brief type of the map from function space type codes to their names
-  */
-  typedef std::map<int, std::string> FunctionSpaceNamesMapType;
-
-  /**
-     \brief returns the spatial dimension of the domain
-  */
-  virtual int getDim() const;
-
-  /**
-     \brief
-      Returns a status indicator of the domain. The status identifier should be
-      unique over the lifetime of the object but may be updated if changes to
-      the domain happen, e.g. modifications to its geometry.
-
-     This has to be implemented by the actual Domain adapter.
-  */
-  virtual StatusType getStatus() const;
-
-
-  /**
-     \brief
-     Return the number of data points summed across all MPI processes
-  */
-  virtual dim_t getNumDataPointsGlobal() const;
-
-  /**
-     \brief
-     Return the number of data points per sample, and the number of samples as a pair.
-     \param functionSpaceCode Input -
-  */
-  virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
-
-  /**
-     \brief
-     copies the location of data points into arg. The domain of arg has to match this.
-     Has to be implemented by the actual Domain adapter.
-  */
-  virtual void setToX(escript::Data& arg) const;
-
-  /**
-     \brief
-     sets a map from a clear tag name to a tag key
-     \param name Input - tag name.
-     \param tag Input - tag key.
-  */
-  virtual void setTagMap(const std::string& name,  int tag);
-
-  /**
-     \brief
-     Return the tag key for tag name.
-     \param name Input - tag name
-  */
-  virtual int getTag(const std::string& name) const;
-
-  /**
-     \brief
-     Returns true if name is a defined tag name.
-     \param name Input - tag name to be checked.
-  */
-  virtual bool isValidTagName(const std::string& name) const;
-
-  /**
-     \brief
-     Returns all tag names in a single string separated by commas.
-  */
-  virtual std::string showTagNames() const;
-
-  /**
-     \brief
-     assigns new location to the domain
-  */
-  virtual void setNewX(const escript::Data& arg);
-
-  /**
-     \brief
-     interpolates data given on source onto target where source and target have to be given on the same domain.
-  */
-  virtual void interpolateOnDomain(escript::Data& target, const escript::Data& source) const;
-
-  virtual bool probeInterpolationOnDomain(int functionSpaceType_source, int functionSpaceType_target) const;
-  
-  virtual signed char preferredInterpolationOnDomain(int functionSpaceType_source, int functionSpaceType_target) const;
-  
-  
-  /**
-    \brief given a vector of FunctionSpace typecodes, pass back a code to which they can all be interpolated.
-    \return true if the result is valid, false if not
-  */
-  bool commonFunctionSpace(const std::vector<int>& fs, int& resultcode) const;
-
-  /**
-     \brief
-     interpolates data given on source onto target where source and target are given on different domains.
-     Has to be implemented by the actual Domain adapter.
-  */
-  virtual void interpolateAcross(escript::Data& target, const escript::Data& source) const;
-
-  /**
-  \brief determines whether interpolation from source to target is possible.
-  Must be implemented by the actual Domain adapter
-  */
-  virtual bool probeInterpolationAcross(int functionSpaceType_source, const escript::AbstractDomain& targetDomain, int functionSpaceType_target) const;
-
-  /**
-     \brief
-     copies the surface normals at data points into out. The actual function space to be considered
-     is defined by out. out has to be defined on this.
-  */
-  virtual void setToNormal(escript::Data& out) const;
-
-  /**
-     \brief
-     copies the size of samples into out. The actual function space to be considered
-     is defined by out. out has to be defined on this.
-  */
-  virtual void setToSize(escript::Data& out) const;
-
-  /**
-     \brief
-     copies the gradient of arg into grad. The actual function space to be considered
-     for the gradient is defined by grad. arg and grad have to be defined on this.
-  */
-  virtual void setToGradient(escript::Data& grad, const escript::Data& arg) const;
-
-  /**
-     \brief
-     copies the integrals of the function defined by arg into integrals.
-     arg has to be defined on this.
-  */
-  virtual void setToIntegrals(std::vector<double>& integrals, const escript::Data& arg) const;
-
-  /**
-     \brief
-     return the identifier of the matrix type to be used for the global stiffness matrix when a particular solver, package, preconditioner,
-     and symmetric matrix is used.
-     \param options a python object containing the solver, package,
-            preconditioner and symmetry
-  */
-  virtual int getSystemMatrixTypeId(const boost::python::object& options) const;
-
-  /**
-     \brief
-     return the identifier of the transport problem type to be used when a particular solver, preconditioner, package
-     and symmetric matrix is used.
-     \param solver 
-     \param preconditioner
-     \param package
-     \param symmetry 
-  */
-  virtual int getTransportTypeId(const int solver, const int preconditioner, const int package, const bool symmetry) const;
-
-  /**
-     \brief
-     returns true if data on this domain and a function space of type
-     functionSpaceCode has to be considered as cell-centered data.
-  */
-  virtual bool isCellOriented(int functionSpaceCode) const;
-
-
-  virtual bool ownSample(int fsCode, index_t id) const;
-
-  /**
-     \brief
-     returns the function space representation of the type functionSpaceCode on this domain
-     as a vtkObject.
-  */
-  // vtkObject createVtkObject(int functionSpaceCode) const;
-
-  /**
-     \brief
-     adds a PDE onto the stiffness matrix mat and a rhs 
-  */
-  virtual void addPDEToSystem(
-                     escript::AbstractSystemMatrix& mat, escript::Data& rhs,
-                     const escript::Data& A, const escript::Data& B, const escript::Data& C, 
-                     const escript::Data& D, const escript::Data& X, const escript::Data& Y,
-                     const escript::Data& d, const escript::Data& y,
-                     const escript::Data& d_contact, const escript::Data& y_contact,
-                     const escript::Data& d_dirac, const escript::Data& y_dirac) const;
-  /**
-     \brief
-     adds a PDE onto the lumped stiffness matrix matrix
-  */
-  virtual void addPDEToLumpedSystem(
-                     escript::Data& mat,
-                     const escript::Data& D, 
-                     const escript::Data& d,
-                     const escript::Data& d_dirac,
-                     const bool useHRZ) const;
-
-  /**
-     \brief
-     adds the right hand side contributions of a PDE onto rhs
-  */
-  virtual void addPDEToRHS(escript::Data& rhs,
-                     const escript::Data& X, const escript::Data& Y,
-                     const escript::Data& y, const escript::Data& y_contact, const escript::Data& y_dirac) const;
-
-  /**
-     \brief
-     adds a PDE onto a transport problem
-  */
-  virtual void addPDEToTransportProblem(
-                     escript::AbstractTransportProblem& tp, escript::Data& source, 
-                     const escript::Data& M,
-                     const escript::Data& A, const escript::Data& B, const escript::Data& C,const  escript::Data& D,
-                     const  escript::Data& X,const  escript::Data& Y,
-                     const escript::Data& d, const escript::Data& y,
-                     const escript::Data& d_contact,const escript::Data& y_contact, const escript::Data& d_dirac,const escript::Data& y_dirac) const;
-
-  /**
-     \brief
-    creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros:
-  */
-  escript::ASM_ptr newSystemMatrix(
-                      const int row_blocksize,
-                      const escript::FunctionSpace& row_functionspace,
-                      const int column_blocksize,
-                      const escript::FunctionSpace& column_functionspace,
-                      const int type) const;
-
-  /**
-   \brief 
-    creates a TransportProblemAdapter 
-
-  */
-  escript::ATP_ptr newTransportProblem(
-                      const int blocksize,
-                      const escript::FunctionSpace& functionspace,
-                      const int type) const;
-
-  /**
-     \brief returns locations in the FEM nodes
-  */
-  virtual escript::Data getX() const;
-
-  /**
-     \brief return boundary normals at the quadrature point on the face elements
-  */
-  virtual escript::Data getNormal() const;
-
-  /**
-     \brief returns the element size
-  */
-  virtual escript::Data getSize() const;
-
-  /**
-     \brief comparison operators
-  */
-  virtual bool operator==(const escript::AbstractDomain& other) const;
-  virtual bool operator!=(const escript::AbstractDomain& other) const;
-
-  /**
-     \brief assigns new tag newTag to all samples of functionspace with a positive
-     value of mask for any of its sample points.
-
-  */
-  virtual void setTags(const int functionSpaceType, const int newTag, const escript::Data& mask) const;
-
-  /**
-      \brief
-          return the number of tags in use and a pointer to an array with the number of tags in use
-  */
-  virtual int getNumberOfTagsInUse(int functionSpaceCode) const;
-
-  virtual const int* borrowListOfTagsInUse(int functionSpaceCode) const;
-
-  /**
-     \brief Checks if this domain allows tags for the specified functionSpaceCode.
-  */
-  virtual
-  bool canTag(int functionSpaceCode) const;
-
-   /**
-   \brief returns the approximation order used for a function space functionSpaceCode
-   */
-
-  virtual 
-  int getApproximationOrder(const int functionSpaceCode) const;
-
-  bool supportsContactElements() const;
-
-  virtual escript::Data randomFill(const escript::DataTypes::ShapeType& shape,
-       const escript::FunctionSpace& what, long seed, const boost::python::tuple& filter) const;       
-
-
-private:
-  
-  /**
-   \brief  adds points to support more Dirac delta functions.
-   
-   Do NOT call these at any time other than construction!
-   Using them later creates consistency problems
-   */
-  void addDiracPoints(const std::vector<double>& points, const std::vector<int>& tags) const;
-//  void addDiracPoint( const boost::python::list& points, const int tag=-1) const;
-//   void addDiracPointWithTagName( const boost::python::list& points, const std::string& tag) const;
-
-  //
-  // pointer to the externally created finley mesh
-  boost::shared_ptr<Mesh> m_finleyMesh;
-  
-  // This is only provided so that the friends below can add tags during construction
-  // do not use for any other purpose
-  boost::shared_ptr<Mesh> getMesh()
-  {
-      return m_finleyMesh;
-  }
- 
-  static FunctionSpaceNamesMapType m_functionSpaceTypeNames;
-
-  friend escript::Domain_ptr brick(esysUtils::JMPI& p,
-                    dim_t n0, dim_t n1, dim_t n2, int order,
-                    double l0, double l1, double l2,
-                    bool periodic0, bool periodic1, bool periodic2,
-                    int integrationOrder,
-                    int reducedIntegrationOrder,
-                    bool useElementsOnFace,
-                    bool useFullElementOrder,
-                    bool optimize, 
-                    const std::vector<double>& points,
-                    const std::vector<int>& tags,
-                    const std::map<std::string, int>& tagNamesToNums);
-                    
-                    
-  friend escript::Domain_ptr rectangle(esysUtils::JMPI& p,
-                        dim_t n0, dim_t n1, int order,
-                        double l0, double l1,
-                        bool periodic0, bool periodic1,
-                        int integrationOrder,
-                        int reducedIntegrationOrder,
-                        bool useElementsOnFace,
-                        bool useFullElementOrder,
-                        bool optimize,
-                        const std::vector<double>& points,
-                        const std::vector<int>& tags,
-                        const std::map<std::string, int>& tagNamesToNums); 
-
-   friend escript::Domain_ptr readMesh_driver(const boost::python::list& args);
-
-   friend escript::Domain_ptr readMesh(esysUtils::JMPI& p,
-                                     const std::string& fileName,
-                                     int integrationOrder,
-                                     int reducedIntegrationOrder,
-                                     bool optimize,
-                                     const std::vector<double>& points,
-                                     const std::vector<int>& tags);
-
-  friend escript::Domain_ptr readGmsh_driver(const boost::python::list& args);
-
-  friend escript::Domain_ptr readGmsh(esysUtils::JMPI& p,
-                               const std::string& fileName,
-                               int numDim, 
-                               int integrationOrder,
-                               int reducedIntegrationOrder, 
-                               bool optimize,
-                               bool useMacroElements,
-                               const std::vector<double>& points,
-                               const std::vector<int>& tags);
-};
-
-
-} // end of namespace
-
-#endif
-
diff --git a/finley/src/CPPAdapter/MeshAdapterFactory.cpp b/finley/src/CPPAdapter/MeshAdapterFactory.cpp
deleted file mode 100644
index d01769d..0000000
--- a/finley/src/CPPAdapter/MeshAdapterFactory.cpp
+++ /dev/null
@@ -1,986 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-
-#include "MeshAdapterFactory.h"
-#include <esysUtils/blocktimer.h>
-#include <esysUtils/Esys_MPI.h>
-
-#ifdef USE_NETCDF
-#include <netcdfcpp.h>
-#endif
-
-#include <boost/python/extract.hpp>
-#include <boost/scoped_array.hpp>
-
-#include <sstream>
-
-using namespace std;
-using namespace escript;
-
-namespace finley {
-
-#ifdef USE_NETCDF
-// A convenience method to retrieve an integer attribute from a NetCDF file
-template<typename T>
-T ncReadAtt(NcFile *dataFile, const string &fName, const string& attrName)
-{
-    NcAtt *attr = dataFile->get_att(attrName.c_str());
-    if (!attr) {
-        stringstream msg;
-        msg << "loadMesh: Error retrieving integer attribute '" << attrName
-            << "' from NetCDF file '" << fName << "'";
-        throw FinleyAdapterException(msg.str());
-    }
-    T value = (sizeof(T) > 4 ? attr->as_long(0) : attr->as_int(0));
-    delete attr;
-    return value;
-}
-#endif
-
-inline void cleanupAndThrow(Mesh* mesh, string msg)
-{
-    delete mesh;
-    string msgPrefix("loadMesh: NetCDF operation failed - ");
-    throw FinleyAdapterException(msgPrefix+msg);
-}
-
-Domain_ptr loadMesh(const std::string& fileName)
-{
-#ifdef USE_NETCDF
-    esysUtils::JMPI mpi_info = esysUtils::makeInfo( MPI_COMM_WORLD );
-
-    const string fName(esysUtils::appendRankToFileName(fileName,
-                        mpi_info->size, mpi_info->rank));
-
-    double blocktimer_start = blocktimer_time();
-    resetError();
-
-    // Open NetCDF file for reading
-    NcAtt *attr;
-    NcVar *nc_var_temp;
-    // netCDF error handler
-    NcError err(NcError::silent_nonfatal);
-    // Open the existing NetCDF file for reading.
-    NcFile dataFile(fName.c_str(), NcFile::ReadOnly);
-    if (!dataFile.is_valid()) {
-        stringstream msg;
-        msg << "loadMesh: Opening NetCDF file '" << fName << "' for reading failed.";
-        throw FinleyAdapterException(msg.str());
-    }
-
-    // Read NetCDF integer attributes
-
-    // index_size was only introduced with 64-bit index support so fall back
-    // to 32 bits if not found.
-    int index_size;
-    try {
-        index_size = ncReadAtt<int>(&dataFile, fName, "index_size");
-    } catch (FinleyAdapterException& e) {
-        index_size = 4;
-    }
-    // technically we could cast if reading 32-bit data on 64-bit escript
-    // but cost-benefit analysis clearly favours this implementation for now
-    if (sizeof(index_t) != index_size) {
-        throw FinleyAdapterException("loadMesh: size of index types at runtime differ from dump file");
-    }
-
-    int mpi_size = ncReadAtt<int>(&dataFile, fName, "mpi_size");
-    int mpi_rank = ncReadAtt<int>(&dataFile, fName, "mpi_rank");
-    int numDim = ncReadAtt<int>(&dataFile, fName, "numDim");
-    int order = ncReadAtt<int>(&dataFile, fName, "order");
-    int reduced_order = ncReadAtt<int>(&dataFile, fName, "reduced_order");
-    dim_t numNodes = ncReadAtt<dim_t>(&dataFile, fName, "numNodes");
-    dim_t num_Elements = ncReadAtt<dim_t>(&dataFile, fName, "num_Elements");
-    dim_t num_FaceElements = ncReadAtt<dim_t>(&dataFile, fName, "num_FaceElements");
-    dim_t num_ContactElements = ncReadAtt<dim_t>(&dataFile, fName, "num_ContactElements");
-    dim_t num_Points = ncReadAtt<dim_t>(&dataFile, fName, "num_Points");
-    int num_Elements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_Elements_numNodes");
-    int Elements_TypeId = ncReadAtt<int>(&dataFile, fName, "Elements_TypeId");
-    int num_FaceElements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_FaceElements_numNodes");
-    int FaceElements_TypeId = ncReadAtt<int>(&dataFile, fName, "FaceElements_TypeId");
-    int num_ContactElements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_ContactElements_numNodes");
-    int ContactElements_TypeId = ncReadAtt<int>(&dataFile, fName, "ContactElements_TypeId");
-    int Points_TypeId = ncReadAtt<int>(&dataFile, fName, "Points_TypeId");
-    int num_Tags = ncReadAtt<int>(&dataFile, fName, "num_Tags");
-
-    // Verify size and rank
-    if (mpi_info->size != mpi_size) {
-        stringstream msg;
-        msg << "loadMesh: The NetCDF file '" << fName
-            << "' can only be read on " << mpi_size
-            << " CPUs. Currently running: " << mpi_info->size;
-        throw FinleyAdapterException(msg.str());
-    }
-    if (mpi_info->rank != mpi_rank) {
-        stringstream msg;
-        msg << "loadMesh: The NetCDF file '" << fName
-            << "' should be ready on CPU #" << mpi_rank
-            << " and NOT on #" << mpi_info->rank;
-        throw FinleyAdapterException(msg.str());
-    }
-
-    // Read mesh name
-    if (! (attr=dataFile.get_att("Name")) ) {
-        stringstream msg;
-        msg << "loadMesh: Error retrieving mesh name from NetCDF file '"
-            << fName << "'";
-        throw FinleyAdapterException(msg.str());
-    }
-    boost::scoped_array<char> name(attr->as_string(0));
-    delete attr;
-
-    // allocate mesh
-    Mesh *mesh_p = new Mesh(name.get(), numDim, mpi_info);
-    if (noError()) {
-        // read nodes
-        mesh_p->Nodes->allocTable(numNodes);
-        // Nodes_Id
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_Id")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Id)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->Id[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Id)");
-        // Nodes_Tag
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_Tag")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Tag)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->Tag[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Tag)");
-        // Nodes_gDOF
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_gDOF")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_gDOF)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalDegreesOfFreedom[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_gDOF)");
-        // Nodes_gNI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_gNI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_gNI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalNodesIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_gNI)");
-        // Nodes_grDfI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_grDfI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_grDfI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalReducedDOFIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_grDfI)");
-        // Nodes_grNI
-        if (! ( nc_var_temp = dataFile.get_var("Nodes_grNI")) )
-            cleanupAndThrow(mesh_p, "get_var(Nodes_grNI)");
-        if (! nc_var_temp->get(&mesh_p->Nodes->globalReducedNodesIndex[0], numNodes) )
-            cleanupAndThrow(mesh_p, "get(Nodes_grNI)");
-        // Nodes_Coordinates
-        if (!(nc_var_temp = dataFile.get_var("Nodes_Coordinates")))
-            cleanupAndThrow(mesh_p, "get_var(Nodes_Coordinates)");
-        if (! nc_var_temp->get(&(mesh_p->Nodes->Coordinates[0]), numNodes, numDim) )
-            cleanupAndThrow(mesh_p, "get(Nodes_Coordinates)");
-        mesh_p->Nodes->updateTagList();
-
-        // read elements
-        if (noError()) {
-            const_ReferenceElementSet_ptr refElements(new ReferenceElementSet(
-                        (ElementTypeId)Elements_TypeId, order, reduced_order));
-            if (noError())  {
-                mesh_p->Elements=new ElementFile(refElements, mpi_info);
-            }
-            if (noError())
-                mesh_p->Elements->allocTable(num_Elements);
-            if (noError()) {
-                mesh_p->Elements->minColor=0;
-                mesh_p->Elements->maxColor=num_Elements-1;
-                if (num_Elements>0) {
-                   // Elements_Id
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Id")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Id)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Id[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Id)");
-                   // Elements_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Tag")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Tag[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Tag)");
-                   // Elements_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Owner")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Owner)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Owner[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Owner)");
-                   // Elements_Color
-                   if (! ( nc_var_temp = dataFile.get_var("Elements_Color")) )
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Color)");
-                   if (! nc_var_temp->get(&mesh_p->Elements->Color[0], num_Elements) )
-                       cleanupAndThrow(mesh_p, "get(Elements_Color)");
-                   // Now we need to adjust maxColor
-                   index_t mc=mesh_p->Elements->Color[0];
-                   for (index_t i=1;i<num_Elements;++i) {
-                       if (mc<mesh_p->Elements->Color[i]) {
-                           mc = mesh_p->Elements->Color[i];
-                       }
-                   }
-                   mesh_p->Elements->maxColor=mc;
-                   // Elements_Nodes
-                   int *Elements_Nodes = new int[num_Elements*num_Elements_numNodes];
-                   if (!(nc_var_temp = dataFile.get_var("Elements_Nodes"))) {
-                       delete[] Elements_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(Elements_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(Elements_Nodes[0]), num_Elements, num_Elements_numNodes) ) {
-                       delete[] Elements_Nodes;
-                       cleanupAndThrow(mesh_p, "get(Elements_Nodes)");
-                   }
-
-                   // Copy temp array into mesh_p->Elements->Nodes
-                   for (int i=0; i<num_Elements; i++) {
-                       for (int j=0; j<num_Elements_numNodes; j++) {
-                           mesh_p->Elements->Nodes[INDEX2(j,i,num_Elements_numNodes)]
-                                = Elements_Nodes[INDEX2(j,i,num_Elements_numNodes)];
-                       }
-                   }
-                   delete[] Elements_Nodes;
-                } /* num_Elements>0 */
-                mesh_p->Elements->updateTagList();
-            }
-        }
-
-        /* get the face elements */
-        if (noError()) {
-            const_ReferenceElementSet_ptr refFaceElements(
-                    new ReferenceElementSet((ElementTypeId)FaceElements_TypeId,
-                        order, reduced_order));
-            if (noError())  {
-                mesh_p->FaceElements=new ElementFile(refFaceElements, mpi_info);
-            }
-            if (noError())
-                mesh_p->FaceElements->allocTable(num_FaceElements);
-            if (noError()) {
-                mesh_p->FaceElements->minColor=0;
-                mesh_p->FaceElements->maxColor=num_FaceElements-1;
-                if (num_FaceElements>0) {
-                   // FaceElements_Id
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Id")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Id)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Id[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Id)");
-                   // FaceElements_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Tag")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Tag[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Tag)");
-                   // FaceElements_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Owner")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Owner)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Owner[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Owner)");
-                   // FaceElements_Color
-                   if (! ( nc_var_temp = dataFile.get_var("FaceElements_Color")) )
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Color)");
-                   if (! nc_var_temp->get(&mesh_p->FaceElements->Color[0], num_FaceElements) )
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Color)");
-                   // Now we need to adjust maxColor
-                   index_t mc=mesh_p->FaceElements->Color[0];
-                   for (index_t i=1;i<num_FaceElements;++i) {
-                       if (mc<mesh_p->FaceElements->Color[i]) {
-                           mc = mesh_p->FaceElements->Color[i];
-                       }
-                   }
-                   mesh_p->FaceElements->maxColor=mc;
-                   // FaceElements_Nodes
-                   int *FaceElements_Nodes = new int[num_FaceElements*num_FaceElements_numNodes];
-                   if (!(nc_var_temp = dataFile.get_var("FaceElements_Nodes"))) {
-                       delete[] FaceElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(FaceElements_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(FaceElements_Nodes[0]), num_FaceElements, num_FaceElements_numNodes) ) {
-                       delete[] FaceElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get(FaceElements_Nodes)");
-                   }
-                   // Copy temp array into mesh_p->FaceElements->Nodes
-                   for (int i=0; i<num_FaceElements; i++) {
-                       for (int j=0; j<num_FaceElements_numNodes; j++) {
-                           mesh_p->FaceElements->Nodes[INDEX2(j,i,num_FaceElements_numNodes)] = FaceElements_Nodes[INDEX2(j,i,num_FaceElements_numNodes)];
-                       }
-                   }
-                   delete[] FaceElements_Nodes;
-                } /* num_FaceElements>0 */
-                mesh_p->FaceElements->updateTagList();
-            }
-        }
-
-        /* get the Contact elements */
-        if (noError()) {
-            const_ReferenceElementSet_ptr refContactElements(
-                 new ReferenceElementSet((ElementTypeId)ContactElements_TypeId,
-                     order, reduced_order));
-            if (noError()) {
-                mesh_p->ContactElements=new ElementFile(refContactElements, mpi_info);
-            }
-            if (noError())
-                mesh_p->ContactElements->allocTable(num_ContactElements);
-            if (noError()) {
-                mesh_p->ContactElements->minColor=0;
-                mesh_p->ContactElements->maxColor=num_ContactElements-1;
-                if (num_ContactElements>0) {
-                   // ContactElements_Id
-                   if (! ( nc_var_temp = dataFile.get_var("ContactElements_Id")) )
-                       cleanupAndThrow(mesh_p, "get_var(ContactElements_Id)");
-                   if (! nc_var_temp->get(&mesh_p->ContactElements->Id[0], num_ContactElements) )
-                       cleanupAndThrow(mesh_p, "get(ContactElements_Id)");
-                   // ContactElements_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("ContactElements_Tag")) )
-                       cleanupAndThrow(mesh_p, "get_var(ContactElements_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->ContactElements->Tag[0], num_ContactElements) )
-                       cleanupAndThrow(mesh_p, "get(ContactElements_Tag)");
-                   // ContactElements_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("ContactElements_Owner")) )
-                       cleanupAndThrow(mesh_p, "get_var(ContactElements_Owner)");
-                   if (! nc_var_temp->get(&mesh_p->ContactElements->Owner[0], num_ContactElements) )
-                       cleanupAndThrow(mesh_p, "get(ContactElements_Owner)");
-                   // ContactElements_Color
-                   if (! ( nc_var_temp = dataFile.get_var("ContactElements_Color")) )
-                       cleanupAndThrow(mesh_p, "get_var(ContactElements_Color)");
-                   if (! nc_var_temp->get(&mesh_p->ContactElements->Color[0], num_ContactElements) )
-                       cleanupAndThrow(mesh_p, "get(ContactElements_Color)");
-                   // Now we need to adjust maxColor
-                   index_t mc=mesh_p->ContactElements->Color[0];
-                   for (index_t i=1;i<num_ContactElements;++i) {
-                       if (mc<mesh_p->ContactElements->Color[i]) {
-                           mc = mesh_p->ContactElements->Color[i];
-                       }
-                   }
-                   mesh_p->ContactElements->maxColor=mc;
-                   // ContactElements_Nodes
-                   int *ContactElements_Nodes = new int[num_ContactElements*num_ContactElements_numNodes];
-                   if (!(nc_var_temp = dataFile.get_var("ContactElements_Nodes"))) {
-                       delete[] ContactElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(ContactElements_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(ContactElements_Nodes[0]), num_ContactElements, num_ContactElements_numNodes) ) {
-                       delete[] ContactElements_Nodes;
-                       cleanupAndThrow(mesh_p, "get(ContactElements_Nodes)");
-                   }
-                   // Copy temp array into mesh_p->ContactElements->Nodes
-                   for (int i=0; i<num_ContactElements; i++) {
-                       for (int j=0; j<num_ContactElements_numNodes; j++) {
-                           mesh_p->ContactElements->Nodes[INDEX2(j,i,num_ContactElements_numNodes)]= ContactElements_Nodes[INDEX2(j,i,num_ContactElements_numNodes)];
-                       }
-                   }
-                   delete[] ContactElements_Nodes;
-               } /* num_ContactElements>0 */
-               mesh_p->ContactElements->updateTagList();
-           }
-        }
-
-        // get the Points (nodal elements)
-        if (noError()) {
-            const_ReferenceElementSet_ptr refPoints(new ReferenceElementSet(
-                        (ElementTypeId)Points_TypeId, order, reduced_order));
-            if (noError())  {
-                mesh_p->Points=new ElementFile(refPoints, mpi_info);
-            }
-            if (noError())
-                mesh_p->Points->allocTable(num_Points);
-            if (noError()) {
-                mesh_p->Points->minColor=0;
-                mesh_p->Points->maxColor=num_Points-1;
-                if (num_Points>0) {
-                   // Points_Id
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Id")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Id)");
-                   if (! nc_var_temp->get(&mesh_p->Points->Id[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Id)");
-                   // Points_Tag
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Tag")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Tag)");
-                   if (! nc_var_temp->get(&mesh_p->Points->Tag[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Tag)");
-                   // Points_Owner
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Owner")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Owner)");
-                   if (!nc_var_temp->get(&mesh_p->Points->Owner[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Owner)");
-                   // Points_Color
-                   if (! ( nc_var_temp = dataFile.get_var("Points_Color")))
-                       cleanupAndThrow(mesh_p, "get_var(Points_Color)");
-                   if (!nc_var_temp->get(&mesh_p->Points->Color[0], num_Points))
-                       cleanupAndThrow(mesh_p, "get(Points_Color)");
-                   // Now we need to adjust maxColor
-                   index_t mc=mesh_p->Points->Color[0];
-                   for (index_t i=1;i<num_Points;++i) {
-                       if (mc<mesh_p->Points->Color[i]) {
-                           mc = mesh_p->Points->Color[i];
-                       }
-                   }
-                   mesh_p->Points->maxColor=mc;
-                   // Points_Nodes
-                   int *Points_Nodes = new int[num_Points];
-                   if (!(nc_var_temp = dataFile.get_var("Points_Nodes"))) {
-                       delete[] Points_Nodes;
-                       cleanupAndThrow(mesh_p, "get_var(Points_Nodes)");
-                   }
-                   if (! nc_var_temp->get(&(Points_Nodes[0]), num_Points) ) {
-                       delete[] Points_Nodes;
-                       cleanupAndThrow(mesh_p, "get(Points_Nodes)");
-                   }
-                   // Copy temp array into mesh_p->Points->Nodes
-                   for (int i=0; i<num_Points; i++) {
-                       mesh_p->Points->Id[mesh_p->Points->Nodes[INDEX2(0,i,1)]] = Points_Nodes[i];
-                   }
-                   delete[] Points_Nodes;
-                } /* num_Points>0 */
-                mesh_p->Points->updateTagList();
-            }
-        }
-
-        // get the tags
-        if (noError()) {
-          if (num_Tags > 0) {
-            // Temp storage to gather node IDs
-            int *Tags_keys = new int[num_Tags];
-            char name_temp[4096];
-            int i;
-
-            // Tags_keys
-            if (! ( nc_var_temp = dataFile.get_var("Tags_keys")) ) {
-                delete[] Tags_keys;
-                cleanupAndThrow(mesh_p, "get_var(Tags_keys)");
-            }
-            if (! nc_var_temp->get(&Tags_keys[0], num_Tags) ) {
-                delete[] Tags_keys;
-                cleanupAndThrow(mesh_p, "get(Tags_keys)");
-            }
-            for (i=0; i<num_Tags; i++) {
-              // Retrieve tag name
-              sprintf(name_temp, "Tags_name_%d", i);
-              if (! (attr=dataFile.get_att(name_temp)) ) {
-                  delete[] Tags_keys;
-                  stringstream msg;
-                  msg << "get_att(" << name_temp << ")";
-                  cleanupAndThrow(mesh_p, msg.str());
-              }
-              boost::scoped_array<char> name(attr->as_string(0));
-              delete attr;
-              mesh_p->addTagMap(name.get(), Tags_keys[i]);
-            }
-            delete[] Tags_keys;
-          }
-        }
-   
-        if (noError()) {
-            // Nodes_DofDistribution
-            std::vector<index_t> first_DofComponent(mpi_size+1);
-            if (! (nc_var_temp = dataFile.get_var("Nodes_DofDistribution")) ) {
-                cleanupAndThrow(mesh_p, "get_var(Nodes_DofDistribution)");
-            }
-            if (!nc_var_temp->get(&first_DofComponent[0], mpi_size+1)) {
-                cleanupAndThrow(mesh_p, "get(Nodes_DofDistribution)");
-            }
-
-            // Nodes_NodeDistribution
-            std::vector<index_t> first_NodeComponent(mpi_size+1);
-            if (! (nc_var_temp = dataFile.get_var("Nodes_NodeDistribution")) ) {
-                cleanupAndThrow(mesh_p, "get_var(Nodes_NodeDistribution)");
-            }
-            if (!nc_var_temp->get(&first_NodeComponent[0], mpi_size+1)) {
-                cleanupAndThrow(mesh_p, "get(Nodes_NodeDistribution)");
-            }
-            mesh_p->createMappings(first_DofComponent, first_NodeComponent);
-        }
-
-    } /* noError() after new Mesh() */
-
-    checkFinleyError();
-    
-    MeshAdapter* ma=new MeshAdapter(mesh_p);
-    Domain_ptr dom(ma);
-
-    blocktimer_increment("LoadMesh()", blocktimer_start);
-    return dom;
-#else
-    throw FinleyAdapterException("loadMesh: not compiled with NetCDF. Please contact your installation manager.");
-#endif /* USE_NETCDF */
-}
-
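For reference, the NetCDF reader above is the counterpart of MeshAdapter::dump(); a minimal Python round trip might look like the sketch below (this assumes escript was built with NetCDF support, and the dump file name is a placeholder).

    from esys.finley import Rectangle, LoadMesh

    # build a small domain and dump it to a NetCDF file
    dom = Rectangle(n0=4, n1=4, order=1)
    dom.dump("mesh_dump.nc")

    # loadMesh() above reads the same file back in
    dom2 = LoadMesh("mesh_dump.nc")
    print(dom2.getDim())   # -> 2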
-Domain_ptr readMesh(esysUtils::JMPI& info, const std::string& fileName,
-                    int integrationOrder, int reducedIntegrationOrder,
-                    bool optimize, const std::vector<double>& points,
-                    const std::vector<int>& tags)
-{
-    if (fileName.size() == 0 )
-        throw FinleyAdapterException("Null file name!");
-
-    double blocktimer_start = blocktimer_time();
-    Mesh* fMesh=Mesh::read(info, fileName, integrationOrder, reducedIntegrationOrder, optimize);
-    checkFinleyError();
-    MeshAdapter* ma=new MeshAdapter(fMesh);
-    ma->addDiracPoints(points, tags);    
-    blocktimer_increment("ReadMesh()", blocktimer_start);
-    return Domain_ptr(ma);
-}
-
-  
-Domain_ptr readMesh_driver(const boost::python::list& args)
-{
-    using boost::python::extract;
-    int l=len(args);
-    if (l<7) {
-        throw FinleyAdapterException("Insufficient arguments to readMesh_driver");
-    }
-    string fileName=extract<string>(args[0])();
-    int integrationOrder=extract<int>(args[1])();
-    int reducedIntegrationOrder=extract<int>(args[2])();
-    bool optimize=extract<bool>(args[3])();
-    vector<double> points;
-    vector<int> tags;
-
-    // we need to convert lists to stl vectors
-    boost::python::list pypoints=extract<boost::python::list>(args[4]);
-    boost::python::list pytags=extract<boost::python::list>(args[5]);
-    int numpts=extract<int>(pypoints.attr("__len__")());
-    int numtags=extract<int>(pytags.attr("__len__")());
-
-    boost::python::object pworld=args[6];
-    esysUtils::JMPI info;
-    if (!pworld.is_none()) {
-        extract<SubWorld_ptr> ex(pworld);
-        if (!ex.check()) {
-            throw FinleyAdapterException("Invalid escriptWorld parameter.");
-        }
-        info=ex()->getMPI();
-    } else {
-        info=esysUtils::makeInfo(MPI_COMM_WORLD);
-    }
-    Domain_ptr result=readMesh(info, fileName, integrationOrder,
-                               reducedIntegrationOrder, optimize, points, tags);
-
-    for (int i=0; i<numpts; ++i) {
-        boost::python::object temp=pypoints[i];
-        int l=extract<int>(temp.attr("__len__")());
-        for (int k=0;k<l;++k) {
-              points.push_back(extract<double>(temp[k]));
-        }
-    }
-    // bricks use up to 200 but the existing tag check will find that
-    int curmax=40;
-    TagMap& tagmap=dynamic_cast<MeshAdapter*>(result.get())->getMesh()->tagMap;
-    // first we work out what tags are already in use
-    for (TagMap::iterator it=tagmap.begin(); it!=tagmap.end(); ++it) {
-        if (it->second>curmax) {
-            curmax=it->second+1;
-        }
-    }
-
-    tags.resize(numtags, -1);
-    for (int i=0;i<numtags;++i) {
-        extract<int> ex_int(pytags[i]);
-        extract<string> ex_str(pytags[i]);
-        if (ex_int.check()) {
-            tags[i]=ex_int();
-            if (tags[i]>= curmax) {
-                curmax=tags[i]+1;
-            }
-        } else if (ex_str.check()) {
-            string s=ex_str();
-            map<string, int>::iterator it=tagmap.find(s);
-            if (it!=tagmap.end()) {
-                // we have the tag already so look it up
-                tags[i]=it->second;
-            } else {
-                result->setTagMap(s,curmax);
-                tags[i]=curmax;
-                curmax++;
-            }
-        } else {
-            throw FinleyAdapterException("Error - Unable to extract tag value.");
-        }
-    }
-    // now we need to add the dirac points
-    dynamic_cast<MeshAdapter*>(result.get())->addDiracPoints(points, tags);
-    return result;
-}  
-  
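The tag handling in readMesh_driver accepts either numeric or string tags for the Dirac points; a string that is not already in the domain's tag map receives the next free number, starting from 40. A standalone Python sketch of that resolution logic (resolve_tags is a hypothetical helper, not part of the module):

    def resolve_tags(requested, tagmap, first_free=40):
        # map a mixed list of int/str tags to numbers, extending tagmap as needed
        curmax = max([first_free] + [v + 1 for v in tagmap.values()])
        numbers = []
        for t in requested:
            if isinstance(t, int):
                numbers.append(t)
                curmax = max(curmax, t + 1)
            elif t in tagmap:
                numbers.append(tagmap[t])      # name already known, reuse its number
            else:
                tagmap[t] = curmax             # assign the next free number
                numbers.append(curmax)
                curmax += 1
        return numbers

    print(resolve_tags([1, "source", "sink", "source"], {"top": 20}))
    # -> [1, 40, 41, 40]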
-Domain_ptr readGmsh(esysUtils::JMPI& info, const std::string& fileName,
-                    int numDim, int integrationOrder,
-                    int reducedIntegrationOrder, bool optimize,
-                    bool useMacroElements, const std::vector<double>& points,
-                    const std::vector<int>& tags)
-{
-    if (fileName.size() == 0 )
-        throw FinleyAdapterException("Null file name!");
-
-    double blocktimer_start = blocktimer_time();
-    Mesh* fMesh=Mesh::readGmsh(info, fileName, numDim, integrationOrder, reducedIntegrationOrder, optimize, useMacroElements);
-    checkFinleyError();
-    blocktimer_increment("ReadGmsh()", blocktimer_start);
-    MeshAdapter* ma=new MeshAdapter(fMesh);
-    ma->addDiracPoints(points, tags);
-    return Domain_ptr(ma);
-}
-  
-Domain_ptr readGmsh_driver(const boost::python::list& args)
-{
-    using boost::python::extract;
-    int l=len(args);
-    if (l<7) {
-        throw FinleyAdapterException("Insufficient arguments to readGmsh_driver");

-    }
-    string fileName=extract<string>(args[0])();
-    int numDim=extract<int>(args[1])();
-    int integrationOrder=extract<int>(args[2])();
-    int reducedIntegrationOrder=extract<int>(args[3])();
-    bool optimize=extract<bool>(args[4])();
-    bool useMacroElements=extract<bool>(args[5])();
-    vector<double> points;
-    vector<int> tags;
-
-    // we need to convert lists to stl vectors
-    boost::python::list pypoints=extract<boost::python::list>(args[6]);
-    boost::python::list pytags=extract<boost::python::list>(args[7]);
-    int numpts=extract<int>(pypoints.attr("__len__")());
-    int numtags=extract<int>(pytags.attr("__len__")());
-    boost::python::object pworld=args[8];
-    esysUtils::JMPI info;
-    if (!pworld.is_none()) {
-        extract<SubWorld_ptr> ex(pworld);
-        if (!ex.check()) {
-            throw FinleyAdapterException("Invalid escriptWorld parameter.");
-        }
-        info=ex()->getMPI();
-    } else {
-        info=esysUtils::makeInfo(MPI_COMM_WORLD);
-    }
-    Domain_ptr result = readGmsh(info, fileName, numDim, integrationOrder,
-                                 reducedIntegrationOrder, optimize,
-                                 useMacroElements, points, tags);      
-
-    for (int i=0;i<numpts;++i) {
-        boost::python::object temp=pypoints[i];
-        int l=extract<int>(temp.attr("__len__")());
-        for (int k=0;k<l;++k) {
-            points.push_back(extract<double>(temp[k]));
-        }
-    }
-    int curmax=40; // bricks use up to 30
-    TagMap& tagmap=dynamic_cast<MeshAdapter*>(result.get())->getMesh()->tagMap;
-    // first we work out what tags are already in use
-    for (TagMap::iterator it=tagmap.begin(); it!=tagmap.end(); ++it) {
-        if (it->second>curmax) {
-            curmax=it->second+1;
-        }
-    }
-
-    tags.resize(numtags, -1);
-    for (int i=0;i<numtags;++i) {
-        extract<int> ex_int(pytags[i]);
-        extract<string> ex_str(pytags[i]);
-        if (ex_int.check()) {
-            tags[i]=ex_int();
-            if (tags[i]>= curmax) {
-                curmax=tags[i]+1;
-            }
-        } else if (ex_str.check()) {
-            string s=ex_str();
-            map<string, int>::iterator it=tagmap.find(s);
-            if (it!=tagmap.end()) {
-                // we have the tag already so look it up
-                tags[i]=it->second;
-            } else {
-                result->setTagMap(s,curmax);
-                tags[i]=curmax;
-                curmax++;
-            }
-        } else {
-            throw FinleyAdapterException("Error - Unable to extract tag value");
-        }
-    }
-    // now we need to add the dirac points
-    dynamic_cast<MeshAdapter*>(result.get())->addDiracPoints(points, tags);
-    return result;
-}   
-  
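From Python the driver above is reached through esys.finley.ReadGmsh; a minimal call might look as follows (the file name is a placeholder, and the positional arguments are the file name and the spatial dimension):

    from esys.finley import ReadGmsh

    # read a 3D gmsh mesh with default integration orders
    domain = ReadGmsh("model.msh", 3)
    print(domain.getDescription())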
-Domain_ptr brick(esysUtils::JMPI& info, dim_t n0, dim_t n1, dim_t n2, int order,
-                 double l0, double l1, double l2,
-                 bool periodic0, bool periodic1, bool periodic2,
-                 int integrationOrder, int reducedIntegrationOrder,
-                 bool useElementsOnFace, bool useFullElementOrder,
-                 bool optimize, const std::vector<double>& points,
-                 const std::vector<int>& tags,
-                 const std::map<std::string, int>& tagNamesToNums)
-{
-    const dim_t numElements[] = {n0, n1, n2};
-    const double length[] = {l0, l1, l2};
-    const bool periodic[] = {periodic0, periodic1, periodic2};
-
-    Mesh* fMesh = NULL;
-    if (order==1) {
-        fMesh=RectangularMesh_Hex8(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace, useFullElementOrder, optimize, info);
-    } else if (order==2) {
-        fMesh=RectangularMesh_Hex20(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace, useFullElementOrder, false, optimize, info);
-    } else if (order==-1) {
-        fMesh=RectangularMesh_Hex20(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace, useFullElementOrder, true, optimize, info);
-    } else {
-        stringstream message;
-        message << "Illegal interpolation order " << order;
-        throw FinleyAdapterException(message.str());
-    }
-
-    // Convert any finley errors into a C++ exception
-    checkFinleyError();
-    MeshAdapter* dom = new MeshAdapter(fMesh);
-    dom->addDiracPoints(points, tags);
-    Mesh* out=dom->getMesh().get();     
-    for (map<string, int>::const_iterator it=tagNamesToNums.begin();it!=tagNamesToNums.end();++it)
-    {
-        out->addTagMap(it->first.c_str(), it->second);
-    }
-    out->Points->updateTagList();
-    return Domain_ptr(dom);
-}
-
-Domain_ptr brick_driver(const boost::python::list& args)
-{
-    using boost::python::extract;
-
-    // we need to convert lists to stl vectors
-    boost::python::list pypoints=extract<boost::python::list>(args[15]);
-    boost::python::list pytags=extract<boost::python::list>(args[16]);
-    int numpts=extract<int>(pypoints.attr("__len__")());
-    int numtags=extract<int>(pytags.attr("__len__")());
-    vector<double> points;
-    vector<int> tags;
-    tags.resize(numtags, -1);
-    for (int i=0;i<numpts;++i) {
-        boost::python::object temp=pypoints[i];
-        int l=extract<int>(temp.attr("__len__")());
-        for (int k=0;k<l;++k) {
-            points.push_back(extract<double>(temp[k]));           
-        }
-    }
-    map<string, int> namestonums;
-    int curmax=40; // bricks use up to 30
-    for (int i=0;i<numtags;++i) {
-        extract<int> ex_int(pytags[i]);
-        extract<string> ex_str(pytags[i]);
-        if (ex_int.check()) {
-            tags[i]=ex_int();
-            if (tags[i]>= curmax) {
-                curmax=tags[i]+1;
-            }
-        } else if (ex_str.check()) {
-            string s=ex_str();
-            map<string, int>::iterator it=namestonums.find(s);
-            if (it!=namestonums.end()) {
-                // we have the tag already so look it up
-                tags[i]=it->second;
-            } else {
-                namestonums[s]=curmax;
-                tags[i]=curmax;
-                curmax++;
-            }
-        } else {
-            throw FinleyAdapterException("Error - Unable to extract tag value.");
-        }
-    }
-    boost::python::object pworld=args[17];
-    esysUtils::JMPI info;
-    if (!pworld.is_none()) {
-        extract<SubWorld_ptr> ex(pworld);
-        if (!ex.check())
-        {
-            throw FinleyAdapterException("Invalid escriptWorld parameter.");
-        }
-        info=ex()->getMPI();
-    } else {
-        info=esysUtils::makeInfo(MPI_COMM_WORLD);
-    }
-    return brick(info, static_cast<dim_t>(extract<float>(args[0])),
-                   static_cast<dim_t>(extract<float>(args[1])),
-                   static_cast<dim_t>(extract<float>(args[2])),
-                   extract<int>(args[3]), extract<double>(args[4]),
-                   extract<double>(args[5]), extract<double>(args[6]),
-                   extract<int>(args[7]), extract<int>(args[8]),
-                   extract<int>(args[9]), extract<int>(args[10]),
-                   extract<int>(args[11]), extract<int>(args[12]),
-                   extract<int>(args[13]), extract<int>(args[14]),
-                   points, tags, namestonums);
-}
-
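The user-facing wrapper for this factory is esys.finley.Brick; a minimal sketch, assuming the usual keyword names of the Python wrapper:

    from esys.finley import Brick

    # 10 x 10 x 10 first-order hexahedral elements on the unit cube
    domain = Brick(n0=10, n1=10, n2=10, l0=1.0, l1=1.0, l2=1.0, order=1)
    print(domain.getDim())   # -> 3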
-Domain_ptr rectangle(esysUtils::JMPI& info, dim_t n0, dim_t n1, int order,
-                     double l0, double l1, bool periodic0, bool periodic1,
-                     int integrationOrder, int reducedIntegrationOrder,
-                     bool useElementsOnFace, bool useFullElementOrder,
-                     bool optimize, const vector<double>& points,
-                     const vector<int>& tags,
-                     const std::map<std::string, int>& tagNamesToNums)
-{
-    const dim_t numElements[] = {n0, n1};
-    const double length[] = {l0, l1};
-    const bool periodic[] = {periodic0, periodic1};
-
-    Mesh* fMesh = NULL;
-    if (order==1) {
-        fMesh=RectangularMesh_Rec4(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace, useFullElementOrder, optimize, info);
-    } else if (order==2) {
-        fMesh=RectangularMesh_Rec8(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace,useFullElementOrder, false, optimize, info);
-    } else if (order==-1) {
-        fMesh=RectangularMesh_Rec8(numElements, length, periodic,
-                integrationOrder, reducedIntegrationOrder,
-                useElementsOnFace, useFullElementOrder, true, optimize, info);
-    } else {
-        stringstream message;
-        message << "Illegal interpolation order " << order;
-        throw FinleyAdapterException(message.str());
-    }
-
-    // Convert any finley errors into a C++ exception
-    checkFinleyError();
-    MeshAdapter* dom = new MeshAdapter(fMesh);
-    dom->addDiracPoints(points, tags);
-    Mesh* out=dom->getMesh().get();     
-    for (map<string, int>::const_iterator it=tagNamesToNums.begin();it!=tagNamesToNums.end();++it)
-    {
-        out->addTagMap(it->first.c_str(), it->second);
-    }
-    out->Points->updateTagList();
-    return Domain_ptr(dom);
-}
-
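Similarly, esys.finley.Rectangle wraps this factory; order=-1 selects the macro elements mentioned above. A minimal sketch with assumed keyword names:

    from esys.finley import Rectangle

    # 20 x 10 elements on [0,2] x [0,1], using macro elements of order 1
    domain = Rectangle(n0=20, n1=10, l0=2.0, l1=1.0, order=-1)
    print(domain.getDim())   # -> 2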
-Domain_ptr meshMerge(const boost::python::list& meshList)
-{
-    // extract the meshes from meshList
-    int num=boost::python::extract<int>(meshList.attr("__len__")());
-    vector<Mesh*> meshes(num);
-    for (int i=0; i<num; ++i) {
-        AbstractContinuousDomain& meshListMember=boost::python::extract<AbstractContinuousDomain&>(meshList[i]);
-        const MeshAdapter* finley_meshListMember=static_cast<const MeshAdapter*>(&meshListMember);
-        meshes[i]=finley_meshListMember->getFinley_Mesh();
-    }
-
-    // merge the meshes
-    Mesh* fMesh=Mesh_merge(meshes);
-
-    // Convert any finley errors into a C++ exception
-    checkFinleyError();
-    return Domain_ptr(new MeshAdapter(fMesh));
-}
-
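meshMerge() is exposed to Python as esys.finley.Merge; a hedged sketch using placeholder .fly file names:

    from esys.finley import ReadMesh, Merge

    # concatenate two independently created meshes into a single domain
    merged = Merge([ReadMesh("part_a.fly"), ReadMesh("part_b.fly")])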
-Domain_ptr rectangle_driver(const boost::python::list& args)
-{
-    using boost::python::extract;
-
-    // we need to convert lists to stl vectors
-    boost::python::list pypoints=extract<boost::python::list>(args[12]);
-    boost::python::list pytags=extract<boost::python::list>(args[13]);
-    int numpts=extract<int>(pypoints.attr("__len__")());
-    int numtags=extract<int>(pytags.attr("__len__")());
-    vector<double> points;
-    vector<int> tags;
-    tags.resize(numtags, -1);
-    for (int i=0;i<numpts;++i) {
-        boost::python::object temp=pypoints[i];
-        int l=extract<int>(temp.attr("__len__")());
-        for (int k=0;k<l;++k) {
-            points.push_back(extract<double>(temp[k]));           
-        }
-    }
-    map<string, int> tagstonames;
-    int curmax=40;
-    // note: string tags are assigned numbers in the order they appear in the list
-    for (int i=0;i<numtags;++i) {
-        extract<int> ex_int(pytags[i]);
-        extract<string> ex_str(pytags[i]);
-        if (ex_int.check()) {
-            tags[i]=ex_int();
-            if (tags[i]>= curmax) {
-                curmax=tags[i]+1;
-            }
-        } else if (ex_str.check()) {
-            string s=ex_str();
-            map<string, int>::iterator it=tagstonames.find(s);
-            if (it!=tagstonames.end()) {
-                // we have the tag already so look it up
-                tags[i]=it->second;
-            } else {
-                tagstonames[s]=curmax;
-                tags[i]=curmax;
-                curmax++;
-            }
-        } else {
-            throw FinleyAdapterException("Error - Unable to extract tag value.");
-        }
-    }
-    boost::python::object pworld=args[14];
-    esysUtils::JMPI info;
-    if (!pworld.is_none()) {
-        extract<SubWorld_ptr> ex(pworld);
-        if (!ex.check()) {
-            throw FinleyAdapterException("Invalid escriptWorld parameter.");
-        }
-        info=ex()->getMPI();
-    } else {
-        info=esysUtils::makeInfo(MPI_COMM_WORLD);
-    }
-
-    return rectangle(info, static_cast<dim_t>(extract<float>(args[0])),
-                       static_cast<dim_t>(extract<float>(args[1])),
-                       extract<int>(args[2]), extract<double>(args[3]),
-                       extract<double>(args[4]), extract<int>(args[5]),
-                       extract<int>(args[6]), extract<int>(args[7]),
-                       extract<int>(args[8]), extract<int>(args[9]),
-                       extract<int>(args[10]), extract<int>(args[11]), 
-                       points, tags, tagstonames);
-}  
-
-Domain_ptr glueFaces(const boost::python::list& meshList, double safety_factor,
-                     double tolerance, bool optimize)
-{
-    // merge the meshes:
-    Domain_ptr merged_meshes=meshMerge(meshList);
-
-    // glue the faces:
-    const MeshAdapter* merged_finley_meshes=dynamic_cast<const MeshAdapter*>(merged_meshes.get());
-    Mesh* fMesh = merged_finley_meshes->getFinley_Mesh();
-    fMesh->glueFaces(safety_factor, tolerance, optimize);
-
-    // Convert any finley errors into a C++ exception
-    checkFinleyError();
-    return merged_meshes;
-}
-
-Domain_ptr joinFaces(const boost::python::list& meshList, double safety_factor,
-                     double tolerance, bool optimize)
-{
-    // merge the meshes:
-    Domain_ptr merged_meshes=meshMerge(meshList);
-
-    // join the faces:
-    const MeshAdapter* merged_finley_meshes=static_cast<const MeshAdapter*>(merged_meshes.get());
-    Mesh* fMesh=merged_finley_meshes->getFinley_Mesh();
-    fMesh->joinFaces(safety_factor, tolerance, optimize);
-
-    // Convert any finley errors into a C++ exception
-    checkFinleyError();
-    return merged_meshes;
-}
-
-} // end of namespace
-
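glueFaces() and joinFaces() are exposed as esys.finley.GlueFaces and esys.finley.JoinFaces; the sketch below assumes two placeholder meshes whose boundary faces match, with keyword names as in the Python binding:

    from esys.finley import ReadMesh, JoinFaces

    top = ReadMesh("block_top.fly")
    bottom = ReadMesh("block_bottom.fly")

    # replace the matching face elements by contact (joint) elements
    domain = JoinFaces([top, bottom], safetyFactor=0.2, tolerance=1.e-8)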
diff --git a/finley/src/CPPAdapter/MeshAdapterFactory.h b/finley/src/CPPAdapter/MeshAdapterFactory.h
deleted file mode 100644
index f82e0ee..0000000
--- a/finley/src/CPPAdapter/MeshAdapterFactory.h
+++ /dev/null
@@ -1,233 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef __FINLEY_MESHADAPTERFACTORY_H__
-#define __FINLEY_MESHADAPTERFACTORY_H__
-
-#include "system_dep.h"
-
-#include <finley/Finley.h>
-#include <finley/Mesh.h>
-#include <finley/RectangularMesh.h>
-
-#include "MeshAdapter.h"
-
-#include <escript/AbstractContinuousDomain.h>
-#include <escript/SubWorld.h>
-
-#include <boost/python/list.hpp>
-
-#include <sstream>
-
-namespace finley {
-
-  /**
-     \brief
-     A suite of factory methods for creating various MeshAdapters.
-
-     Description:
-     A suite of factory methods for creating various MeshAdapters.
-  */
- 
-/**
-    \brief
-    recovers a mesh from a dump file
-    \param fileName Input -  The name of the file.
-*/
-FINLEY_DLL_API
-escript::Domain_ptr loadMesh(const std::string& fileName);
-
-/**
-    \brief
-    Read a mesh from a file. For MPI parallel runs fan out the mesh to multiple processes.
-    \param fileName Input -  The name of the file.
-    \param integrationOrder Input - order of the quadrature scheme.  
-    If integrationOrder<0 the integration order is selected independently.
-    \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-    If reducedIntegrationOrder<0 the integration order is selected independently.
-    \param optimize Input - switches on the optimization of node labels 
-    
-    \warning These defaults are also encoded in readMesh_driver. Please ensure any changes are consistent
-*/
-FINLEY_DLL_API
-escript::Domain_ptr readMesh(const std::string& fileName,
-                             int integrationOrder=-1,
-                             int reducedIntegrationOrder=-1,
-                             bool optimize=false,
-                             const std::vector<double>& points=std::vector<double>(),
-                             const std::vector<int>& tags=std::vector<int>());
-
-/**
-    \brief Python driver for readMesh()
-    \param args see readMesh() definition for order of params
-*/
-FINLEY_DLL_API
-escript::Domain_ptr readMesh_driver(const boost::python::list& args);
-   
-/**
-    \brief
-    Read a gmsh mesh file
-    \param fileName Input -  The name of the file.
-    \param numDim Input -  spatial dimension
-    \param integrationOrder Input - order of the quadrature scheme.  
-    If integrationOrder<0 the integration order is selected independently.
-    \param reducedIntegrationOrder Input - order of the reduced quadrature scheme.  
-    If reducedIntegrationOrder<0 the integration order is selected independently.
-    \param optimize Input - switches on the optimization of node labels 
-    \param useMacroElements
-*/
-FINLEY_DLL_API
-escript::Domain_ptr readGmsh(const std::string& fileName, int numDim,
-                             int integrationOrder=-1,
-                             int reducedIntegrationOrder=-1,
-                             bool optimize=false, bool useMacroElements=false,
-                             const std::vector<double>& points=std::vector<double>(),
-                             const std::vector<int>& tags=std::vector<int>());
-
-/**
-    \brief Python driver for readGmsh()
-    \param args see readGmsh() definition for order of params
-*/
-FINLEY_DLL_API
-escript::Domain_ptr readGmsh_driver(const boost::python::list& args); 
-  
-/**
-    \brief
-    Creates a rectangular mesh with n0 x n1 x n2 elements over the brick 
-    [0,l0] x [0,l1] x [0,l2].
-
-    \param n0,n1,n2 number of elements in each dimension
-    \param order =1, =-1 or =2 gives the order of shape function
-                 (-1= macro elements of order 1)
-    \param l0,l1,l2 length of each side of brick
-    \param periodic0, periodic1, periodic2 whether or not boundary 
-           conditions of the dimension are periodic
-    \param integrationOrder order of the quadrature scheme.  
-         If integrationOrder<0 the integration order is selected independently.
-    \param reducedIntegrationOrder order of the reduced quadrature scheme.  
-         If reducedIntegrationOrder<0 the integration order is selected independently.
-    \param useElementsOnFace whether or not to use elements on face
-    \param useFullElementOrder whether to use second order elements
-    \param optimize whether to apply optimization
-    \param points
-    \param tags
-    \param tagNamesToNums
-*/
-FINLEY_DLL_API
-escript::Domain_ptr brick(esysUtils::JMPI& info,
-                          dim_t n0=1, dim_t n1=1, dim_t n2=1, int order=1,
-                          double l0=1., double l1=1., double l2=1.,
-                          bool periodic0=false, bool periodic1=false,
-                          bool periodic2=false, int integrationOrder=-1,
-                          int reducedIntegrationOrder=-1,
-                          bool useElementsOnFace=false,
-                          bool useFullElementOrder=false, bool optimize=false,
-                          const std::vector<double>& points=std::vector<double>(),
-                          const std::vector<int>& tags=std::vector<int>(),
-                          const std::map<std::string, int>& tagNamesToNums=std::map<std::string, int>());
-                    
-   /**
-   \brief Python driver for brick()
-   \param args see brick() definition for order of params
-   */
-   FINLEY_DLL_API
-   escript::Domain_ptr brick_driver(const boost::python::list& args);
-
-   /**
-   \brief Python driver for rectangle()
-   \param args see rectangle() definition for order of params
-   */
-   FINLEY_DLL_API
-   escript::Domain_ptr rectangle_driver(const boost::python::list& args);   
-   
-   
-  /**
-     \brief
-     Creates a rectangular mesh with n0 x n1 elements over the rectangle
-     [0,l0] x [0,l1].
-
-     \param n0,n1 number of elements in each dimension
-     \param l0,l1 length of each side of brick
-     \param order =1, =-1 or =2 gives the order of shape function
-                  (-1= macro elements of order 1)
-     \param periodic0, periodic1 whether or not the boundary conditions of the
-            dimension are periodic
-     \param integrationOrder order of the quadrature scheme. 
-            If integrationOrder<0 the integration order is selected 
-            independently.
-     \param reducedIntegrationOrder order of the reduced quadrature scheme.  
-            If reducedIntegrationOrder<0 the integration order is selected
-            independently.
-     \param useElementsOnFace whether or not to use elements on face
-     \param useFullElementOrder
-     \param optimize
-     \param points
-     \param tags
-     \param tagNamesToNums
-  */
-FINLEY_DLL_API
-escript::Domain_ptr rectangle(esysUtils::JMPI& info,
-                              dim_t n0=1, dim_t n1=1, int order=1,
-                              double l0=1.0, double l1=1.0,
-                              bool periodic0=false, bool periodic1=false,
-                              int integrationOrder=-1,
-                              int reducedIntegrationOrder=-1,
-                              bool useElementsOnFace=false,
-                              bool useFullElementOrder=false,
-                              bool optimize=false,
-                              const std::vector<double>& points=std::vector<double>(),
-                              const std::vector<int>& tags=std::vector<int>(),
-                              const std::map<std::string, int>& tagNamesToNums=std::map<std::string, int>());
-
-/**
-    \brief
-    Merges a list of meshes into one mesh.
-    \param meshList Input - The list of meshes.
-*/
-FINLEY_DLL_API
-escript::Domain_ptr meshMerge(const boost::python::list& meshList);
-/**
-    \brief
-    Detects matching faces in the mesh, removes them from the mesh 
-    and joins the elements touched by the face elements.
-    \param meshList Input - The list of meshes.
-    \param safetyFactor Input - ??
-    \param tolerance Input - ??
-    \param optimize Input - switches on the optimization of node labels 
-*/
-FINLEY_DLL_API
-escript::Domain_ptr glueFaces(const boost::python::list& meshList,
-                              double safetyFactor=0.2, double tolerance=1.e-8,
-                              bool optimize=false);
-
-/**
-    \brief
-    Detects matching faces in the mesh and replaces them by joint elements.
-    \param meshList Input - The list of meshes.
-    \param safetyFactor Input - ??
-    \param tolerance Input - ??
-    \param optimize Input - switches on the optimization of node labels 
-*/
-FINLEY_DLL_API
-escript::Domain_ptr joinFaces(const boost::python::list& meshList,
-                              double safetyFactor=0.2, double tolerance=1.e-8,
-                              bool optimize=false);
- 
-} // end of namespace
-
-#endif // __FINLEY_MESHADAPTERFACTORY_H__
-
diff --git a/finley/src/CPPAdapter/finleycpp.cpp b/finley/src/CPPAdapter/finleycpp.cpp
deleted file mode 100644
index 7ee1207..0000000
--- a/finley/src/CPPAdapter/finleycpp.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#ifdef ESYS_MPI
-#include "esysUtils/Esys_MPI.h"
-#endif
-#include "../Finley.h"
-
-#include <pasowrap/SystemMatrixAdapter.h>
-#include <pasowrap/TransportProblemAdapter.h>
-
-
-#include "MeshAdapter.h"
-#include "MeshAdapterFactory.h"
-// #include "SystemMatrixAdapter.h"
-// #include "TransportProblemAdapter.h"
-
-#include "FinleyAdapterException.h"
-// #include "esysUtils/EsysException.h"
-#include "esysUtils/esysExceptionTranslator.h"
-
-#include "escript/AbstractContinuousDomain.h"
-
-#include <boost/python.hpp>
-#include <boost/python/module.hpp>
-#include <boost/python/def.hpp>
-#include <boost/python/detail/defaults_gen.hpp>
-#include <boost/version.hpp>
-
-using namespace boost::python;
-
-BOOST_PYTHON_MODULE(finleycpp)
-{
-// This feature was added in boost v1.34
-#if ((BOOST_VERSION/100)%1000 > 34) || (BOOST_VERSION/100000 >1)
-  // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
-  docstring_options docopt(true, true, false);
-#endif
-
-  scope().attr("__doc__") = "To use this module, please import esys.finley";    
-  
-  
-  //
-  // NOTE: The return_value_policy is necessary for functions that
-  // return pointers.
-  //
-  register_exception_translator<finley::FinleyAdapterException>(&(esysUtils::RuntimeErrorTranslator));
-
-  def("LoadMesh",finley::loadMesh,
-      (arg("fileName")="file.nc"),":rtype: `Domain`"
-/*      ,return_value_policy<manage_new_object>());*/
-      );
-
-  
-  def("__ReadMesh_driver", finley::readMesh_driver,
-      (arg("params"))
-//  def("ReadMesh",finley::readMesh,
-//      (arg("fileName")="file.fly",arg("integrationOrder")=-1,  arg("reducedIntegrationOrder")=-1,  arg("optimize")=true)
-	,"Read a mesh from a file. For MPI parallel runs fan out the mesh to multiple processes.\n\n"
-":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
-":param integrationOrder: order of the quadrature scheme. If *integrationOrder<0* the integration order is selected independently.\n"
-":type integrationOrder: ``int``\n"
-":param reducedIntegrationOrder: order of the quadrature scheme. If *reducedIntegrationOrder<0* the integration order is selected independently.\n"
-":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``");
-
-  def("__ReadGmsh_driver", finley::readGmsh_driver,
-      (arg("params"))  
-//  def("ReadGmsh",finley::readGmsh,
-//      (arg("fileName")="file.msh",
-//       arg("numDim"), 
-//       arg("integrationOrder")=-1, 
-//       arg("reducedIntegrationOrder")=-1, 
-//       arg("optimize")=true,  
-//       arg("useMacroElements")=false)
-,"Read a gmsh mesh file\n\n"
-":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
-":param integrationOrder: order of the quadrature scheme. If *integrationOrder<0* the integration order is selected independently.\n"
-":type integrationOrder: ``int``\n"
-":param reducedIntegrationOrder: order of the quadrature scheme. If *reducedIntegrationOrder<0* the integration order is selected independently.\n"
-":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``\n"
-":param useMacroElements: Enable the usage of macro elements instead of second order elements.\n:type useMacroElements: ``bool``"
-);
-
-  def ("__Brick_driver",finley::brick_driver,
-      (arg("params"))
-,"Creates a rectangular mesh with n0 x n1 x n2 elements over the brick [0,l0] x [0,l1] x [0,l2]."
-"\n\n:param n0: number of elements in direction 0\n:type n0: ``int``\n:param n1: number of elements in direction 1\n:type n1: ``int``\n"
-":param n2:number of elements in direction 2\n:type n2: ``int``\n"
-":param order: =1, =-1 or =2 gives the order of shape function. If -1 macro elements of order 1 are used.\n"
-":param l0: length of side 0\n"
-":type  l0: ``float``\n"
-":param l1: length of side 1\n"
-":type  l1: ``float``\n"
-":param l2: length of side 2\n"
-":type  l2: ``float``\n"
-":param periodic0: whether or not boundary conditions are periodic in direction 0\n:type periodic0: ``bool``\n"
-":param periodic1: whether or not boundary conditions are periodic in direction 1\n:type periodic1: ``bool``\n"
-":param periodic2: whether or not boundary conditions are periodic in direction 2\n:type periodic2: ``bool``\n"
-":param integrationOrder: order of the quadrature scheme. If integrationOrder<0 the integration order is selected independently.\n"
-":param reducedIntegrationOrder: order of the quadrature scheme. If reducedIntegrationOrder<0 the integration order is selected independently.\n"
-":param useElementsOnFace:  whether or not to use elements on face\n"
-":type useElementsOnFace: ``int``\n"
-":param useFullElementOrder: Whether or not to use Hex27 elements\n"":type useFullElementOrder: ``bool``\n"
-":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``"
-);
-
-  def ("__Rectangle_driver",finley::rectangle_driver,
-      (arg("args")) 
-,"Creates a rectangular mesh with n0 x n1 elements over the brick [0,l0] x [0,l1]."
-"\n\n:param n0:\n:type n0:\n:param n1:\n:type n1:\n"
-":param order: =1, =-1 or =2 gives the order of shape function. If -1 macro elements of order 1 are used.\n"
-":param l0: length of side 0\n:param l1:\n"
-":param integrationOrder: order of the quadrature scheme. If integrationOrder<0 the integration order is selected independently.\n"
-":param reducedIntegrationOrder: order of the quadrature scheme. If reducedIntegrationOrder<0 the integration order is selected independently.\n"
-":param useElementsOnFace:  whether or not to use elements on face\n"
-":type useElementsOnFace: ``int``"
-":param periodic0:  whether or not boundary conditions are periodic\n"
-":param periodic1:\n"
-":param useFullElementOrder: Whether or not to use Rec9 elements\n"":type useFullElementOrder: ``bool``\n"
-":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``"
-);
-
-  def("Merge",finley::meshMerge,args("meshList")
-,"Merges a list of meshes into one mesh.\n\n:rtype: `Domain`"
-  );
-
-  def("GlueFaces",finley::glueFaces,
-      (arg("meshList"),arg("safetyFactor")=0.2,
-      arg("tolerance")=1.e-8,
-      arg("optimize")=true)
-,"Detects matching faces in the mesh, removes them from the mesh and joins the elements touched by the face elements."
-	);
-
-  def("JoinFaces",finley::joinFaces,
-      (arg("meshList"), arg("safetyFactor")=0.2,
-      arg("tolerance")=1.e-8,
-      arg("optimize")=true)
-,"Detects matching faces in the mesh and replaces them by joint elements."
-	);
-
-
-  class_<finley::MeshAdapter, bases<escript::AbstractContinuousDomain> >
-      ("MeshAdapter","A concrete class representing a domain. For more details, please consult the c++ documentation.",init<optional <finley::Mesh*> >())
-      .def(init<const finley::MeshAdapter&>())
-      .def("write",&finley::MeshAdapter::write,args("filename"),
-"Write the current mesh to a file with the given name.")
-      .def("print_mesh_info",&finley::MeshAdapter::Print_Mesh_Info,(arg("full")=false),
-":param full:\n:type full: ``bool``")
-      .def("dump",&finley::MeshAdapter::dump,args("fileName")
-,"dumps the mesh to a file with the given name.")
-      .def("getDescription",&finley::MeshAdapter::getDescription,
-":return: a description for this domain\n:rtype: ``string``")
-      .def("getDim",&finley::MeshAdapter::getDim,":rtype: ``int``")
-      .def("getDataShape",&finley::MeshAdapter::getDataShape, args("functionSpaceCode"),
-":return: a pair (dps, ns) where dps=the number of data points per sample, and ns=the number of samples\n:rtype: ``tuple``")
-      .def("getNumDataPointsGlobal",&finley::MeshAdapter::getNumDataPointsGlobal,
-":return: the number of data points summed across all MPI processes\n"
-":rtype: ``int``")
-      .def("addPDEToSystem",&finley::MeshAdapter::addPDEToSystem,
-args("mat", "rhs","A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
-"adds a PDE onto the stiffness matrix mat and a rhs\n\n"
-":param mat:\n:type mat: `OperatorAdapter`\n:param rhs:\n:type rhs: `Data`\n"
-":param A:\n:type A: `Data`\n"
-":param B:\n:type B: `Data`\n"
-":param C:\n:type C: `Data`\n"
-":param D:\n:type D: `Data`\n"
-":param X:\n:type X: `Data`\n"
-":param Y:\n:type Y: `Data`\n"
-":param d:\n:type d: `Data`\n"
-":param d_contact:\n:type d_contact: `Data`\n"
-":param y_contact:\n:type y_contact: `Data`\n"
-)
-      .def("addPDEToLumpedSystem",&finley::MeshAdapter::addPDEToLumpedSystem,
-args("mat", "D", "d"),
-"adds a PDE onto the lumped stiffness matrix\n\n"
-":param mat:\n:type mat: `Data`\n"
-":param D:\n:type D: `Data`\n"
-":param d:\n:type d: `Data`\n"
-":param useHRZ:\n:type useHRZ: bool\n")
-      .def("addPDEToRHS",&finley::MeshAdapter::addPDEToRHS, 
-args("rhs", "X", "Y", "y", "y_contact"),
-"adds a PDE onto the stiffness matrix mat and a rhs\n\n"
-":param rhs:\n:type rhs: `Data`\n"
-":param X:\n:type X: `Data`\n"
-":param Y:\n:type Y: `Data`\n"
-":param y:\n:type y: `Data`\n"
-":param y_contact:\n:type y_contact: `Data`"
-)
-      .def("addPDEToTransportProblem",&finley::MeshAdapter::addPDEToTransportProblem,
-args( "tp", "source", "M", "A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
-":param tp:\n:type tp: `TransportProblemAdapter`\n"
-":param source:\n:type source: `Data`\n"
-":param M:\n:type M: `Data`\n"
-":param A:\n:type A: `Data`\n"
-":param B:\n:type B: `Data`\n"
-":param C:\n:type C: `Data`\n"
-":param D:\n:type D: `Data`\n"
-":param X:\n:type X: `Data`\n"
-":param Y:\n:type Y: `Data`\n"
-":param d:\n:type d: `Data`\n"
-":param y:\n:type y: `Data`\n"
-":param d_contact:\n:type d_contact: `Data`\n"
-":param y_contact:\n:type y_contact: `Data`\n"
-)
-      .def("newOperator",&finley::MeshAdapter::newSystemMatrix,
-args("row_blocksize", "row_functionspace", "column_blocksize", "column_functionspace", "type"),
-"creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros\n\n"
-":param row_blocksize:\n:type row_blocksize: ``int``\n"
-":param row_functionspace:\n:type row_functionspace: `FunctionSpace`\n"
-":param column_blocksize:\n:type column_blocksize: ``int``\n"
-":param column_functionspace:\n:type column_functionspace: `FunctionSpace`\n"
-":param type:\n:type type: ``int``\n"
-)
-      .def("newTransportProblem",&finley::MeshAdapter::newTransportProblem,
-args("theta", "blocksize", "functionspace", "type"),
-"creates a TransportProblemAdapter\n\n"
-":param theta:\n:type theta: ``float``\n"
-":param blocksize:\n:type blocksize: ``int``\n"
-":param functionspace:\n:type functionspace: `FunctionSpace`\n"
-":param type:\n:type type: ``int``\n"
-)
-      .def("getSystemMatrixTypeId",&finley::MeshAdapter::getSystemMatrixTypeId,
-args("options"),
-":return: the identifier of the matrix type to be used for the global stiffness matrix when particular solver options are used.\n"
-":rtype: ``int``\n"
-":param options:\n:type options: `SolverBuddy`\n"
-)
-      .def("getTransportTypeId",&finley::MeshAdapter::getTransportTypeId,
-args("solver", "preconditioner", "package", "symmetry"),
-":return: the identifier of the transport problem type to be used when a particular solver, preconditioner, package and symmetric matrix is used.\n"
-":rtype: ``int``\n"
-":param solver:\n:type solver: ``int``\n"
-":param preconditioner:\n:type preconditioner: ``int``\n"
-":param package:\n:type package: ``int``\n"
-":param symmetry:\n:type symmetry: ``int``\n"
-)
-      .def("setX",&finley::MeshAdapter::setNewX,
-args("arg"), "assigns new location to the domain\n\n:param arg:\n:type arg: `Data`")
-      .def("getX",&finley::MeshAdapter::getX, ":return: locations in the FEM nodes\n\n"
-":rtype: `Data`")
-      .def("getNormal",&finley::MeshAdapter::getNormal,
-":return: boundary normals at the quadrature point on the face elements\n"
-":rtype: `Data`")
-      .def("getSize",&finley::MeshAdapter::getSize,":return: the element size\n"
-":rtype: `Data`")
-      .def("setTagMap",&finley::MeshAdapter::setTagMap,args("name","tag"),
-"Give a tag number a name.\n\n:param name: Name for the tag\n:type name: ``string``\n"
-":param tag: numeric id\n:type tag: ``int``\n:note: Tag names must be unique within a domain")
-      .def("getTag",&finley::MeshAdapter::getTag,args("name"),":return: tag id for "
-"``name``\n:rtype: ``string``")
-      .def("isValidTagName",&finley::MeshAdapter::isValidTagName,args("name"),
-":return: True is ``name`` corresponds to a tag\n:rtype: ``bool``")
-      .def("showTagNames",&finley::MeshAdapter::showTagNames,":return: A space separated list of tag names\n:rtype: ``string``")
-      .def("getMPISize",&finley::MeshAdapter::getMPISize,":return: the number of processes used for this `Domain`\n:rtype: ``int``")
-      .def("getMPIRank",&finley::MeshAdapter::getMPIRank,":return: the rank of this process\n:rtype: ``int``")
-      .def("MPIBarrier",&finley::MeshAdapter::MPIBarrier,"Wait until all processes have reached this point")
-      .def("onMasterProcessor",&finley::MeshAdapter::onMasterProcessor,":return: True if this code is executing on the master process\n:rtype: `bool`")
-//       .def("addDiracPoints", &finley::MeshAdapter::addDiracPoints,(arg("points"), arg("tags")=finley::EmptyPythonList),
-//       "adds points to support more Dirac delta function.\n\n"
-//        	":param points: list of points where Dirac delta function are to be defined. The location will be adjusted to\n"
-// 	"               to match the nearest node of the finite element mesh.\n:type points: ``list`` of ``list``s of ``floats``\n"
-// 	":param tags: list of tags to be assigned to the points.\n:type tags: ``list`` of ``int``s or ``strings``\n"     
-//       )
-//       .def("addDiracPoint", &finley::MeshAdapter::addDiracPoint,(arg("point"), arg("tag")=-1),
-//         "adds a point to support more Dirac delta function.\n\n"
-//        	":param point: point where Dirac delta function are to be defined. The location will be adjusted to\n"
-// 	"               to match the nearest node of the finite element mesh.\n:type point: ``list`` of ``floats``\n"
-// 	":param tags: tag to be assigned to the point.\n:type tag: ``int``s or ``strings``\n"     
-//       ) 
-//       .def("addDiracPoint", &finley::MeshAdapter::addDiracPointWithTagName,(arg("point"), arg("tag")),
-//         "adds a point to support more Dirac delta function.\n\n"
-//        	":param point: point where Dirac delta function are to be defined. The location will be adjusted to\n"
-// 	"               to match the nearest node of the finite element mesh.\n:type point: ``list`` of ``floats``\n"
-// 	":param tags: tag to be assigned to the point.\n:type tag: ``int``s or ``strings``\n"     
-//       ) 
-;
-
-//   class_<finley::SystemMatrixAdapter, bases<escript::AbstractSystemMatrix> >
-//       ("OperatorAdapter","A concrete class representing an operator. For more details, please see the c++ documentation.", no_init)
-//       .def("print_matrix_info",&finley::SystemMatrixAdapter::Print_Matrix_Info,(arg("full")=false),"prints information about a system matrix")
-//       .def("nullifyRowsAndCols",&finley::SystemMatrixAdapter::nullifyRowsAndCols)
-//       .def("resetValues",&finley::SystemMatrixAdapter::resetValues, "resets the matrix entries")
-//       .def("saveMM",&finley::SystemMatrixAdapter::saveMM,args("fileName"), 
-// "writes the matrix to a file using the Matrix Market file format")
-//       .def("saveHB",&finley::SystemMatrixAdapter::saveHB, args("filename"),
-// "writes the matrix to a file using the Harwell-Boeing file format");
-// 
-//   class_<finley::TransportProblemAdapter, bases<escript::AbstractTransportProblem> >
-//       ("TransportProblemAdapter","",no_init)
-//       .def("getSafeTimeStepSize",&finley::TransportProblemAdapter::getSafeTimeStepSize)
-//       .def("getUnlimitedTimeStepSize",&finley::TransportProblemAdapter::getUnlimitedTimeStepSize)
-//       .def("resetTransport",&finley::TransportProblemAdapter::resetTransport,
-// "resets the transport operator typically as they have been updated");
-}
diff --git a/finley/src/CPPAdapter/system_dep.h b/finley/src/CPPAdapter/system_dep.h
deleted file mode 100644
index 1373684..0000000
--- a/finley/src/CPPAdapter/system_dep.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/**
-\file finley/src/CPPAdapter/system_dep.h
-\ingroup Other
- */
-/*
-   @(#) system_dep.h
-*/
-
-#ifndef finley_system_dep_h
-#define finley_system_dep_h
-
-#include <cmath>
-
-#define FINLEY_DLL_API
-
-#ifdef _WIN32
-
-#   ifndef FINLEY_STATIC_LIB
-#      undef FINLEY_DLL_API
-#      ifdef FINLEY_EXPORTS
-#         define FINLEY_DLL_API __declspec(dllexport)
-#      else
-#         define FINLEY_DLL_API __declspec(dllimport)
-#      endif
-#   endif
-#endif
-
-#endif
-
diff --git a/finley/src/DomainFactory.cpp b/finley/src/DomainFactory.cpp
new file mode 100644
index 0000000..d195df9
--- /dev/null
+++ b/finley/src/DomainFactory.cpp
@@ -0,0 +1,881 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <finley/DomainFactory.h>
+
+#include <escript/index.h>
+#include <escript/SubWorld.h>
+
+#ifdef ESYS_HAVE_NETCDF
+#include <netcdfcpp.h>
+#endif
+
+#include <boost/python/extract.hpp>
+#include <boost/scoped_array.hpp>
+
+#include <sstream>
+
+using namespace std;
+using namespace escript;
+namespace bp = boost::python;
+
+namespace finley {
+
+#ifdef ESYS_HAVE_NETCDF
+// A convenience method to retrieve an integer attribute from a NetCDF file
+template<typename T>
+T ncReadAtt(NcFile* dataFile, const string& fName, const string& attrName)
+{
+    NcAtt* attr = dataFile->get_att(attrName.c_str());
+    if (!attr) {
+        stringstream msg;
+        msg << "loadMesh: Error retrieving integer attribute '" << attrName
+            << "' from NetCDF file '" << fName << "'";
+        throw IOError(msg.str());
+    }
+    T value = (sizeof(T) > 4 ? attr->as_long(0) : attr->as_int(0));
+    delete attr;
+    return value;
+}
+#endif
+
+inline void cleanupAndThrow(FinleyDomain* dom, string msg)
+{
+    delete dom;
+    string msgPrefix("loadMesh: NetCDF operation failed - ");
+    throw IOError(msgPrefix+msg);
+}
+
+Domain_ptr FinleyDomain::load(const string& fileName)
+{
+#ifdef ESYS_HAVE_NETCDF
+    JMPI mpiInfo = makeInfo(MPI_COMM_WORLD);
+    const string fName(mpiInfo->appendRankToFileName(fileName));
+
+    // Open NetCDF file for reading
+    NcAtt *attr;
+    NcVar *nc_var_temp;
+    // netCDF error handler
+    NcError err(NcError::silent_nonfatal);
+    // Create the NetCDF file.
+    NcFile dataFile(fName.c_str(), NcFile::ReadOnly);
+    if (!dataFile.is_valid()) {
+        stringstream msg;
+        msg << "loadMesh: Opening NetCDF file '" << fName << "' for reading failed.";
+        throw IOError(msg.str());
+    }
+
+    // Read NetCDF integer attributes
+
+    // index_size was only introduced with 64-bit index support so fall back
+    // to 32 bits if not found.
+    int index_size;
+    try {
+        index_size = ncReadAtt<int>(&dataFile, fName, "index_size");
+    } catch (IOError& e) {
+        index_size = 4;
+    }
+    // technically we could cast if reading 32-bit data on 64-bit escript
+    // but cost-benefit analysis clearly favours this implementation for now
+    if (sizeof(index_t) != index_size) {
+        throw IOError("loadMesh: size of index types at runtime differ from dump file");
+    }
+
+    int mpi_size = ncReadAtt<int>(&dataFile, fName, "mpi_size");
+    int mpi_rank = ncReadAtt<int>(&dataFile, fName, "mpi_rank");
+    int numDim = ncReadAtt<int>(&dataFile, fName, "numDim");
+    int order = ncReadAtt<int>(&dataFile, fName, "order");
+    int reduced_order = ncReadAtt<int>(&dataFile, fName, "reduced_order");
+    dim_t numNodes = ncReadAtt<dim_t>(&dataFile, fName, "numNodes");
+    dim_t num_Elements = ncReadAtt<dim_t>(&dataFile, fName, "num_Elements");
+    dim_t num_FaceElements = ncReadAtt<dim_t>(&dataFile, fName, "num_FaceElements");
+    dim_t num_ContactElements = ncReadAtt<dim_t>(&dataFile, fName, "num_ContactElements");
+    dim_t num_Points = ncReadAtt<dim_t>(&dataFile, fName, "num_Points");
+    int num_Elements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_Elements_numNodes");
+    int Elements_TypeId = ncReadAtt<int>(&dataFile, fName, "Elements_TypeId");
+    int num_FaceElements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_FaceElements_numNodes");
+    int FaceElements_TypeId = ncReadAtt<int>(&dataFile, fName, "FaceElements_TypeId");
+    int num_ContactElements_numNodes = ncReadAtt<int>(&dataFile, fName, "num_ContactElements_numNodes");
+    int ContactElements_TypeId = ncReadAtt<int>(&dataFile, fName, "ContactElements_TypeId");
+    int Points_TypeId = ncReadAtt<int>(&dataFile, fName, "Points_TypeId");
+    int num_Tags = ncReadAtt<int>(&dataFile, fName, "num_Tags");
+
+    // Verify size and rank
+    if (mpiInfo->size != mpi_size) {
+        stringstream msg;
+        msg << "loadMesh: The NetCDF file '" << fName
+            << "' can only be read on " << mpi_size
+            << " CPUs. Currently running: " << mpiInfo->size;
+        throw FinleyException(msg.str());
+    }
+    if (mpiInfo->rank != mpi_rank) {
+        stringstream msg;
+        msg << "loadMesh: The NetCDF file '" << fName
+            << "' should be read on CPU #" << mpi_rank
+            << " and NOT on #" << mpiInfo->rank;
+        throw FinleyException(msg.str());
+    }
+
+    // Read mesh name
+    if (! (attr=dataFile.get_att("Name")) ) {
+        stringstream msg;
+        msg << "loadMesh: Error retrieving mesh name from NetCDF file '"
+            << fName << "'";
+        throw IOError(msg.str());
+    }
+    boost::scoped_array<char> name(attr->as_string(0));
+    delete attr;
+
+    // allocate mesh
+    FinleyDomain* dom = new FinleyDomain(name.get(), numDim, mpiInfo);
+
+    // read nodes
+    NodeFile* nodes = dom->getNodes();
+    nodes->allocTable(numNodes);
+    // Nodes_Id
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_Id")) )
+        cleanupAndThrow(dom, "get_var(Nodes_Id)");
+    if (! nc_var_temp->get(&nodes->Id[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_Id)");
+    // Nodes_Tag
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_Tag")) )
+        cleanupAndThrow(dom, "get_var(Nodes_Tag)");
+    if (! nc_var_temp->get(&nodes->Tag[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_Tag)");
+    // Nodes_gDOF
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_gDOF")) )
+        cleanupAndThrow(dom, "get_var(Nodes_gDOF)");
+    if (! nc_var_temp->get(&nodes->globalDegreesOfFreedom[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_gDOF)");
+    // Nodes_gNI
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_gNI")) )
+        cleanupAndThrow(dom, "get_var(Nodes_gNI)");
+    if (! nc_var_temp->get(&nodes->globalNodesIndex[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_gNI)");
+    // Nodes_grDfI
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_grDfI")) )
+        cleanupAndThrow(dom, "get_var(Nodes_grDfI)");
+    if (! nc_var_temp->get(&nodes->globalReducedDOFIndex[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_grDfI)");
+    // Nodes_grNI
+    if (! ( nc_var_temp = dataFile.get_var("Nodes_grNI")) )
+        cleanupAndThrow(dom, "get_var(Nodes_grNI)");
+    if (! nc_var_temp->get(&nodes->globalReducedNodesIndex[0], numNodes) )
+        cleanupAndThrow(dom, "get(Nodes_grNI)");
+    // Nodes_Coordinates
+    if (!(nc_var_temp = dataFile.get_var("Nodes_Coordinates")))
+        cleanupAndThrow(dom, "get_var(Nodes_Coordinates)");
+    if (! nc_var_temp->get(&nodes->Coordinates[0], numNodes, numDim) )
+        cleanupAndThrow(dom, "get(Nodes_Coordinates)");
+
+    nodes->updateTagList();
+
+    // read elements
+    const_ReferenceElementSet_ptr refElements(new ReferenceElementSet(
+                (ElementTypeId)Elements_TypeId, order, reduced_order));
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    dom->setElements(elements);
+    elements->allocTable(num_Elements);
+    elements->minColor = 0;
+    elements->maxColor = num_Elements-1;
+    if (num_Elements > 0) {
+       // Elements_Id
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Id")) )
+           cleanupAndThrow(dom, "get_var(Elements_Id)");
+       if (! nc_var_temp->get(&elements->Id[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Id)");
+       // Elements_Tag
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Tag")) )
+           cleanupAndThrow(dom, "get_var(Elements_Tag)");
+       if (! nc_var_temp->get(&elements->Tag[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Tag)");
+       // Elements_Owner
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Owner")) )
+           cleanupAndThrow(dom, "get_var(Elements_Owner)");
+       if (! nc_var_temp->get(&elements->Owner[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Owner)");
+       // Elements_Color
+       if (! ( nc_var_temp = dataFile.get_var("Elements_Color")) )
+           cleanupAndThrow(dom, "get_var(Elements_Color)");
+       if (! nc_var_temp->get(&elements->Color[0], num_Elements) )
+           cleanupAndThrow(dom, "get(Elements_Color)");
+       // Now we need to adjust maxColor
+       index_t mc = elements->Color[0];
+       for (index_t i = 1; i < num_Elements; ++i) {
+           if (mc < elements->Color[i]) {
+               mc = elements->Color[i];
+           }
+       }
+       elements->maxColor = mc;
+       // Elements_Nodes
+       int* Elements_Nodes = new int[num_Elements*num_Elements_numNodes];
+       if (!(nc_var_temp = dataFile.get_var("Elements_Nodes"))) {
+           delete[] Elements_Nodes;
+           cleanupAndThrow(dom, "get_var(Elements_Nodes)");
+       }
+       if (! nc_var_temp->get(&Elements_Nodes[0], num_Elements, num_Elements_numNodes) ) {
+           delete[] Elements_Nodes;
+           cleanupAndThrow(dom, "get(Elements_Nodes)");
+       }
+
+       // Copy temp array into elements->Nodes
+       for (index_t i = 0; i < num_Elements; i++) {
+           for (int j = 0; j < num_Elements_numNodes; j++) {
+               elements->Nodes[INDEX2(j,i,num_Elements_numNodes)]
+                    = Elements_Nodes[INDEX2(j,i,num_Elements_numNodes)];
+           }
+       }
+       delete[] Elements_Nodes;
+    } // num_Elements > 0
+    elements->updateTagList();
+
+    // get the face elements
+    const_ReferenceElementSet_ptr refFaceElements(
+            new ReferenceElementSet((ElementTypeId)FaceElements_TypeId,
+                order, reduced_order));
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    dom->setFaceElements(faces);
+    faces->allocTable(num_FaceElements);
+    faces->minColor = 0;
+    faces->maxColor = num_FaceElements-1;
+    if (num_FaceElements > 0) {
+        // FaceElements_Id
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Id")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Id)");
+        if (! nc_var_temp->get(&faces->Id[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Id)");
+        // FaceElements_Tag
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Tag")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Tag)");
+        if (! nc_var_temp->get(&faces->Tag[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Tag)");
+        // FaceElements_Owner
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Owner")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Owner)");
+        if (! nc_var_temp->get(&faces->Owner[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Owner)");
+        // FaceElements_Color
+        if (! ( nc_var_temp = dataFile.get_var("FaceElements_Color")) )
+            cleanupAndThrow(dom, "get_var(FaceElements_Color)");
+        if (! nc_var_temp->get(&faces->Color[0], num_FaceElements) )
+            cleanupAndThrow(dom, "get(FaceElements_Color)");
+        // Now we need to adjust maxColor
+        index_t mc = faces->Color[0];
+        for (index_t i = 1; i < num_FaceElements; ++i) {
+            if (mc < faces->Color[i]) {
+                mc = faces->Color[i];
+            }
+        }
+        faces->maxColor = mc;
+        // FaceElements_Nodes
+        int* FaceElements_Nodes = new int[num_FaceElements*num_FaceElements_numNodes];
+        if (!(nc_var_temp = dataFile.get_var("FaceElements_Nodes"))) {
+            delete[] FaceElements_Nodes;
+            cleanupAndThrow(dom, "get_var(FaceElements_Nodes)");
+        }
+        if (! nc_var_temp->get(&(FaceElements_Nodes[0]), num_FaceElements, num_FaceElements_numNodes) ) {
+            delete[] FaceElements_Nodes;
+            cleanupAndThrow(dom, "get(FaceElements_Nodes)");
+        }
+        // Copy temp array into faces->Nodes
+        for (index_t i = 0; i < num_FaceElements; i++) {
+            for (int j = 0; j < num_FaceElements_numNodes; j++) {
+                faces->Nodes[INDEX2(j,i,num_FaceElements_numNodes)] = FaceElements_Nodes[INDEX2(j,i,num_FaceElements_numNodes)];
+            }
+        }
+        delete[] FaceElements_Nodes;
+    } // num_FaceElements > 0
+    faces->updateTagList();
+
+    // get the Contact elements
+    const_ReferenceElementSet_ptr refContactElements(
+         new ReferenceElementSet((ElementTypeId)ContactElements_TypeId,
+             order, reduced_order));
+    ElementFile* contacts = new ElementFile(refContactElements, mpiInfo);
+    dom->setContactElements(contacts);
+    contacts->allocTable(num_ContactElements);
+    contacts->minColor = 0;
+    contacts->maxColor = num_ContactElements-1;
+    if (num_ContactElements > 0) {
+        // ContactElements_Id
+        if (! ( nc_var_temp = dataFile.get_var("ContactElements_Id")) )
+            cleanupAndThrow(dom, "get_var(ContactElements_Id)");
+        if (! nc_var_temp->get(&contacts->Id[0], num_ContactElements) )
+            cleanupAndThrow(dom, "get(ContactElements_Id)");
+        // ContactElements_Tag
+        if (! ( nc_var_temp = dataFile.get_var("ContactElements_Tag")) )
+            cleanupAndThrow(dom, "get_var(ContactElements_Tag)");
+        if (! nc_var_temp->get(&contacts->Tag[0], num_ContactElements) )
+            cleanupAndThrow(dom, "get(ContactElements_Tag)");
+        // ContactElements_Owner
+        if (! ( nc_var_temp = dataFile.get_var("ContactElements_Owner")) )
+            cleanupAndThrow(dom, "get_var(ContactElements_Owner)");
+        if (! nc_var_temp->get(&contacts->Owner[0], num_ContactElements) )
+            cleanupAndThrow(dom, "get(ContactElements_Owner)");
+        // ContactElements_Color
+        if (! ( nc_var_temp = dataFile.get_var("ContactElements_Color")) )
+            cleanupAndThrow(dom, "get_var(ContactElements_Color)");
+        if (! nc_var_temp->get(&contacts->Color[0], num_ContactElements) )
+            cleanupAndThrow(dom, "get(ContactElements_Color)");
+        // Now we need to adjust maxColor
+        index_t mc = contacts->Color[0];
+        for (index_t i = 1; i < num_ContactElements; ++i) {
+            if (mc < contacts->Color[i]) {
+                mc = contacts->Color[i];
+            }
+        }
+        contacts->maxColor = mc;
+        // ContactElements_Nodes
+        int* ContactElements_Nodes = new int[num_ContactElements*num_ContactElements_numNodes];
+        if (!(nc_var_temp = dataFile.get_var("ContactElements_Nodes"))) {
+            delete[] ContactElements_Nodes;
+            cleanupAndThrow(dom, "get_var(ContactElements_Nodes)");
+        }
+        if (! nc_var_temp->get(&ContactElements_Nodes[0], num_ContactElements, num_ContactElements_numNodes) ) {
+            delete[] ContactElements_Nodes;
+            cleanupAndThrow(dom, "get(ContactElements_Nodes)");
+        }
+        // Copy temp array into contacts->Nodes
+        for (index_t i = 0; i < num_ContactElements; i++) {
+            for (int j = 0; j < num_ContactElements_numNodes; j++) {
+                contacts->Nodes[INDEX2(j,i,num_ContactElements_numNodes)] = ContactElements_Nodes[INDEX2(j,i,num_ContactElements_numNodes)];
+            }
+        }
+        delete[] ContactElements_Nodes;
+    } // num_ContactElements > 0
+    contacts->updateTagList();
+
+    // get the Points (nodal elements)
+    const_ReferenceElementSet_ptr refPoints(new ReferenceElementSet(
+                (ElementTypeId)Points_TypeId, order, reduced_order));
+    ElementFile* points = new ElementFile(refPoints, mpiInfo);
+    dom->setPoints(points);
+    points->allocTable(num_Points);
+    points->minColor = 0;
+    points->maxColor = num_Points-1;
+    if (num_Points > 0) {
+        // Points_Id
+        if (! ( nc_var_temp = dataFile.get_var("Points_Id")))
+            cleanupAndThrow(dom, "get_var(Points_Id)");
+        if (! nc_var_temp->get(&points->Id[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Id)");
+        // Points_Tag
+        if (! ( nc_var_temp = dataFile.get_var("Points_Tag")))
+            cleanupAndThrow(dom, "get_var(Points_Tag)");
+        if (! nc_var_temp->get(&points->Tag[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Tag)");
+        // Points_Owner
+        if (! ( nc_var_temp = dataFile.get_var("Points_Owner")))
+            cleanupAndThrow(dom, "get_var(Points_Owner)");
+        if (!nc_var_temp->get(&points->Owner[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Owner)");
+        // Points_Color
+        if (! ( nc_var_temp = dataFile.get_var("Points_Color")))
+            cleanupAndThrow(dom, "get_var(Points_Color)");
+        if (!nc_var_temp->get(&points->Color[0], num_Points))
+            cleanupAndThrow(dom, "get(Points_Color)");
+        // Now we need to adjust maxColor
+        index_t mc = points->Color[0];
+        for (index_t i = 1; i < num_Points; ++i) {
+            if (mc < points->Color[i]) {
+                mc = points->Color[i];
+            }
+        }
+        points->maxColor = mc;
+        // Points_Nodes
+        int* Points_Nodes = new int[num_Points];
+        if (!(nc_var_temp = dataFile.get_var("Points_Nodes"))) {
+            delete[] Points_Nodes;
+            cleanupAndThrow(dom, "get_var(Points_Nodes)");
+        }
+        if (! nc_var_temp->get(&Points_Nodes[0], num_Points) ) {
+            delete[] Points_Nodes;
+            cleanupAndThrow(dom, "get(Points_Nodes)");
+        }
+        // Copy temp array into points->Nodes
+        for (index_t i = 0; i < num_Points; i++) {
+            points->Id[points->Nodes[INDEX2(0,i,1)]] = Points_Nodes[i];
+        }
+        delete[] Points_Nodes;
+    } // num_Points > 0
+    points->updateTagList();
+
+    // get the tags
+    if (num_Tags > 0) {
+        // Temp storage to gather node IDs
+        int *Tags_keys = new int[num_Tags];
+        char name_temp[4096];
+        int i;
+
+        // Tags_keys
+        if (! ( nc_var_temp = dataFile.get_var("Tags_keys")) ) {
+            delete[] Tags_keys;
+            cleanupAndThrow(dom, "get_var(Tags_keys)");
+        }
+        if (! nc_var_temp->get(&Tags_keys[0], num_Tags) ) {
+            delete[] Tags_keys;
+            cleanupAndThrow(dom, "get(Tags_keys)");
+        }
+        for (i=0; i<num_Tags; i++) {
+          // Retrieve tag name
+          sprintf(name_temp, "Tags_name_%d", i);
+          if (! (attr=dataFile.get_att(name_temp)) ) {
+              delete[] Tags_keys;
+              stringstream msg;
+              msg << "get_att(" << name_temp << ")";
+              cleanupAndThrow(dom, msg.str());
+          }
+          boost::scoped_array<char> name(attr->as_string(0));
+          delete attr;
+          dom->setTagMap(name.get(), Tags_keys[i]);
+        }
+        delete[] Tags_keys;
+    }
+
+    // Nodes_DofDistribution
+    IndexVector first_DofComponent(mpi_size+1);
+    if (! (nc_var_temp = dataFile.get_var("Nodes_DofDistribution")) ) {
+        cleanupAndThrow(dom, "get_var(Nodes_DofDistribution)");
+    }
+    if (!nc_var_temp->get(&first_DofComponent[0], mpi_size+1)) {
+        cleanupAndThrow(dom, "get(Nodes_DofDistribution)");
+    }
+
+    // Nodes_NodeDistribution
+    IndexVector first_NodeComponent(mpi_size+1);
+    if (! (nc_var_temp = dataFile.get_var("Nodes_NodeDistribution")) ) {
+        cleanupAndThrow(dom, "get_var(Nodes_NodeDistribution)");
+    }
+    if (!nc_var_temp->get(&first_NodeComponent[0], mpi_size+1)) {
+        cleanupAndThrow(dom, "get(Nodes_NodeDistribution)");
+    }
+    dom->createMappings(first_DofComponent, first_NodeComponent);
+
+    return dom->getPtr();
+#else
+    throw FinleyException("loadMesh: not compiled with NetCDF. Please contact your installation manager.");
+#endif // ESYS_HAVE_NETCDF
+}
+
+Domain_ptr readMesh_driver(const bp::list& args)
+{
+    int l = len(args);
+    if (l < 7) {
+        throw ValueError("Insufficient arguments to readMesh_driver");
+    }
+    string fileName = bp::extract<string>(args[0])();
+    int integrationOrder = bp::extract<int>(args[1])();
+    int reducedIntegrationOrder = bp::extract<int>(args[2])();
+    bool optimize = bp::extract<bool>(args[3])();
+    vector<double> points;
+    vector<int> tags;
+
+    // we need to convert lists to stl vectors
+    bp::list pypoints = bp::extract<bp::list>(args[4]);
+    bp::list pytags = bp::extract<bp::list>(args[5]);
+    int numpts = bp::extract<int>(pypoints.attr("__len__")());
+    int numtags = bp::extract<int>(pytags.attr("__len__")());
+
+    bp::object pworld = args[6];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+    Domain_ptr dom(FinleyDomain::read(info, fileName, integrationOrder,
+                                      reducedIntegrationOrder, optimize));
+
+    FinleyDomain* fd = dynamic_cast<FinleyDomain*>(dom.get());
+
+    for (int i = 0; i < numpts; ++i) {
+        bp::object temp = pypoints[i];
+        int l = bp::extract<int>(temp.attr("__len__")());
+        for (int k = 0; k < l; ++k) {
+              points.push_back(bp::extract<double>(temp[k]));
+        }
+    }
+    // bricks use tag numbers up to 200, but the check against existing tags below raises curmax accordingly
+    int curmax = 40;
+    const TagMap& tagmap = fd->getTagMap();
+    // first we work out what tags are already in use
+    for (TagMap::const_iterator it = tagmap.begin(); it != tagmap.end(); ++it) {
+        if (it->second > curmax) {
+            curmax = it->second+1;
+        }
+    }
+
+    tags.resize(numtags, -1);
+    for (int i = 0; i < numtags; ++i) {
+        bp::extract<int> ex_int(pytags[i]);
+        bp::extract<string> ex_str(pytags[i]);
+        if (ex_int.check()) {
+            tags[i] = ex_int();
+            if (tags[i] >= curmax) {
+                curmax = tags[i]+1;
+            }
+        } else if (ex_str.check()) {
+            string s = ex_str();
+            TagMap::const_iterator it = tagmap.find(s);
+            if (it != tagmap.end()) {
+                // we have the tag already so look it up
+                tags[i] = it->second;
+            } else {
+                fd->setTagMap(s, curmax);
+                tags[i] = curmax;
+                curmax++;
+            }
+        } else {
+            throw FinleyException("Unable to extract tag value.");
+        }
+    }
+    // now we need to add the dirac points
+    fd->addDiracPoints(points, tags);
+    return dom;
+}
+
+Domain_ptr readGmsh_driver(const bp::list& args)
+{
+    int l = len(args);
+    if (l < 9) {
+        throw ValueError("Insufficient arguments to readGmsh_driver");
+    }
+    string fileName = bp::extract<string>(args[0])();
+    int numDim = bp::extract<int>(args[1])();
+    int integrationOrder = bp::extract<int>(args[2])();
+    int reducedIntegrationOrder = bp::extract<int>(args[3])();
+    bool optimize = bp::extract<bool>(args[4])();
+    bool useMacroElements = bp::extract<bool>(args[5])();
+    vector<double> points;
+    vector<int> tags;
+
+    // we need to convert lists to stl vectors
+    bp::list pypoints = bp::extract<bp::list>(args[6]);
+    bp::list pytags = bp::extract<bp::list>(args[7]);
+    int numpts = bp::extract<int>(pypoints.attr("__len__")());
+    int numtags = bp::extract<int>(pytags.attr("__len__")());
+    bp::object pworld = args[8];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+    Domain_ptr dom(FinleyDomain::readGmsh(info, fileName, numDim,
+                                     integrationOrder, reducedIntegrationOrder,
+                                     optimize, useMacroElements));
+    FinleyDomain* fd = dynamic_cast<FinleyDomain*>(dom.get());
+
+    for (int i = 0; i < numpts; ++i) {
+        bp::object temp = pypoints[i];
+        int l = bp::extract<int>(temp.attr("__len__")());
+        for (int k = 0; k < l; ++k) {
+            points.push_back(bp::extract<double>(temp[k]));
+        }
+    }
+    int curmax = 40; // bricks use up to 30
+    const TagMap& tagmap = fd->getTagMap();
+    // first we work out what tags are already in use
+    for (TagMap::const_iterator it = tagmap.begin(); it != tagmap.end(); ++it) {
+        if (it->second > curmax) {
+            curmax = it->second+1;
+        }
+    }
+
+    tags.resize(numtags, -1);
+    for (int i = 0; i < numtags; ++i) {
+        bp::extract<int> ex_int(pytags[i]);
+        bp::extract<string> ex_str(pytags[i]);
+        if (ex_int.check()) {
+            tags[i] = ex_int();
+            if (tags[i] >= curmax) {
+                curmax = tags[i]+1;
+            }
+        } else if (ex_str.check()) {
+            string s = ex_str();
+            TagMap::const_iterator it = tagmap.find(s);
+            if (it != tagmap.end()) {
+                // we have the tag already so look it up
+                tags[i] = it->second;
+            } else {
+                fd->setTagMap(s, curmax);
+                tags[i] = curmax;
+                curmax++;
+            }
+        } else {
+            throw FinleyException("Unable to extract tag value");
+        }
+    }
+    // now we need to add the dirac points
+    fd->addDiracPoints(points, tags);
+    return dom;
+}
+
+Domain_ptr brick(JMPI info, dim_t n0, dim_t n1, dim_t n2, int order,
+                 double l0, double l1, double l2,
+                 bool periodic0, bool periodic1, bool periodic2,
+                 int integrationOrder, int reducedIntegrationOrder,
+                 bool useElementsOnFace, bool useFullElementOrder,
+                 bool optimize, const std::vector<double>& points,
+                 const std::vector<int>& tags,
+                 const std::map<std::string, int>& tagNamesToNums)
+{
+    Domain_ptr dom;
+    if (order == 1) {
+        dom = FinleyDomain::createHex8(n0, n1, n2, l0, l1, l2, periodic0,
+                   periodic1, periodic2, integrationOrder,
+                   reducedIntegrationOrder, useElementsOnFace, optimize, info);
+    } else if (order == 2) {
+        dom = FinleyDomain::createHex20(n0, n1, n2, l0, l1, l2, periodic0,
+                                   periodic1, periodic2, integrationOrder,
+                                   reducedIntegrationOrder, useElementsOnFace,
+                                   useFullElementOrder, false, optimize, info);
+    } else if (order == -1) {
+        dom = FinleyDomain::createHex20(n0, n1, n2, l0, l1, l2, periodic0,
+                                   periodic1, periodic2, integrationOrder,
+                                   reducedIntegrationOrder, useElementsOnFace,
+                                   useFullElementOrder, true, optimize, info);
+    } else {
+        stringstream message;
+        message << "Illegal interpolation order " << order;
+        throw ValueError(message.str());
+    }
+
+    FinleyDomain* fd = dynamic_cast<FinleyDomain*>(dom.get());
+    fd->addDiracPoints(points, tags);
+    for (TagMap::const_iterator it = tagNamesToNums.begin(); it != tagNamesToNums.end(); ++it) {
+        fd->setTagMap(it->first, it->second);
+    }
+    fd->getPoints()->updateTagList();
+    return dom;
+}
+
+Domain_ptr brick_driver(const bp::list& args)
+{
+    // we need to convert lists to stl vectors
+    bp::list pypoints = bp::extract<bp::list>(args[15]);
+    bp::list pytags = bp::extract<bp::list>(args[16]);
+    int numpts = bp::extract<int>(pypoints.attr("__len__")());
+    int numtags = bp::extract<int>(pytags.attr("__len__")());
+    vector<double> points;
+    vector<int> tags;
+    tags.resize(numtags, -1);
+    for (int i = 0; i < numpts; ++i) {
+        bp::object temp = pypoints[i];
+        int l = bp::extract<int>(temp.attr("__len__")());
+        for (int k = 0; k < l; ++k) {
+            points.push_back(bp::extract<double>(temp[k]));
+        }
+    }
+    map<string, int> namestonums;
+    int curmax = 40; // bricks use up to 30
+    for (int i = 0; i < numtags; ++i) {
+        bp::extract<int> ex_int(pytags[i]);
+        bp::extract<string> ex_str(pytags[i]);
+        if (ex_int.check()) {
+            tags[i] = ex_int();
+            if (tags[i] >= curmax) {
+                curmax = tags[i]+1;
+            }
+        } else if (ex_str.check()) {
+            string s = ex_str();
+            TagMap::iterator it = namestonums.find(s);
+            if (it != namestonums.end()) {
+                // we have the tag already so look it up
+                tags[i] = it->second;
+            } else {
+                namestonums[s] = curmax;
+                tags[i] = curmax;
+                curmax++;
+            }
+        } else {
+            throw FinleyException("Unable to extract tag value.");
+        }
+    }
+    bp::object pworld = args[17];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+    return brick(info, static_cast<dim_t>(bp::extract<float>(args[0])),
+                 static_cast<dim_t>(bp::extract<float>(args[1])),
+                 static_cast<dim_t>(bp::extract<float>(args[2])),
+                 bp::extract<int>(args[3]), bp::extract<double>(args[4]),
+                 bp::extract<double>(args[5]), bp::extract<double>(args[6]),
+                 bp::extract<int>(args[7]), bp::extract<int>(args[8]),
+                 bp::extract<int>(args[9]), bp::extract<int>(args[10]),
+                 bp::extract<int>(args[11]), bp::extract<int>(args[12]),
+                 bp::extract<int>(args[13]), bp::extract<int>(args[14]),
+                 points, tags, namestonums);
+}
+
+Domain_ptr rectangle(JMPI info, dim_t n0, dim_t n1, int order,
+                     double l0, double l1, bool periodic0, bool periodic1,
+                     int integrationOrder, int reducedIntegrationOrder,
+                     bool useElementsOnFace, bool useFullElementOrder,
+                     bool optimize, const vector<double>& points,
+                     const vector<int>& tags,
+                     const std::map<std::string, int>& tagNamesToNums)
+{
+    Domain_ptr dom;
+    if (order == 1) {
+        dom = FinleyDomain::createRec4(n0, n1, l0, l1, periodic0, periodic1,
+                                     integrationOrder, reducedIntegrationOrder,
+                                     useElementsOnFace, optimize, info);
+    } else if (order == 2) {
+        dom = FinleyDomain::createRec8(n0, n1, l0, l1, periodic0, periodic1,
+                 integrationOrder, reducedIntegrationOrder,
+                 useElementsOnFace,useFullElementOrder, false, optimize, info);
+    } else if (order == -1) {
+        dom = FinleyDomain::createRec8(n0, n1, l0, l1, periodic0, periodic1,
+                 integrationOrder, reducedIntegrationOrder,
+                 useElementsOnFace, useFullElementOrder, true, optimize, info);
+    } else {
+        stringstream message;
+        message << "Illegal interpolation order " << order;
+        throw ValueError(message.str());
+    }
+
+    FinleyDomain* fd = dynamic_cast<FinleyDomain*>(dom.get());
+    fd->addDiracPoints(points, tags);
+    for (TagMap::const_iterator it = tagNamesToNums.begin(); it != tagNamesToNums.end(); ++it)
+    {
+        fd->setTagMap(it->first, it->second);
+    }
+    fd->getPoints()->updateTagList();
+    return dom;
+}
+
+Domain_ptr rectangle_driver(const bp::list& args)
+{
+    // we need to convert lists to stl vectors
+    bp::list pypoints = bp::extract<bp::list>(args[12]);
+    bp::list pytags = bp::extract<bp::list>(args[13]);
+    int numpts = bp::extract<int>(pypoints.attr("__len__")());
+    int numtags = bp::extract<int>(pytags.attr("__len__")());
+    vector<double> points;
+    vector<int> tags;
+    tags.resize(numtags, -1);
+    for (int i = 0; i < numpts; ++i) {
+        bp::object temp = pypoints[i];
+        int l = bp::extract<int>(temp.attr("__len__")());
+        for (int k = 0; k < l; ++k) {
+            points.push_back(bp::extract<double>(temp[k]));
+        }
+    }
+    TagMap tagstonames;
+    int curmax = 40;
+    // note: tag numbers are assigned to names in the order they appear in the input list
+    for (int i = 0; i < numtags; ++i) {
+        bp::extract<int> ex_int(pytags[i]);
+        bp::extract<string> ex_str(pytags[i]);
+        if (ex_int.check()) {
+            tags[i] = ex_int();
+            if (tags[i] >= curmax) {
+                curmax = tags[i]+1;
+            }
+        } else if (ex_str.check()) {
+            string s = ex_str();
+            TagMap::iterator it = tagstonames.find(s);
+            if (it != tagstonames.end()) {
+                // we have the tag already so look it up
+                tags[i] = it->second;
+            } else {
+                tagstonames[s] = curmax;
+                tags[i] = curmax;
+                curmax++;
+            }
+        } else {
+            throw FinleyException("Unable to extract tag value.");
+        }
+    }
+    bp::object pworld = args[14];
+    JMPI info;
+    if (!pworld.is_none()) {
+        bp::extract<SubWorld_ptr> ex(pworld);
+        if (!ex.check()) {
+            throw ValueError("Invalid escriptWorld parameter.");
+        }
+        info = ex()->getMPI();
+    } else {
+        info = makeInfo(MPI_COMM_WORLD);
+    }
+
+    return rectangle(info, static_cast<dim_t>(bp::extract<float>(args[0])),
+                     static_cast<dim_t>(bp::extract<float>(args[1])),
+                     bp::extract<int>(args[2]), bp::extract<double>(args[3]),
+                     bp::extract<double>(args[4]), bp::extract<int>(args[5]),
+                     bp::extract<int>(args[6]), bp::extract<int>(args[7]),
+                     bp::extract<int>(args[8]), bp::extract<int>(args[9]),
+                     bp::extract<int>(args[10]), bp::extract<int>(args[11]),
+                     points, tags, tagstonames);
+}
+
+Domain_ptr meshMerge(const bp::list& meshList)
+{
+    // extract the meshes from meshList
+    int num = bp::extract<int>(meshList.attr("__len__")());
+    vector<const FinleyDomain*> meshes(num);
+    for (int i = 0; i < num; ++i) {
+        AbstractContinuousDomain& meshListMember = bp::extract<AbstractContinuousDomain&>(meshList[i]);
+        meshes[i] = dynamic_cast<const FinleyDomain*>(&meshListMember);
+    }
+
+    // merge the meshes
+    FinleyDomain* dom = FinleyDomain::merge(meshes);
+
+    return dom->getPtr();
+}
+
+Domain_ptr glueFaces(const bp::list& meshList, double safetyFactor,
+                     double tolerance, bool optimize)
+{
+    // merge the meshes
+    Domain_ptr merged_meshes = meshMerge(meshList);
+
+    // glue the faces
+    FinleyDomain* merged = dynamic_cast<FinleyDomain*>(merged_meshes.get());
+    merged->glueFaces(safetyFactor, tolerance, optimize);
+    return merged_meshes;
+}
+
+Domain_ptr joinFaces(const bp::list& meshList, double safetyFactor,
+                     double tolerance, bool optimize)
+{
+    // merge the meshes
+    Domain_ptr merged_meshes = meshMerge(meshList);
+
+    // join the faces
+    FinleyDomain* merged = dynamic_cast<FinleyDomain*>(merged_meshes.get());
+    merged->joinFaces(safetyFactor, tolerance, optimize);
+    return merged_meshes;
+}
+
+} // namespace finley
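
Illustrative sketch (not part of the commit above): the four Python drivers in
DomainFactory.cpp (readMesh_driver, readGmsh_driver, brick_driver and
rectangle_driver) all resolve a mixed list of integer and string tags against a
name-to-number map in the same way. The self-contained C++ sketch below
reproduces that pattern on its own; the std::variant input type is an
assumption made for the sketch only, as the real drivers extract the values
from boost::python lists instead.

    // Sketch of the tag-resolution pattern used by the factory drivers.
    #include <iostream>
    #include <map>
    #include <string>
    #include <variant>
    #include <vector>

    typedef std::map<std::string, int> TagMap;

    std::vector<int> resolveTags(const std::vector<std::variant<int, std::string> >& in,
                                 TagMap& tagmap)
    {
        int curmax = 40;                       // floor used by the drivers
        for (TagMap::const_iterator it = tagmap.begin(); it != tagmap.end(); ++it)
            if (it->second > curmax)
                curmax = it->second + 1;       // stay above tags already in use

        std::vector<int> out(in.size(), -1);
        for (size_t i = 0; i < in.size(); ++i) {
            if (const int* num = std::get_if<int>(&in[i])) {
                out[i] = *num;                 // numeric tag: use as given
                if (out[i] >= curmax)
                    curmax = out[i] + 1;
            } else {
                const std::string& name = std::get<std::string>(in[i]);
                TagMap::const_iterator it = tagmap.find(name);
                if (it != tagmap.end()) {
                    out[i] = it->second;       // known name: reuse its number
                } else {
                    tagmap[name] = curmax;     // new name: assign a fresh number
                    out[i] = curmax++;
                }
            }
        }
        return out;
    }

    int main()
    {
        TagMap tagmap;
        tagmap["top"] = 20;
        std::vector<std::variant<int, std::string> > tags;
        tags.push_back(7);
        tags.push_back(std::string("top"));
        tags.push_back(std::string("source"));
        std::vector<int> resolved = resolveTags(tags, tagmap);
        for (size_t i = 0; i < resolved.size(); ++i)
            std::cout << resolved[i] << "\n";  // prints 7, 20, 40
        return 0;
    }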
diff --git a/finley/src/DomainFactory.h b/finley/src/DomainFactory.h
new file mode 100644
index 0000000..9625ce2
--- /dev/null
+++ b/finley/src/DomainFactory.h
@@ -0,0 +1,162 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __FINLEY_DOMAINFACTORY_H__
+#define __FINLEY_DOMAINFACTORY_H__
+
+#include <finley/FinleyDomain.h>
+
+#include <boost/python/list.hpp>
+
+#include <sstream>
+
+/**
+    \brief
+    A suite of factory methods for creating various finley domains.
+*/
+
+namespace finley {
+
+/**
+    \brief Python driver for readMesh()
+    \param args see readMesh() definition for order of parameters
+*/
+escript::Domain_ptr readMesh_driver(const boost::python::list& args);
+   
+/**
+    \brief Python driver for readGmsh()
+    \param args see readGmsh() definition for order of parameters
+*/
+escript::Domain_ptr readGmsh_driver(const boost::python::list& args);
+
+/**
+    \brief
+    Creates a 3-dimensional rectangular mesh with n0 x n1 x n2 elements over the brick
+    [0,l0] x [0,l1] x [0,l2].
+
+    \param jmpi pointer to MPI world information structure
+    \param n0,n1,n2 number of elements in each dimension
+    \param order order of shape functions (1, 2, or -1 for macro
+                 elements of order 1)
+    \param l0,l1,l2 length of each side of brick
+    \param periodic0,periodic1,periodic2 whether or not the boundary
+           conditions in the corresponding dimension are periodic
+    \param integrationOrder order of the quadrature scheme.
+                            If <0 the order is selected automatically.
+    \param reducedIntegrationOrder order of the reduced quadrature scheme.
+                                   If <0 the order is selected automatically.
+    \param useElementsOnFace whether or not to use elements on face
+    \param useFullElementOrder whether to use second order elements
+    \param optimize whether to apply optimization of node labels
+    \param points Dirac points to add
+    \param tags tags to assign to the Dirac points
+    \param tagNamesToNums mapping from tag names to tag numbers
+*/
+escript::Domain_ptr brick(escript::JMPI jmpi,
+                    dim_t n0=1, dim_t n1=1, dim_t n2=1, int order=1,
+                    double l0=1.0, double l1=1.0, double l2=1.0,
+                    bool periodic0=false, bool periodic1=false, bool periodic2=false,
+                    int integrationOrder=-1, int reducedIntegrationOrder=-1,
+                    bool useElementsOnFace=false,
+                    bool useFullElementOrder=false, bool optimize=false,
+                    const std::vector<double>& points=std::vector<double>(),
+                    const std::vector<int>& tags=std::vector<int>(),
+                    const std::map<std::string, int>& tagNamesToNums=std::map<std::string, int>());
+
+/**
+    \brief Python driver for brick()
+    \param args see brick() definition for order of params
+*/
+escript::Domain_ptr brick_driver(const boost::python::list& args);
+
+/**
+    \brief
+    Creates a 2-dimensional rectangular mesh with n0 x n1 elements over the
+    rectangle [0,l0] x [0,l1].
+
+    \param jmpi pointer to MPI world information structure
+    \param n0,n1 number of elements in each dimension
+    \param order order of shape functions (1, 2, or -1 for macro
+                 elements of order 1)
+    \param l0,l1 length of each side of rectangle
+    \param periodic0,periodic1 whether or not the boundary conditions in the
+                               corresponding dimension are periodic
+    \param integrationOrder order of the quadrature scheme.
+                            If <0 the order is selected automatically.
+    \param reducedIntegrationOrder order of the reduced quadrature scheme.
+                                   If <0 the order is selected automatically.
+    \param useElementsOnFace whether or not to use elements on face
+    \param useFullElementOrder whether to use second order elements
+    \param optimize whether to apply optimization of node labels
+    \param points Dirac points to add
+    \param tags tags to assign to the Dirac points
+    \param tagNamesToNums mapping from tag names to tag numbers
+*/
+escript::Domain_ptr rectangle(escript::JMPI jmpi,
+                              dim_t n0 = 1, dim_t n1 = 1, int order = 1,
+                              double l0 = 1.0, double l1 = 1.0,
+                              bool periodic0 = false, bool periodic1 = false,
+                              int integrationOrder = -1,
+                              int reducedIntegrationOrder = -1,
+                              bool useElementsOnFace = false,
+                              bool useFullElementOrder = false,
+                              bool optimize = false,
+                              const std::vector<double>& points = std::vector<double>(),
+                              const std::vector<int>& tags = std::vector<int>(),
+                              const std::map<std::string, int>& tagNamesToNums = std::map<std::string, int>());
+
+/**
+    \brief Python driver for rectangle()
+    \param args see rectangle() definition for order of params
+*/
+escript::Domain_ptr rectangle_driver(const boost::python::list& args);
+
+/**
+    \brief
+    Merges a list of meshes into one mesh.
+    \param meshList Input - The list of meshes.
+*/
+escript::Domain_ptr meshMerge(const boost::python::list& meshList);
+
+/**
+    \brief
+    Detects matching faces in the mesh, removes them from the mesh 
+    and joins the elements touched by the face elements.
+    \param meshList The list of meshes.
+    \param safetyFactor
+    \param tolerance
+    \param optimize switches on the optimization of node labels 
+*/
+escript::Domain_ptr glueFaces(const boost::python::list& meshList,
+                              double safetyFactor = 0.2, double tolerance = 1.e-8,
+                              bool optimize = false);
+
+/**
+    \brief
+    Detects matching faces in the mesh and replaces them by joint elements.
+    \param meshList The list of meshes
+    \param safetyFactor
+    \param tolerance
+    \param optimize switches on the optimization of node labels 
+*/
+escript::Domain_ptr joinFaces(const boost::python::list& meshList,
+                              double safetyFactor = 0.2, double tolerance = 1.e-8,
+                              bool optimize = false);
+ 
+} // end of namespace
+
+#endif // __FINLEY_DOMAINFACTORY_H__
+
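
Illustrative sketch (not part of the commit above): the header just added
declares the C++ factory functions with default arguments. The minimal sketch
below shows how brick() and rectangle() might be called directly from C++. It
assumes that escript::makeInfo and MPI_COMM_WORLD are available through the
headers pulled in by finley/FinleyDomain.h, as they are in DomainFactory.cpp
above; in normal use these factories are reached through the Python-level
wrappers rather than called like this.

    // Direct C++ use of the finley domain factories (sketch only).
    #include <finley/DomainFactory.h>

    int main()
    {
        escript::JMPI info = escript::makeInfo(MPI_COMM_WORLD);

        // 10 x 10 x 10 first-order brick over the unit cube [0,1]^3
        escript::Domain_ptr box = finley::brick(info, 10, 10, 10, 1);

        // 20 x 20 second-order rectangle over [0,2] x [0,1]
        escript::Domain_ptr rect = finley::rectangle(info, 20, 20, 2, 2.0, 1.0);

        return 0;
    }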
diff --git a/finley/src/ElementFile.cpp b/finley/src/ElementFile.cpp
index 7897673..58240dd 100644
--- a/finley/src/ElementFile.cpp
+++ b/finley/src/ElementFile.cpp
@@ -14,18 +14,10 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Finley: ElementFile
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "ElementFile.h"
+
 #include <escript/Data.h>
+#include <escript/index.h>
 
 #include <algorithm> // std::swap
 
@@ -34,7 +26,8 @@ namespace finley {
 /// constructor
 /// use ElementFile::allocTable to allocate the element table
 ElementFile::ElementFile(const_ReferenceElementSet_ptr refSet,
-                         esysUtils::JMPI& mpiInfo) :
+                         escript::JMPI mpiInfo) :
+    MPIInfo(mpiInfo),
     referenceElementSet(refSet),
     numElements(0),
     Id(NULL),
@@ -45,18 +38,16 @@ ElementFile::ElementFile(const_ReferenceElementSet_ptr refSet,
     minColor(0),
     maxColor(-1)
 {
-    MPIInfo=mpiInfo;
- 
-    jacobians=new ElementFile_Jacobians(
+    jacobians = new ElementFile_Jacobians(
             referenceElementSet->referenceElement->BasisFunctions);
-    jacobians_reducedQ=new ElementFile_Jacobians(
+    jacobians_reducedQ = new ElementFile_Jacobians(
             referenceElementSet->referenceElementReducedQuadrature->BasisFunctions);
-    jacobians_reducedS=new ElementFile_Jacobians(
+    jacobians_reducedS = new ElementFile_Jacobians(
             referenceElementSet->referenceElement->LinearBasisFunctions);
-    jacobians_reducedS_reducedQ=new ElementFile_Jacobians(
+    jacobians_reducedS_reducedQ = new ElementFile_Jacobians(
             referenceElementSet->referenceElementReducedQuadrature->LinearBasisFunctions);
 
-    numNodes=referenceElementSet->getNumNodes();
+    numNodes = referenceElementSet->getNumNodes();
 }
 
 /// destructor
@@ -72,26 +63,26 @@ ElementFile::~ElementFile()
 /// allocates the element table within this element file to hold NE elements.
 void ElementFile::allocTable(dim_t NE) 
 {
-    if (numElements>0)
+    if (numElements > 0)
         freeTable();
 
-    numElements=NE;
-    Owner=new int[numElements];
-    Id=new index_t[numElements];
-    Nodes=new index_t[numElements*numNodes];
-    Tag=new int[numElements];
-    Color=new int[numElements];
-  
+    numElements = NE;
+    Owner = new int[numElements];
+    Id = new index_t[numElements];
+    Nodes = new index_t[numElements * numNodes];
+    Tag = new int[numElements];
+    Color = new index_t[numElements];
+
     // this initialization makes sure that data are located on the right
     // processor
 #pragma omp parallel for
-    for (index_t e=0; e<numElements; e++) {
-        for (int i=0; i<numNodes; i++)
-            Nodes[INDEX2(i,e,numNodes)]=-1;
-        Owner[e]=-1;
-        Id[e]=-1;
-        Tag[e]=-1;
-        Color[e]=-1;
+    for (index_t e = 0; e < numElements; e++) {
+        for (int i = 0; i < numNodes; i++)
+            Nodes[INDEX2(i, e, numNodes)] = -1;
+        Owner[e] = -1;
+        Id[e] = -1;
+        Tag[e] = -1;
+        Color[e] = -1;
     }
 }
 
@@ -104,9 +95,9 @@ void ElementFile::freeTable()
     delete[] Tag;
     delete[] Color;
     tagsInUse.clear();
-    numElements=0;
-    maxColor=-1;
-    minColor=0;
+    numElements = 0;
+    maxColor = -1;
+    minColor = 0;
 }
 
 /// copies element file 'in' into this element file starting from 'offset'.
@@ -114,57 +105,56 @@ void ElementFile::freeTable()
 void ElementFile::copyTable(index_t offset, index_t nodeOffset,
                             index_t idOffset, const ElementFile* in)
 {
-    const int NN_in=in->numNodes;
+    const int NN_in = in->numNodes;
     if (NN_in > numNodes) {
-        setError(TYPE_ERROR, "ElementFile::copyTable: dimensions of element files don't match.");
-        return;
+        throw escript::ValueError("ElementFile::copyTable: dimensions of element files don't match.");
     }
 
 #pragma omp parallel for
-    for (index_t n=0; n<in->numElements; n++) {
-          Owner[offset+n]=in->Owner[n];
-          Id[offset+n]=in->Id[n]+idOffset;
-          Tag[offset+n]=in->Tag[n];
-          for (int i=0; i<numNodes; i++)
-              Nodes[INDEX2(i,offset+n,numNodes)] =
-                            in->Nodes[INDEX2(i,n,NN_in)]+nodeOffset;
+    for (index_t n = 0; n < in->numElements; n++) {
+        Owner[offset + n] = in->Owner[n];
+        Id[offset + n] = in->Id[n] + idOffset;
+        Tag[offset + n] = in->Tag[n];
+        for (int i = 0; i < numNodes; i++)
+            Nodes[INDEX2(i, offset + n, numNodes)] =
+                            in->Nodes[INDEX2(i, n, NN_in)] + nodeOffset;
     }
 }
 
-void ElementFile::gather(index_t* index, const ElementFile* in)
+void ElementFile::gather(const index_t* index, const ElementFile* in)
 {
-    const int NN_in=in->numNodes;
+    const int NN_in = in->numNodes;
 #pragma omp parallel for
-    for (index_t e=0; e<numElements; e++) {
-        const index_t k=index[e];
-        Id[e]=in->Id[k];
-        Tag[e]=in->Tag[k];
-        Owner[e]=in->Owner[k];
-        Color[e]=in->Color[k]+maxColor+1;
-        for (int j=0; j<std::min(numNodes,NN_in); j++)
-            Nodes[INDEX2(j,e,numNodes)]=in->Nodes[INDEX2(j,k,NN_in)];
+    for (index_t e = 0; e < numElements; e++) {
+        const index_t k = index[e];
+        Id[e] = in->Id[k];
+        Tag[e] = in->Tag[k];
+        Owner[e] = in->Owner[k];
+        Color[e] = in->Color[k] + maxColor + 1;
+        for (int j = 0; j < std::min(numNodes, NN_in); j++)
+            Nodes[INDEX2(j, e, numNodes)] = in->Nodes[INDEX2(j, k, NN_in)];
     }
-    minColor=std::min(minColor, in->minColor+maxColor+1);
-    maxColor=std::max(maxColor, in->maxColor+maxColor+1);
+    minColor = std::min(minColor, in->minColor+maxColor+1);
+    maxColor = std::max(maxColor, in->maxColor+maxColor+1);
 }
 
 /// scatters the ElementFile in into this ElementFile.
 /// A conservative assumption on the coloring is made.
 void ElementFile::scatter(index_t* index, const ElementFile* in)
 {
-    const int NN_in=in->numNodes;
+    const int NN_in = in->numNodes;
 #pragma omp parallel for
-    for (index_t e=0; e<in->numElements; e++) {
-        const index_t k=index[e];
-        Owner[k]=in->Owner[e];
-        Id[k]=in->Id[e];
-        Tag[k]=in->Tag[e];
-        Color[k]=in->Color[e]+maxColor+1;
-        for (int j=0; j<std::min(numNodes,NN_in); j++)
-            Nodes[INDEX2(j,k,numNodes)]=in->Nodes[INDEX2(j,e,NN_in)];
+    for (index_t e = 0; e < in->numElements; e++) {
+        const index_t k = index[e];
+        Owner[k] = in->Owner[e];
+        Id[k] = in->Id[e];
+        Tag[k] = in->Tag[e];
+        Color[k] = in->Color[e]+maxColor+1;
+        for (int j = 0; j < std::min(numNodes,NN_in); j++)
+            Nodes[INDEX2(j,k,numNodes)] = in->Nodes[INDEX2(j,e,NN_in)];
     }
-    minColor=std::min(minColor, in->minColor+maxColor+1);
-    maxColor=std::max(maxColor, in->maxColor+maxColor+1);
+    minColor = std::min(minColor, in->minColor+maxColor+1);
+    maxColor = std::max(maxColor, in->maxColor+maxColor+1);
 }
 
 void ElementFile::swapTable(ElementFile* other)
@@ -182,29 +172,29 @@ void ElementFile::swapTable(ElementFile* other)
 
 void ElementFile::optimizeOrdering()
 {
-    if (numElements<1)
+    if (numElements < 1)
         return;
 
-    const int NN=referenceElementSet->getNumNodes();
+    const int NN = referenceElementSet->getNumNodes();
     util::ValueAndIndexList item_list(numElements);
-    index_t *index=new index_t[numElements];
-    ElementFile* out=new ElementFile(referenceElementSet, MPIInfo);
+    index_t* index = new index_t[numElements];
+    ElementFile* out = new ElementFile(referenceElementSet, MPIInfo);
     out->allocTable(numElements);
-    if (noError()) {
 #pragma omp parallel for
-        for (index_t e=0; e<numElements; e++) {
-            std::pair<index_t,index_t> entry(Nodes[INDEX2(0,e,NN)], e);
-            for (int i=1; i<NN; i++)
-                entry.first=std::min(entry.first, Nodes[INDEX2(i,e,NN)]);
-            item_list[e] = entry;
-        }
-        util::sortValueAndIndex(item_list);
-#pragma omp parallel for
-        for (index_t e=0; e<numElements; e++)
-            index[e]=item_list[e].second;
-        out->gather(index, this);
-        swapTable(out);
+    for (index_t e = 0; e < numElements; e++) {
+        std::pair<index_t,index_t> entry(Nodes[INDEX2(0, e, NN)], e);
+        for (int i = 1; i < NN; i++)
+            entry.first = std::min(entry.first, Nodes[INDEX2(i, e, NN)]);
+        item_list[e] = entry;
     }
+    util::sortValueAndIndex(item_list);
+
+#pragma omp parallel for
+    for (index_t e = 0; e < numElements; e++)
+        index[e] = item_list[e].second;
+
+    out->gather(index, this);
+    swapTable(out);
     delete out;
     delete[] index;
 }
@@ -224,42 +214,38 @@ void ElementFile::relabelNodes(const std::vector<index_t>& newNode, index_t offs
 
 void ElementFile::setTags(int newTag, const escript::Data& mask)
 {
-    resetError();
-
-    const int numQuad=referenceElementSet->borrowReferenceElement(
+    const int numQuad = referenceElementSet->borrowReferenceElement(
             util::hasReducedIntegrationOrder(mask))
             ->Parametrization->numQuadNodes; 
     if (1 != mask.getDataPointSize()) {
-        setError(TYPE_ERROR, "ElementFile::setTags: number of components of mask must be 1.");
-        return;
+        throw escript::ValueError("ElementFile::setTags: number of components of mask must be 1.");
     } else if (mask.getNumDataPointsPerSample() != numQuad ||
             mask.getNumSamples() != numElements) {
-        setError(TYPE_ERROR, "ElementFile::setTags: illegal number of samples of mask Data object");
-        return;
+        throw escript::ValueError("ElementFile::setTags: illegal number of samples of mask Data object");
     }
 
     if (mask.actsExpanded()) {
 #pragma omp parallel for
-        for (index_t n=0; n<numElements; n++) {
+        for (index_t n = 0; n < numElements; n++) {
             if (mask.getSampleDataRO(n)[0] > 0)
-                Tag[n]=newTag;
+                Tag[n] = newTag;
         }
     } else {
 #pragma omp parallel for
-        for (index_t n=0; n<numElements; n++) {
-            const double *mask_array=mask.getSampleDataRO(n);
-            bool check=false;
-            for (int q=0; q<numQuad; q++)
-                check = (check || mask_array[q]);
+        for (index_t n = 0; n < numElements; n++) {
+            const double* mask_array = mask.getSampleDataRO(n);
+            bool check = false;
+            for (int q = 0; q < numQuad; q++)
+                check = check || mask_array[q];
             if (check)
-                Tag[n]=newTag;
+                Tag[n] = newTag;
         }
     }
     updateTagList();
 }
 
 /// Tries to reduce the number of colours used to colour the elements
-void ElementFile::createColoring(const std::vector<index_t>& dofMap)
+void ElementFile::createColoring(const IndexVector& dofMap)
 {
     if (numElements < 1)
         return;
@@ -267,16 +253,16 @@ void ElementFile::createColoring(const std::vector<index_t>& dofMap)
     const int NN = numNodes;
     const std::pair<index_t,index_t> idRange(util::getMinMaxInt(
                                             1, dofMap.size(), &dofMap[0]));
-    const index_t len=idRange.second-idRange.first+1;
+    const index_t len = idRange.second-idRange.first+1;
 
     // reset color vector
 #pragma omp parallel for
-    for (index_t e=0; e<numElements; e++)
-        Color[e]=-1;
+    for (index_t e = 0; e < numElements; e++)
+        Color[e] = -1;
 
-    index_t numUncoloredElements=numElements;
-    minColor=0;
-    maxColor=-1;
+    index_t numUncoloredElements = numElements;
+    minColor = 0;
+    maxColor = -1;
     while (numUncoloredElements>0) {
         // initialize the mask marking nodes used by a color
         std::vector<index_t> maskDOF(len, -1);
@@ -290,17 +276,10 @@ void ElementFile::createColoring(const std::vector<index_t>& dofMap)
                 bool independent = true; 
                 for (int i=0; i<NN; i++) {
 #ifdef BOUNDS_CHECK
-if (Nodes[INDEX2(i,e,NN)] < 0 || Nodes[INDEX2(i,e,NN)] >= dofMap.size()) {
-    printf("BOUNDS_CHECK %s %d i=%d e=%d NN=%d min_id=%d Nodes[INDEX2...]=%d\n",
-            __FILE__, __LINE__, i, e, NN, idRange.first, Nodes[INDEX2(i,e,NN)]);
-    exit(1);
-}
-if ((dofMap[Nodes[INDEX2(i,e,NN)]]-idRange.first) >= len ||
-        (dofMap[Nodes[INDEX2(i,e,NN)]]-idRange.first) < 0) {
-    printf("BOUNDS_CHECK %s %d i=%d e=%d NN=%d min_id=%d dof=%d\n",
-            __FILE__, __LINE__, i, e, NN, idRange.first, dofMap[Nodes[INDEX2(i,e,NN)]]-idRange.first);
-    exit(1);
-}
+                    ESYS_ASSERT(Nodes[INDEX2(i, e, NN)] >= 0, "BOUNDS_CHECK");
+                    ESYS_ASSERT(Nodes[INDEX2(i, e, NN)] < dofMap.size(), "BOUNDS_CHECK");
+                    ESYS_ASSERT(dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first < len, "BOUNDS_CHECK");
+                    ESYS_ASSERT(dofMap[Nodes[INDEX2(i, e, NN)]] - idRange.first >= 0, "BOUNDS_CHECK");
 #endif
                     if (maskDOF[dofMap[Nodes[INDEX2(i,e,NN)]]-idRange.first]>0) {
                         independent=false;
@@ -346,49 +325,6 @@ void ElementFile::markNodes(std::vector<short>& mask, int offset, bool useLinear
     }
 }
 
-void ElementFile::markDOFsConnectedToRange(int* mask, int offset, int marker,
-        index_t firstDOF, index_t lastDOF, const index_t *dofIndex, bool useLinear) 
-{
-    const_ReferenceElement_ptr refElement(referenceElementSet->
-                                            borrowReferenceElement(false));
-    if (useLinear) {
-        const int NN=refElement->numLinearNodes;
-        const int *lin_nodes=refElement->Type->linearNodes;
-        for (int color=minColor; color<=maxColor; color++) {
-#pragma omp parallel for
-            for (index_t e=0; e<numElements; e++) {
-                if (Color[e]==color) {
-                    for (int i=0; i<NN; i++) {
-                        const index_t k=dofIndex[Nodes[INDEX2(lin_nodes[i],e,numNodes)]];
-                        if (firstDOF<=k && k<lastDOF) {
-                            for (int j=0; j<NN; j++)
-                                mask[dofIndex[Nodes[INDEX2(lin_nodes[j],e,numNodes)]]-offset]=marker;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    } else {
-        const int NN=refElement->Type->numNodes;
-        for (int color=minColor; color<=maxColor; color++) {
-#pragma omp parallel for
-            for (index_t e=0; e<numElements; e++) {
-                if (Color[e]==color) {
-                    for (int i=0; i<NN; i++) {
-                        const index_t k=dofIndex[Nodes[INDEX2(i,e,numNodes)]];
-                        if (firstDOF<=k && k<lastDOF) {
-                            for (int j=0; j<NN; j++)
-                                mask[dofIndex[Nodes[INDEX2(j,e,numNodes)]]-offset]=marker;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 /// redistributes the elements including overlap by rank
 void ElementFile::distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, index_t* index)
 {
@@ -467,7 +403,7 @@ void ElementFile::distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, in
         // copied once only for each processor
         for (index_t e=0; e<numElements; e++) {
             if (Owner[e] == myRank) {
-                proc_mask.assign(size, TRUE);
+                proc_mask.assign(size, true);
                 for (int j=0; j<numNodes; j++) {
                     const int p=mpiRankOfDOF[Nodes[INDEX2(j,e,numNodes)]];
                     if (proc_mask[p]) {
@@ -479,7 +415,7 @@ void ElementFile::distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, in
                             Nodes_buffer[INDEX2(i,k,numNodes)]=
                                     index[Nodes[INDEX2(i,e,numNodes)]];
                         send_count[p]++;
-                        proc_mask[p]=FALSE;
+                        proc_mask[p]=false;
                     }
                 }
             }
@@ -491,20 +427,20 @@ void ElementFile::distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, in
         for (int p=0; p<size; ++p) {
             if (recv_count[p] > 0) {
                 MPI_Irecv(&Id[recv_offset[p]], recv_count[p], MPI_DIM_T, p,
-                        MPIInfo->msg_tag_counter+myRank, MPIInfo->comm,
+                        MPIInfo->counter()+myRank, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Irecv(&Tag[recv_offset[p]], recv_count[p], MPI_INT, p,
-                        MPIInfo->msg_tag_counter+size+myRank, MPIInfo->comm,
+                        MPIInfo->counter()+size+myRank, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Irecv(&Owner[recv_offset[p]], recv_count[p], MPI_INT, p,
-                        MPIInfo->msg_tag_counter+2*size+myRank, MPIInfo->comm,
+                        MPIInfo->counter()+2*size+myRank, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Irecv(&Nodes[recv_offset[p]*numNodes],
                         recv_count[p]*numNodes, MPI_DIM_T, p,
-                        MPIInfo->msg_tag_counter+3*size+myRank, MPIInfo->comm,
+                        MPIInfo->counter()+3*size+myRank, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
             }
@@ -513,25 +449,25 @@ void ElementFile::distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, in
         for (int p=0; p<size; ++p) {
             if (send_count[p] > 0) {
                 MPI_Issend(&Id_buffer[send_offset[p]], send_count[p], MPI_DIM_T,
-                        p, MPIInfo->msg_tag_counter+p, MPIInfo->comm,
+                        p, MPIInfo->counter()+p, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Issend(&Tag_buffer[send_offset[p]], send_count[p], MPI_INT,
-                        p, MPIInfo->msg_tag_counter+size+p, MPIInfo->comm,
+                        p, MPIInfo->counter()+size+p, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Issend(&Owner_buffer[send_offset[p]], send_count[p],
-                        MPI_INT, p, MPIInfo->msg_tag_counter+2*size+p,
+                        MPI_INT, p, MPIInfo->counter()+2*size+p,
                         MPIInfo->comm, &mpi_requests[numRequests]);
                 numRequests++;
                 MPI_Issend(&Nodes_buffer[send_offset[p]*numNodes],
                         send_count[p]*numNodes, MPI_DIM_T, p,
-                        MPIInfo->msg_tag_counter+3*size+p, MPIInfo->comm,
+                        MPIInfo->counter()+3*size+p, MPIInfo->comm,
                         &mpi_requests[numRequests]);
                 numRequests++;
             }
         }
-        ESYS_MPI_INC_COUNTER(*MPIInfo, 4*size);
+        MPIInfo->incCounter(4*size);
         // wait for the requests to be finalized
         MPI_Waitall(numRequests, &mpi_requests[0], &mpi_stati[0]);
 #endif
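
In the MPI exchange above, the message tag for stream k (Id, Tag, Owner, Nodes) and peer p is derived from a shared counter as counter()+k*size+rank, and incCounter(4*size) then retires those tags so a later round cannot collide with a straggling message. A rough sketch of that tagging pattern with plain MPI, using an ordinary int counter in place of escript's JMPI helper (exchangeOneStream and the per-peer single-int buffers are illustrative only):

    #include <mpi.h>
    #include <vector>

    // Sketch of the tag scheme: one non-blocking receive and send per peer for
    // stream k (k = 0..3 for Id, Tag, Owner, Nodes). 'counter' stands in for
    // the value returned by MPIInfo->counter().
    void exchangeOneStream(int k, int counter, int myRank, int size,
                           std::vector<int>& recvBuf, std::vector<int>& sendBuf)
    {
        std::vector<MPI_Request> requests;
        requests.reserve(2 * size);
        for (int p = 0; p < size; ++p) {
            if (p == myRank) continue;
            MPI_Request r;
            MPI_Irecv(&recvBuf[p], 1, MPI_INT, p, counter + k * size + myRank,
                      MPI_COMM_WORLD, &r);
            requests.push_back(r);
            MPI_Issend(&sendBuf[p], 1, MPI_INT, p, counter + k * size + p,
                       MPI_COMM_WORLD, &r);
            requests.push_back(r);
        }
        if (!requests.empty())
            MPI_Waitall((int)requests.size(), &requests[0], MPI_STATUSES_IGNORE);
        // Once every stream has completed, the real code calls
        // MPIInfo->incCounter(4*size) so later exchanges get fresh tags.
    }
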
diff --git a/finley/src/ElementFile.h b/finley/src/ElementFile.h
index f0db791..acde1d5 100644
--- a/finley/src/ElementFile.h
+++ b/finley/src/ElementFile.h
@@ -24,7 +24,8 @@
 
 namespace finley {
 
-struct ElementFile_Jacobians {
+struct ElementFile_Jacobians
+{
     ElementFile_Jacobians(const_ShapeFunction_ptr basis);
     ~ElementFile_Jacobians();
 
@@ -61,30 +62,45 @@ class ElementFile
 {
 public:
     ElementFile(const_ReferenceElementSet_ptr refElementSet,
-                esysUtils::JMPI& mpiInfo);
+                escript::JMPI mpiInfo);
     ~ElementFile();
 
-    void allocTable(dim_t numElements);
+    /// allocates the element table within an element file to hold NE elements
+    void allocTable(dim_t NE);
+
+    /// deallocates the element table within an element file
     void freeTable();
 
-    void distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF, index_t *Id);
-    void createColoring(const std::vector<index_t>& dofMap);
+    /// copies element file `in` into this element file starting from `offset`.
+    /// The elements `offset` to in->numElements+offset-1 will be overwritten.
+    void copyTable(index_t offset, index_t nodeOffset, index_t idOffset,
+                   const ElementFile* in);
+
+    /// redistributes the elements including overlap by rank
+    void distributeByRankOfDOF(const std::vector<int>& mpiRankOfDOF,
+                               index_t* nodesId);
+
+    /// Tries to reduce the number of colors used to color elements in this
+    /// ElementFile
+    void createColoring(const IndexVector& dofMap);
+
     /// reorders the elements so that they are stored close to the nodes
     void optimizeOrdering();
-    /// assigns new node reference numbers to the elements
-    void relabelNodes(const std::vector<index_t>& newNode, index_t offset);
+
+    /// assigns new node reference numbers to the elements.
+    /// If k is the old node, the new node is newNode[k-offset].
+    void relabelNodes(const IndexVector& newNode, index_t offset);
+
     void markNodes(std::vector<short>& mask, int offset, bool useLinear);
-    void scatter(index_t* index, const ElementFile* in);
-    void gather(index_t* index, const ElementFile* in);
-    void copyTable(index_t offset, index_t nodeOffset, index_t idOffset,
-                   const ElementFile* in);
 
-    void markDOFsConnectedToRange(int* mask, int offset, int marker,
-                                  index_t firstDOF, index_t lastDOF,
-                                  const index_t *dofIndex, bool useLinear);
+    void gather(const index_t* index, const ElementFile* in);
+
+    void scatter(index_t* index, const ElementFile* in);
 
     void setTags(const int newTag, const escript::Data& mask);
+
     ElementFile_Jacobians* borrowJacobians(const NodeFile*, bool, bool) const;
+
     /// returns the minimum and maximum reference number of nodes describing
     /// the elements
     inline std::pair<index_t,index_t> getNodeRange() const;
@@ -97,7 +113,7 @@ private:
     void swapTable(ElementFile* other);
 
 public:
-    esysUtils::JMPI MPIInfo;
+    escript::JMPI MPIInfo;
 
     /// the reference element to be used
     const_ReferenceElementSet_ptr referenceElementSet;
@@ -106,37 +122,49 @@ public:
     /// Id[i] is the id number of node i. This number is used when elements
     /// are resorted. In the entire code the term 'element id' refers to i and
     /// not to Id[i] unless explicitly stated otherwise.
-    index_t *Id;
+    index_t* Id;
+
     /// Tag[i] is the tag of element i
-    int *Tag;
+    int* Tag;
+
     /// Owner[i] contains the rank that owns element i
-    int *Owner;
+    int* Owner;
+
     /// array of tags which are actually used
     std::vector<int> tagsInUse;
+
     /// number of nodes per element
     int numNodes;
+
     /// Nodes[INDEX(k, i, numNodes)] is the k-th node in the i-th element.
     /// Note that in the way the nodes are ordered Nodes[INDEX(k, i, numNodes)
     /// is the k-th node of element i when referring to the linear version of
     /// the mesh.
-    index_t *Nodes;
+    index_t* Nodes;
+
     /// assigns each element a color. Elements with the same color don't share
     /// a node so they can be processed simultaneously.
     /// At any time Color must provide a valid value. In any case one can set
     /// Color[e]=e for all e
-    int *Color;
-    /// minimum color
-    int minColor;
-    /// maximum color
-    int maxColor;
+    index_t* Color;
+
+    /// minimum color value
+    index_t minColor;
+
+    /// maximum color value
+    index_t maxColor;
+
     /// jacobians of the shape function used for solution approximation
     ElementFile_Jacobians* jacobians;
+
     /// jacobians of the shape function used for solution approximation for
     /// reduced order of shape function
     ElementFile_Jacobians* jacobians_reducedS;
+
     /// jacobians of the shape function used for solution approximation for
     /// reduced integration order
     ElementFile_Jacobians* jacobians_reducedQ;
+
     /// jacobians of the shape function used for solution approximation for
     /// reduced integration order and reduced order of shape function
     ElementFile_Jacobians* jacobians_reducedS_reducedQ;
@@ -147,7 +175,6 @@ inline std::pair<index_t,index_t> ElementFile::getNodeRange() const
     return util::getMinMaxInt(numNodes, numElements, Nodes);
 }
 
-
 inline void ElementFile::updateTagList()
 {
     util::setValuesInUse(Tag, numElements, tagsInUse, MPIInfo);
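
The Color, minColor and maxColor members documented above carry the invariant that two elements with the same colour never share a node, so callers can sweep the colours serially and process all elements of one colour in parallel without write conflicts on per-node data. A small sketch of that consumption pattern (processByColor, nodes and nodeValue are illustrative stand-ins, not finley API):

    #include <vector>

    // Sketch of how Color/minColor/maxColor are consumed: elements sharing a
    // colour never share a node, so all elements of one colour may update
    // per-node data concurrently.
    void processByColor(const std::vector<int>& color, int minColor, int maxColor,
                        const std::vector<int>& nodes, int nodesPerElement,
                        std::vector<double>& nodeValue)
    {
        const int numElements = static_cast<int>(color.size());
        for (int c = minColor; c <= maxColor; ++c) {  // colours are swept serially
    #pragma omp parallel for
            for (int e = 0; e < numElements; ++e) {
                if (color[e] != c)
                    continue;
                for (int i = 0; i < nodesPerElement; ++i)
                    // safe: within one colour no two elements touch the same node
                    nodeValue[nodes[e * nodesPerElement + i]] += 1.0;
            }
        }
    }
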
diff --git a/finley/src/ElementFile_jacobians.cpp b/finley/src/ElementFile_jacobians.cpp
index 8d66942..78b2744 100644
--- a/finley/src/ElementFile_jacobians.cpp
+++ b/finley/src/ElementFile_jacobians.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "ElementFile.h"
 #include "Assemble.h"
 
@@ -87,12 +83,10 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
         }
      
         if (out->numQuadTotal != out->numSub*basis->numQuadNodes) {
-            setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: Incorrect total number of quadrature points.");
-            return NULL;
+            throw FinleyException("ElementFile::borrowJacobians: Incorrect total number of quadrature points.");
         }
         if (refElement->numNodes > numNodes) {
-            setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: Too many nodes expected.");
-            return NULL;
+            throw FinleyException("ElementFile::borrowJacobians: Too many nodes expected.");
         }
 
         if (out->volume==NULL)
@@ -115,10 +109,10 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                             Nodes, &shape->dSdv[0], basis->Type->numShapes,
                             dBdv, out->DSDX, out->volume, Id);
                 } else {
-                    setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: only one-sided elements supported in 1D.");
+                    throw FinleyException("ElementFile::borrowJacobians: only one-sided elements supported in 1D.");
                 }
             } else {
-                setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: local dimension in a 1D domain has to be 0 or 1.");
+                throw escript::ValueError("ElementFile::borrowJacobians: local dimension in a 1D domain has to be 0 or 1.");
             }
         /*========================== dim = 2 ============================== */
         } else if (out->numDim==2) {
@@ -141,7 +135,7 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                                 basis->Type->numShapes, dBdv, out->DSDX,
                                 out->volume, Id);
                     } else {
-                        setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: elements must be one- or two-sided.");
+                        throw escript::ValueError("ElementFile::borrowJacobians: elements must be one- or two-sided.");
                     }
                 } else if (out->BasisFunctions->Type->numDim==1) {
                     if (out->numSides==1) {
@@ -159,10 +153,10 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                                 basis->Type->numShapes, dBdv, out->DSDX,
                                 out->volume, Id);
                     } else {
-                        setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: elements must be one- or two-sided.");
+                        throw escript::ValueError("ElementFile::borrowJacobians: elements must be one- or two-sided.");
                     }
                 } else {
-                    setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: element dimension for local dimension 1 in a 2D domain has to be 1 or 2.");
+                    throw escript::ValueError("ElementFile::borrowJacobians: element dimension for local dimension 1 in a 2D domain has to be 1 or 2.");
                 }
             } else if (refElement->numLocalDim==2) {
                 if (out->numSides==1) {
@@ -172,10 +166,10 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                             Nodes, &shape->dSdv[0], basis->Type->numShapes,
                             dBdv, out->DSDX, out->volume, Id);
                 } else {
-                    setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: 2D volume supports one-sided elements only.");
+                    throw escript::ValueError("ElementFile::borrowJacobians: 2D volume supports one-sided elements only.");
                 }
             } else {
-                setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: local dimension in a 2D domain has to be 1 or 2.");
+                throw escript::ValueError("ElementFile::borrowJacobians: local dimension in a 2D domain has to be 1 or 2.");
             }
         /*========================== dim = 3 ============================== */
         } else if (out->numDim==3) {
@@ -198,7 +192,7 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                                 basis->Type->numShapes, dBdv, out->DSDX,
                                 out->volume, Id);
                     } else {
-                        setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: elements must be one- or two-sided.");
+                        throw escript::ValueError("ElementFile::borrowJacobians: elements must be one- or two-sided.");
                     }
                 } else if (out->BasisFunctions->Type->numDim==2) {
                     if (out->numSides==1) {
@@ -216,10 +210,10 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                                 basis->Type->numShapes, dBdv, out->DSDX,
                                 out->volume, Id);
                     } else {
-                        setError(SYSTEM_ERROR,"ElementFile::borrowJacobians: elements must be one- or two-sided.");
+                        throw escript::ValueError("ElementFile::borrowJacobians: elements must be one- or two-sided.");
                     }
                 } else {
-                    setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: element dimension for local dimension 2 in a 3D domain has to be 2 or 3.");
+                    throw escript::ValueError("ElementFile::borrowJacobians: element dimension for local dimension 2 in a 3D domain has to be 2 or 3.");
                 }
             } else if (refElement->numLocalDim==3) {
                 if (out->numSides==1) {
@@ -229,20 +223,16 @@ ElementFile_Jacobians* ElementFile::borrowJacobians(const NodeFile* nodefile,
                             Nodes, &shape->dSdv[0], basis->Type->numShapes,
                             dBdv, out->DSDX, out->volume, Id);
                 } else {
-                    setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: 3D volume supports one sided elements only..");
+                    throw escript::ValueError("ElementFile::borrowJacobians: 3D volume supports one-sided elements only.");
                 }
             } else {
-                setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: local dimension in a 3D domain has to be 2 or 3.");
+                throw escript::ValueError("ElementFile::borrowJacobians: local dimension in a 3D domain has to be 2 or 3.");
             }
         } else {
-            setError(SYSTEM_ERROR, "ElementFile::borrowJacobians: number of spatial dimensions has to be 1, 2 or 3.");
+            throw escript::ValueError("ElementFile::borrowJacobians: number of spatial dimensions has to be 1, 2 or 3.");
         }
 
-        if (noError()) {
-            out->status = nodefile->status;
-        } else {
-            out=NULL;
-        }
+        out->status = nodefile->status;
     }
     return out;
 }
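
With setError/return NULL replaced by throws, callers of borrowJacobians no longer poll a global error flag; failures arrive as exceptions carrying the message text. A sketch of the resulting call-site pattern, assuming the usual finley include paths and that FinleyException and escript::ValueError ultimately derive from std::exception (buildJacobians itself is hypothetical):

    #include "ElementFile.h"   // finley header; include paths/flags as in the finley tree
    #include <exception>
    #include <iostream>

    // Hypothetical call site showing the new error-handling style.
    void buildJacobians(finley::ElementFile* elements, const finley::NodeFile* nodeFile)
    {
        try {
            finley::ElementFile_Jacobians* jac =
                    elements->borrowJacobians(nodeFile, false, false);
            (void)jac;   // ... use the jacobians; no noError()/NULL check needed ...
        } catch (const std::exception& e) {
            // previously: if (!noError()) handle getErrorMessage()
            std::cerr << "jacobian assembly failed: " << e.what() << std::endl;
            throw;   // let the usual escript wrappers convert this for Python
        }
    }
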
diff --git a/finley/src/Finley.cpp b/finley/src/Finley.cpp
deleted file mode 100644
index 910a957..0000000
--- a/finley/src/Finley.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include "Finley.h"
-#include "esysUtils/error.h"
-#include "finley/CppAdapter/FinleyAdapterException.h" // temporary
-
-namespace finley {
-
-/// returns a time mark
-double timer()
-{
-    return Esys_timer();
-}
-
-/// checks if the pointer ptr has a target. If not an error is raised and
-/// TRUE is returned.
-bool checkPtr(void* arg)
-{
-    return Esys_checkPtr(arg);
-}
-
-/// resets the error to NO_ERROR
-void resetError()
-{
-    Esys_resetError();
-}
-
-/// sets an error
-void setError(ErrorCodeType err, const char* msg)
-{
-    Esys_setError(err,msg);
-}
-
-/// checks if there is no error
-bool noError()
-{
-    return Esys_noError();
-}
-
-/// returns the error code
-ErrorCodeType getErrorType()
-{
-    return Esys_getErrorType();
-}
-
-/// returns the error message
-char* getErrorMessage(void)
-{
-    return Esys_getErrorMessage();
-}
-
-void checkFinleyError() 
-{
-    if (!noError()) {
-        // reset the error code to no error otherwise the next call to
-        // this function may resurrect a previous error
-        resetError();
-        throw FinleyAdapterException(getErrorMessage());
-    }
-}
-
-/* checks that there is no error across all processes in a communicator */
-/* NOTE : does not guarantee consistency of error string on each process */
-bool MPI_noError(esysUtils::JMPI& mpi_info)
-{
-    return esysUtils::Esys_MPIInfo_noError(mpi_info);
-}
-
-} // namespace finley
-
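
The removed MPI_noError helper existed so that all ranks would agree on the error state and abort together rather than deadlock. That collective-agreement idea, independent of the deleted esysUtils implementation, boils down to a logical AND across ranks; a generic sketch (allRanksOk is illustrative only):

    #include <mpi.h>

    // Generic sketch: combine a per-rank "ok" flag into a global verdict so
    // that either every rank continues or every rank stops together. This is
    // the standard MPI_Allreduce/MPI_LAND pattern, not the deleted esysUtils code.
    bool allRanksOk(bool localOk, MPI_Comm comm)
    {
        int local = localOk ? 1 : 0;
        int global = 0;
        MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, comm);
        return global != 0;
    }
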
diff --git a/finley/src/Finley.h b/finley/src/Finley.h
index 5464e82..5b7973c 100644
--- a/finley/src/Finley.h
+++ b/finley/src/Finley.h
@@ -23,13 +23,24 @@
 
 *****************************************************************************/
 
-#include "esysUtils/Esys_MPI.h"
-#include "esysUtils/error.h"
+// first include to avoid _POSIX_C_SOURCE redefinition warnings
+#include <escript/DataTypes.h>
+
+#include <finley/FinleyException.h>
+
+#include <escript/EsysMPI.h>
 
 #include <vector>
 
 namespace finley {
 
+using escript::DataTypes::dim_t;
+using escript::DataTypes::index_t;
+using escript::DataTypes::IndexVector;
+
+// real_t clashes with metis real_t !
+//using escript::DataTypes::real_t;
+
 //#define Finley_TRACE
 #define FINLEY_UNKNOWN -1
 #define FINLEY_DEGREES_OF_FREEDOM 1
@@ -46,18 +57,23 @@ namespace finley {
 #define FINLEY_REDUCED_CONTACT_ELEMENTS_1 12
 #define FINLEY_REDUCED_CONTACT_ELEMENTS_2 13
 
-#define FINLEY_INITIAL_STATUS 0
-
-typedef Esys_ErrorCodeType ErrorCodeType;
+enum {
+    DegreesOfFreedom = FINLEY_DEGREES_OF_FREEDOM,
+    ReducedDegreesOfFreedom = FINLEY_REDUCED_DEGREES_OF_FREEDOM,
+    Nodes = FINLEY_NODES,
+    ReducedNodes = FINLEY_REDUCED_NODES,
+    Elements = FINLEY_ELEMENTS,
+    ReducedElements = FINLEY_REDUCED_ELEMENTS,
+    FaceElements = FINLEY_FACE_ELEMENTS,
+    ReducedFaceElements = FINLEY_REDUCED_FACE_ELEMENTS,
+    Points = FINLEY_POINTS,
+    ContactElementsZero = FINLEY_CONTACT_ELEMENTS_1,
+    ReducedContactElementsZero = FINLEY_REDUCED_CONTACT_ELEMENTS_1,
+    ContactElementsOne = FINLEY_CONTACT_ELEMENTS_2,
+    ReducedContactElementsOne = FINLEY_REDUCED_CONTACT_ELEMENTS_2
+};
 
-double timer();
-void resetError();
-void setError(ErrorCodeType err, const char* msg);
-bool noError();
-ErrorCodeType getErrorType();
-char* getErrorMessage();
-void checkFinleyError();
-bool MPI_noError(esysUtils::JMPI& mpi_info);
+#define FINLEY_INITIAL_STATUS 0
 
 } // namespace finley
 
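
After this header change, finley code picks up dim_t, index_t and IndexVector from escript::DataTypes and can refer to the function-space codes through the new enum names instead of the raw FINLEY_* macros. A tiny usage sketch, assuming an installed-header include path of <finley/Finley.h>:

    #include <finley/Finley.h>   // include path assumed; in-tree builds may use "Finley.h"
    #include <iostream>

    int main()
    {
        finley::IndexVector dofMap(10, 0);   // escript::DataTypes::IndexVector
        finley::index_t n = static_cast<finley::index_t>(dofMap.size());
        // finley::Nodes carries the same value as the FINLEY_NODES macro
        std::cout << "entries: " << n << ", Nodes code: " << finley::Nodes << std::endl;
        return 0;
    }
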
diff --git a/finley/src/FinleyDomain.cpp b/finley/src/FinleyDomain.cpp
new file mode 100644
index 0000000..977639f
--- /dev/null
+++ b/finley/src/FinleyDomain.cpp
@@ -0,0 +1,2460 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "FinleyDomain.h"
+#include "Assemble.h"
+#include "FinleyException.h"
+#include "IndexList.h"
+
+#include <escript/Data.h>
+#include <escript/DataFactory.h>
+#include <escript/Random.h>
+#include <escript/SolverOptions.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#include <paso/Transport.h>
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/TrilinosMatrixAdapter.h>
+
+using esys_trilinos::TrilinosMatrixAdapter;
+using esys_trilinos::const_TrilinosGraph_ptr;
+#endif
+
+#include <boost/scoped_array.hpp>
+
+#ifdef ESYS_HAVE_NETCDF
+#include <netcdfcpp.h>
+#endif
+
+using namespace std;
+namespace bp = boost::python;
+using escript::NotImplementedError;
+using escript::ValueError;
+
+namespace finley {
+
+// define the static constants
+FinleyDomain::FunctionSpaceNamesMapType FinleyDomain::m_functionSpaceTypeNames;
+
+FinleyDomain::FinleyDomain(const string& name, int numDim, escript::JMPI jmpi) :
+    m_mpiInfo(jmpi),
+    m_name(name),
+    approximationOrder(-1),
+    reducedApproximationOrder(-1),
+    integrationOrder(-1),
+    reducedIntegrationOrder(-1),
+    m_elements(NULL),
+    m_faceElements(NULL),
+    m_contactElements(NULL),
+    m_points(NULL)
+{
+    // allocate node table
+    m_nodes = new NodeFile(numDim, m_mpiInfo);
+    setFunctionSpaceTypeNames();
+}
+
+FinleyDomain::FinleyDomain(const FinleyDomain& in) :
+    m_mpiInfo(in.m_mpiInfo),
+    m_name(in.m_name),
+    approximationOrder(in.approximationOrder),
+    reducedApproximationOrder(in.reducedApproximationOrder),
+    integrationOrder(in.integrationOrder),
+    reducedIntegrationOrder(in.reducedIntegrationOrder),
+    m_nodes(in.m_nodes),
+    m_elements(in.m_elements),
+    m_faceElements(in.m_faceElements),
+    m_contactElements(in.m_contactElements),
+    m_points(in.m_points)
+{
+    setFunctionSpaceTypeNames();
+}
+
+FinleyDomain::~FinleyDomain()
+{
+    delete m_nodes;
+    delete m_elements;
+    delete m_faceElements;
+    delete m_contactElements;
+    delete m_points;
+}
+
+void FinleyDomain::MPIBarrier() const
+{
+#ifdef ESYS_MPI
+    MPI_Barrier(getMPIComm());
+#endif
+}
+
+void FinleyDomain::setElements(ElementFile* elements)
+{
+    delete m_elements;
+    m_elements = elements;
+}
+
+void FinleyDomain::setFaceElements(ElementFile* elements)
+{
+    delete m_faceElements;
+    m_faceElements = elements;
+}
+
+void FinleyDomain::setContactElements(ElementFile* elements)
+{
+    delete m_contactElements;
+    m_contactElements = elements;
+}
+
+void FinleyDomain::setPoints(ElementFile* elements)
+{
+    delete m_points;
+    m_points = elements;
+}
+
+void FinleyDomain::setOrders() 
+{
+    const int ORDER_MAX = 9999999;
+    int locals[4] = { ORDER_MAX, ORDER_MAX, ORDER_MAX, ORDER_MAX };
+
+    if (m_elements != NULL && m_elements->numElements > 0) {
+        locals[0] = std::min(locals[0], m_elements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
+        locals[1] = std::min(locals[1], m_elements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
+        locals[2] = std::min(locals[2], m_elements->referenceElementSet->referenceElement->integrationOrder);
+        locals[3] = std::min(locals[3], m_elements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
+    }
+    if (m_faceElements != NULL && m_faceElements->numElements > 0) {
+        locals[0] = std::min(locals[0], m_faceElements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
+        locals[1] = std::min(locals[1], m_faceElements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
+        locals[2] = std::min(locals[2], m_faceElements->referenceElementSet->referenceElement->integrationOrder);
+        locals[3] = std::min(locals[3], m_faceElements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
+    }
+    if (m_contactElements != NULL && m_contactElements->numElements > 0) {
+        locals[0] = std::min(locals[0], m_contactElements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
+        locals[1] = std::min(locals[1], m_contactElements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
+        locals[2] = std::min(locals[2], m_contactElements->referenceElementSet->referenceElement->integrationOrder);
+        locals[3] = std::min(locals[3], m_contactElements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
+    }
+
+#ifdef ESYS_MPI
+    int globals[4];
+    MPI_Allreduce(locals, globals, 4, MPI_INT, MPI_MIN, m_mpiInfo->comm);
+    approximationOrder = (globals[0] < ORDER_MAX ? globals[0] : -1);
+    reducedApproximationOrder = (globals[1] < ORDER_MAX ? globals[1] : -1);
+    integrationOrder = (globals[2] < ORDER_MAX ? globals[2] : -1);
+    reducedIntegrationOrder = (globals[3] < ORDER_MAX ? globals[3] : -1);
+#else
+    approximationOrder = (locals[0] < ORDER_MAX ? locals[0] : -1);
+    reducedApproximationOrder = (locals[1] < ORDER_MAX ? locals[1] : -1);
+    integrationOrder = (locals[2] < ORDER_MAX ? locals[2] : -1);
+    reducedIntegrationOrder = (locals[3] < ORDER_MAX ? locals[3] : -1);
+#endif
+}
+
+void FinleyDomain::createMappings(const IndexVector& dofDist,
+                                  const IndexVector& nodeDist)
+{
+    std::vector<short> maskReducedNodes(m_nodes->getNumNodes(), -1);
+    markNodes(maskReducedNodes, 0, true);
+    IndexVector indexReducedNodes = util::packMask(maskReducedNodes);
+    m_nodes->createNodeMappings(indexReducedNodes, dofDist, nodeDist);
+}
+
+void FinleyDomain::markNodes(vector<short>& mask, index_t offset,
+                             bool useLinear) const
+{
+    m_elements->markNodes(mask, offset, useLinear);
+    m_faceElements->markNodes(mask, offset, useLinear);
+    m_contactElements->markNodes(mask, offset, useLinear);
+    m_points->markNodes(mask, offset, useLinear);
+}
+
+void FinleyDomain::relabelElementNodes(const IndexVector& newNode, index_t offset)
+{
+    m_elements->relabelNodes(newNode, offset);
+    m_faceElements->relabelNodes(newNode, offset);
+    m_contactElements->relabelNodes(newNode, offset);
+    m_points->relabelNodes(newNode, offset);
+}
+
+void FinleyDomain::dump(const string& fileName) const
+{
+#ifdef ESYS_HAVE_NETCDF
+    const NcDim* ncdims[12] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
+    NcVar* ids;
+    index_t* index_ptr;
+#ifdef ESYS_INDEXTYPE_LONG
+    NcType ncIdxType = ncLong;
+#else
+    NcType ncIdxType = ncInt;
+#endif
+    int num_Tags = 0;
+    int mpi_size                     = getMPISize();
+    int mpi_rank                     = getMPIRank();
+    int numDim                       = m_nodes->numDim;
+    dim_t numNodes                   = m_nodes->getNumNodes();
+    dim_t num_Elements               = m_elements->numElements;
+    dim_t num_FaceElements           = m_faceElements->numElements;
+    dim_t num_ContactElements        = m_contactElements->numElements;
+    dim_t num_Points                 = m_points->numElements;
+    int num_Elements_numNodes        = m_elements->numNodes;
+    int num_FaceElements_numNodes    = m_faceElements->numNodes;
+    int num_ContactElements_numNodes = m_contactElements->numNodes;
+#ifdef ESYS_MPI
+    MPI_Status status;
+#endif
+
+    // Incoming token indicates it's my turn to write
+#ifdef ESYS_MPI
+    if (mpi_rank > 0)
+        MPI_Recv(&num_Tags, 0, MPI_INT, mpi_rank-1, 81800, getMPIComm(), &status);
+#endif
+
+    const string newFileName(m_mpiInfo->appendRankToFileName(fileName));
+
+    // Figure out how much storage is required for tags
+    num_Tags = m_tagMap.size();
+
+    // NetCDF error handler
+    NcError err(NcError::verbose_nonfatal);
+    // Create the file
+    NcFile dataFile(newFileName.c_str(), NcFile::Replace);
+    string msgPrefix("Error in FinleyDomain::dump: NetCDF operation failed - ");
+    // check if writing was successful
+    if (!dataFile.is_valid())
+        throw FinleyException(msgPrefix + "Open file for output");
+
+    // Define dimensions (num_Elements and dim_Elements are identical,
+    // dim_Elements only appears if > 0)
+    if (! (ncdims[0] = dataFile.add_dim("numNodes", numNodes)) )
+        throw FinleyException(msgPrefix+"add_dim(numNodes)");
+    if (! (ncdims[1] = dataFile.add_dim("numDim", numDim)) )
+        throw FinleyException(msgPrefix+"add_dim(numDim)");
+    if (! (ncdims[2] = dataFile.add_dim("mpi_size_plus_1", mpi_size+1)) )
+        throw FinleyException(msgPrefix+"add_dim(mpi_size)");
+    if (num_Elements > 0)
+        if (! (ncdims[3] = dataFile.add_dim("dim_Elements", num_Elements)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_Elements)");
+    if (num_FaceElements > 0)
+        if (! (ncdims[4] = dataFile.add_dim("dim_FaceElements", num_FaceElements)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_FaceElements)");
+    if (num_ContactElements > 0)
+        if (! (ncdims[5] = dataFile.add_dim("dim_ContactElements", num_ContactElements)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_ContactElements)");
+    if (num_Points > 0)
+        if (! (ncdims[6] = dataFile.add_dim("dim_Points", num_Points)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_Points)");
+    if (num_Elements > 0)
+        if (! (ncdims[7] = dataFile.add_dim("dim_Elements_Nodes", num_Elements_numNodes)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_Elements_Nodes)");
+    if (num_FaceElements > 0)
+        if (! (ncdims[8] = dataFile.add_dim("dim_FaceElements_numNodes", num_FaceElements_numNodes)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_FaceElements_numNodes)");
+    if (num_ContactElements > 0)
+        if (! (ncdims[9] = dataFile.add_dim("dim_ContactElements_numNodes", num_ContactElements_numNodes)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_ContactElements_numNodes)");
+    if (num_Tags > 0)
+        if (! (ncdims[10] = dataFile.add_dim("dim_Tags", num_Tags)) )
+            throw FinleyException(msgPrefix+"add_dim(dim_Tags)");
+
+    // Attributes: MPI size, MPI rank, Name, order, reduced_order
+    if (!dataFile.add_att("index_size", (int)sizeof(index_t)))
+        throw FinleyException(msgPrefix+"add_att(index_size)");
+    if (!dataFile.add_att("mpi_size", mpi_size))
+        throw FinleyException(msgPrefix+"add_att(mpi_size)");
+    if (!dataFile.add_att("mpi_rank", mpi_rank))
+        throw FinleyException(msgPrefix+"add_att(mpi_rank)");
+    if (!dataFile.add_att("Name", m_name.c_str()))
+        throw FinleyException(msgPrefix+"add_att(Name)");
+    if (!dataFile.add_att("numDim", numDim))
+        throw FinleyException(msgPrefix+"add_att(order)");
+    if (!dataFile.add_att("order", integrationOrder))
+        throw FinleyException(msgPrefix+"add_att(order)");
+    if (!dataFile.add_att("reduced_order", reducedIntegrationOrder))
+        throw FinleyException(msgPrefix+"add_att(reduced_order)");
+    if (!dataFile.add_att("numNodes", numNodes))
+        throw FinleyException(msgPrefix+"add_att(numNodes)");
+    if (!dataFile.add_att("num_Elements", num_Elements))
+        throw FinleyException(msgPrefix+"add_att(num_Elements)");
+    if (!dataFile.add_att("num_FaceElements", num_FaceElements))
+        throw FinleyException(msgPrefix+"add_att(num_FaceElements)");
+    if (!dataFile.add_att("num_ContactElements", num_ContactElements))
+        throw FinleyException(msgPrefix+"add_att(num_ContactElements)");
+    if (!dataFile.add_att("num_Points", num_Points))
+        throw FinleyException(msgPrefix+"add_att(num_Points)");
+    if (!dataFile.add_att("num_Elements_numNodes", num_Elements_numNodes))
+        throw FinleyException(msgPrefix+"add_att(num_Elements_numNodes)");
+    if (!dataFile.add_att("num_FaceElements_numNodes", num_FaceElements_numNodes))
+        throw FinleyException(msgPrefix+"add_att(num_FaceElements_numNodes)");
+    if (!dataFile.add_att("num_ContactElements_numNodes", num_ContactElements_numNodes))
+        throw FinleyException(msgPrefix+"add_att(num_ContactElements_numNodes)");
+    if (!dataFile.add_att("Elements_TypeId", m_elements->referenceElementSet->referenceElement->Type->TypeId) )
+        throw FinleyException(msgPrefix+"add_att(Elements_TypeId)");
+    if (!dataFile.add_att("FaceElements_TypeId", m_faceElements->referenceElementSet->referenceElement->Type->TypeId) )
+        throw FinleyException(msgPrefix+"add_att(FaceElements_TypeId)");
+    if (!dataFile.add_att("ContactElements_TypeId", m_contactElements->referenceElementSet->referenceElement->Type->TypeId) )
+        throw FinleyException(msgPrefix+"add_att(ContactElements_TypeId)");
+    if (!dataFile.add_att("Points_TypeId", m_points->referenceElementSet->referenceElement->Type->TypeId) )
+        throw FinleyException(msgPrefix+"add_att(Points_TypeId)");
+    if (!dataFile.add_att("num_Tags", num_Tags))
+        throw FinleyException(msgPrefix+"add_att(num_Tags)");
+
+    // // // // // Nodes // // // // //
+
+    // Nodes nodeDistribution
+    if (! (ids = dataFile.add_var("Nodes_NodeDistribution", ncIdxType, ncdims[2])) )
+        throw FinleyException(msgPrefix+"add_var(Nodes_NodeDistribution)");
+    index_ptr = &m_nodes->nodesDistribution->first_component[0];
+    if (! (ids->put(index_ptr, mpi_size+1)) )
+        throw FinleyException(msgPrefix+"put(Nodes_NodeDistribution)");
+
+    // Nodes degreesOfFreedomDistribution
+    if (! ( ids = dataFile.add_var("Nodes_DofDistribution", ncIdxType, ncdims[2])) )
+        throw FinleyException(msgPrefix+"add_var(Nodes_DofDistribution)");
+    index_ptr = &m_nodes->degreesOfFreedomDistribution->first_component[0];
+    if (! (ids->put(index_ptr, mpi_size+1)) )
+        throw FinleyException(msgPrefix+"put(Nodes_DofDistribution)");
+
+    // Only write nodes if non-empty because NetCDF doesn't like empty arrays
+    // (it treats them as NC_UNLIMITED)
+    if (numNodes > 0) {
+        // Nodes Id
+        if (! ( ids = dataFile.add_var("Nodes_Id", ncIdxType, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_Id)");
+        if (! (ids->put(&m_nodes->Id[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_Id)");
+
+        // Nodes Tag
+        if (! ( ids = dataFile.add_var("Nodes_Tag", ncInt, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_Tag)");
+        if (! (ids->put(&m_nodes->Tag[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_Tag)");
+
+        // Nodes gDOF
+        if (! ( ids = dataFile.add_var("Nodes_gDOF", ncIdxType, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_gDOF)");
+        if (! (ids->put(&m_nodes->globalDegreesOfFreedom[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_gDOF)");
+
+        // Nodes global node index
+        if (! ( ids = dataFile.add_var("Nodes_gNI", ncIdxType, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_gNI)");
+        if (! (ids->put(&m_nodes->globalNodesIndex[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_gNI)");
+
+        // Nodes grDof
+        if (! ( ids = dataFile.add_var("Nodes_grDfI", ncIdxType, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_grDfI)");
+        if (! (ids->put(&m_nodes->globalReducedDOFIndex[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_grDfI)");
+
+        // Nodes grNI
+        if (! ( ids = dataFile.add_var("Nodes_grNI", ncIdxType, ncdims[0])) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_grNI)");
+        if (! (ids->put(&m_nodes->globalReducedNodesIndex[0], numNodes)) )
+            throw FinleyException(msgPrefix+"put(Nodes_grNI)");
+
+        // Nodes Coordinates
+        if (! ( ids = dataFile.add_var("Nodes_Coordinates", ncDouble, ncdims[0], ncdims[1]) ) )
+            throw FinleyException(msgPrefix+"add_var(Nodes_Coordinates)");
+        if (! (ids->put(m_nodes->Coordinates, numNodes, numDim)) )
+            throw FinleyException(msgPrefix+"put(Nodes_Coordinates)");
+    }
+
+    // // // // // Elements // // // // //
+    if (num_Elements > 0) {
+        // Elements_Id
+        if (! ( ids = dataFile.add_var("Elements_Id", ncIdxType, ncdims[3])) )
+            throw FinleyException(msgPrefix+"add_var(Elements_Id)");
+        if (! (ids->put(m_elements->Id, num_Elements)) )
+            throw FinleyException(msgPrefix+"put(Elements_Id)");
+
+        // Elements_Tag
+        if (! ( ids = dataFile.add_var("Elements_Tag", ncInt, ncdims[3])) )
+            throw FinleyException(msgPrefix+"add_var(Elements_Tag)");
+        if (! (ids->put(m_elements->Tag, num_Elements)) )
+            throw FinleyException(msgPrefix+"put(Elements_Tag)");
+
+        // Elements_Owner
+        if (! ( ids = dataFile.add_var("Elements_Owner", ncInt, ncdims[3])) )
+            throw FinleyException(msgPrefix+"add_var(Elements_Owner)");
+        if (! (ids->put(m_elements->Owner, num_Elements)) )
+            throw FinleyException(msgPrefix+"put(Elements_Owner)");
+
+        // Elements_Color
+        if (! ( ids = dataFile.add_var("Elements_Color", ncInt, ncdims[3])) )
+            throw FinleyException(msgPrefix+"add_var(Elements_Color)");
+        if (! (ids->put(m_elements->Color, num_Elements)) )
+            throw FinleyException(msgPrefix+"put(Elements_Color)");
+
+        // Elements_Nodes
+        if (! ( ids = dataFile.add_var("Elements_Nodes", ncIdxType, ncdims[3], ncdims[7]) ) )
+            throw FinleyException(msgPrefix+"add_var(Elements_Nodes)");
+        if (! (ids->put(&m_elements->Nodes[0], num_Elements, num_Elements_numNodes)) )
+            throw FinleyException(msgPrefix+"put(Elements_Nodes)");
+    }
+
+    // // // // // Face_Elements // // // // //
+    if (num_FaceElements > 0) {
+        // FaceElements_Id
+        if (!(ids = dataFile.add_var("FaceElements_Id", ncIdxType, ncdims[4])))
+            throw FinleyException(msgPrefix+"add_var(FaceElements_Id)");
+        if (!(ids->put(m_faceElements->Id, num_FaceElements)))
+            throw FinleyException(msgPrefix+"put(FaceElements_Id)");
+
+        // FaceElements_Tag
+        if (!(ids = dataFile.add_var("FaceElements_Tag", ncInt, ncdims[4])))
+            throw FinleyException(msgPrefix+"add_var(FaceElements_Tag)");
+        if (!(ids->put(m_faceElements->Tag, num_FaceElements)))
+            throw FinleyException(msgPrefix+"put(FaceElements_Tag)");
+
+        // FaceElements_Owner
+        if (!(ids = dataFile.add_var("FaceElements_Owner", ncInt, ncdims[4])))
+            throw FinleyException(msgPrefix+"add_var(FaceElements_Owner)");
+        if (!(ids->put(m_faceElements->Owner, num_FaceElements)))
+            throw FinleyException(msgPrefix+"put(FaceElements_Owner)");
+
+        // FaceElements_Color
+        if (!(ids = dataFile.add_var("FaceElements_Color", ncIdxType, ncdims[4])))
+            throw FinleyException(msgPrefix+"add_var(FaceElements_Color)");
+        if (!(ids->put(m_faceElements->Color, num_FaceElements)))
+            throw FinleyException(msgPrefix+"put(FaceElements_Color)");
+
+        // FaceElements_Nodes
+        if (!(ids = dataFile.add_var("FaceElements_Nodes", ncIdxType, ncdims[4], ncdims[8])))
+            throw FinleyException(msgPrefix+"add_var(FaceElements_Nodes)");
+        if (!(ids->put(m_faceElements->Nodes, num_FaceElements, num_FaceElements_numNodes)))
+            throw FinleyException(msgPrefix+"put(FaceElements_Nodes)");
+    }
+
+    // // // // // Contact_Elements // // // // //
+    if (num_ContactElements > 0) {
+        // ContactElements_Id
+        if (!(ids = dataFile.add_var("ContactElements_Id", ncIdxType, ncdims[5])))
+            throw FinleyException(msgPrefix+"add_var(ContactElements_Id)");
+        if (!(ids->put(m_contactElements->Id, num_ContactElements)))
+            throw FinleyException(msgPrefix+"put(ContactElements_Id)");
+
+        // ContactElements_Tag
+        if (!(ids = dataFile.add_var("ContactElements_Tag", ncInt, ncdims[5])))
+            throw FinleyException(msgPrefix+"add_var(ContactElements_Tag)");
+        if (!(ids->put(m_contactElements->Tag, num_ContactElements)))
+            throw FinleyException(msgPrefix+"put(ContactElements_Tag)");
+
+        // ContactElements_Owner
+        if (!(ids = dataFile.add_var("ContactElements_Owner", ncInt, ncdims[5])))
+            throw FinleyException(msgPrefix+"add_var(ContactElements_Owner)");
+        if (!(ids->put(m_contactElements->Owner, num_ContactElements)))
+            throw FinleyException(msgPrefix+"put(ContactElements_Owner)");
+
+        // ContactElements_Color
+        if (!(ids = dataFile.add_var("ContactElements_Color", ncInt, ncdims[5])))
+            throw FinleyException(msgPrefix+"add_var(ContactElements_Color)");
+        if (!(ids->put(m_contactElements->Color, num_ContactElements)))
+            throw FinleyException(msgPrefix+"put(ContactElements_Color)");
+
+        // ContactElements_Nodes
+        if (!(ids = dataFile.add_var("ContactElements_Nodes", ncIdxType, ncdims[5], ncdims[9])))
+            throw FinleyException(msgPrefix+"add_var(ContactElements_Nodes)");
+        if (!(ids->put(m_contactElements->Nodes, num_ContactElements, num_ContactElements_numNodes)))
+            throw FinleyException(msgPrefix+"put(ContactElements_Nodes)");
+    }
+
+    // // // // // Points // // // // //
+    if (num_Points > 0) {
+        // Points_Id
+        if (!(ids = dataFile.add_var("Points_Id", ncIdxType, ncdims[6])))
+            throw FinleyException(msgPrefix+"add_var(Points_Id)");
+        if (!(ids->put(m_points->Id, num_Points)))
+            throw FinleyException(msgPrefix+"put(Points_Id)");
+
+        // Points_Tag
+        if (!(ids = dataFile.add_var("Points_Tag", ncInt, ncdims[6])))
+            throw FinleyException(msgPrefix+"add_var(Points_Tag)");
+        if (!(ids->put(m_points->Tag, num_Points)))
+            throw FinleyException(msgPrefix+"put(Points_Tag)");
+
+        // Points_Owner
+        if (!(ids = dataFile.add_var("Points_Owner", ncInt, ncdims[6])))
+            throw FinleyException(msgPrefix+"add_var(Points_Owner)");
+        if (!(ids->put(m_points->Owner, num_Points)))
+            throw FinleyException(msgPrefix+"put(Points_Owner)");
+
+        // Points_Color
+        if (!(ids = dataFile.add_var("Points_Color", ncIdxType, ncdims[6])))
+            throw FinleyException(msgPrefix+"add_var(Points_Color)");
+        if (!(ids->put(m_points->Color, num_Points)))
+            throw FinleyException(msgPrefix+"put(Points_Color)");
+
+        // Points_Nodes
+        if (!(ids = dataFile.add_var("Points_Nodes", ncIdxType, ncdims[6])))
+            throw FinleyException(msgPrefix+"add_var(Points_Nodes)");
+        if (!(ids->put(m_points->Nodes, num_Points)))
+            throw FinleyException(msgPrefix+"put(Points_Nodes)");
+    }
+
+    // // // // // TagMap // // // // //
+    if (num_Tags > 0) {
+        // Temporary storage to gather the tag keys
+        vector<int> Tags_keys;
+
+        // Copy tag data into temp arrays
+        TagMap::const_iterator it;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
+            Tags_keys.push_back(it->second);
+        }
+
+        // Tags_keys
+        if (!(ids = dataFile.add_var("Tags_keys", ncInt, ncdims[10])))
+            throw FinleyException(msgPrefix+"add_var(Tags_keys)");
+        if (!(ids->put(&Tags_keys[0], num_Tags)))
+            throw FinleyException(msgPrefix+"put(Tags_keys)");
+
+        // Tags_names_*
+        // This is an array of strings. Ideally it would be stored as a single
+        // array, but instead one attribute per string is written because the
+        // NetCDF manual does not explain how to store an array of strings.
+        int i = 0;
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++, i++) {
+            stringstream ss;
+            ss << "Tags_name_" << i;
+            const string name(ss.str());
+            if (!dataFile.add_att(name.c_str(), it->first.c_str()))
+                throw FinleyException(msgPrefix+"add_att(Tags_names_X)");
+        }
+    }
+
+    // Send token to the next MPI process so it can take its turn
+#ifdef ESYS_MPI
+    if (mpi_rank < mpi_size-1)
+        MPI_Send(&num_Tags, 0, MPI_INT, mpi_rank+1, 81800, getMPIComm());
+#endif
+
+    // NetCDF file is closed by destructor of NcFile object
+
+#else
+    throw FinleyException("FinleyDomain::dump: not configured with netCDF. "
+                          "Please contact your installation manager.");
+#endif // ESYS_HAVE_NETCDF
+}
+
+string FinleyDomain::getDescription() const
+{
+    return "FinleyMesh";
+}
+
+string FinleyDomain::functionSpaceTypeAsString(int functionSpaceType) const
+{
+    FunctionSpaceNamesMapType::iterator loc;
+    loc = m_functionSpaceTypeNames.find(functionSpaceType);
+    if (loc == m_functionSpaceTypeNames.end()) {
+        return "Invalid function space type code.";
+    } else {
+        return loc->second;
+    }
+}
+
+bool FinleyDomain::isValidFunctionSpaceType(int functionSpaceType) const
+{
+    FunctionSpaceNamesMapType::iterator loc;
+    loc = m_functionSpaceTypeNames.find(functionSpaceType);
+    return (loc != m_functionSpaceTypeNames.end());
+}
+
+void FinleyDomain::setFunctionSpaceTypeNames()
+{
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                DegreesOfFreedom,"Finley_DegreesOfFreedom [Solution(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedDegreesOfFreedom,"Finley_ReducedDegreesOfFreedom [ReducedSolution(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Nodes,"Finley_Nodes [ContinuousFunction(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedNodes,"Finley_Reduced_Nodes [ReducedContinuousFunction(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Elements,"Finley_Elements [Function(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedElements,"Finley_Reduced_Elements [ReducedFunction(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                FaceElements,"Finley_Face_Elements [FunctionOnBoundary(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedFaceElements,"Finley_Reduced_Face_Elements [ReducedFunctionOnBoundary(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                Points,"Finley_Points [DiracDeltaFunctions(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ContactElementsZero,"Finley_Contact_Elements_0 [FunctionOnContactZero(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedContactElementsZero,"Finley_Reduced_Contact_Elements_0 [ReducedFunctionOnContactZero(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ContactElementsOne,"Finley_Contact_Elements_1 [FunctionOnContactOne(domain)]"));
+    m_functionSpaceTypeNames.insert(FunctionSpaceNamesMapType::value_type(
+                ReducedContactElementsOne,"Finley_Reduced_Contact_Elements_1 [ReducedFunctionOnContactOne(domain)]"));
+}
+
+int FinleyDomain::getContinuousFunctionCode() const
+{
+    return Nodes;
+}
+
+int FinleyDomain::getReducedContinuousFunctionCode() const
+{
+    return ReducedNodes;
+}
+
+int FinleyDomain::getFunctionCode() const
+{
+    return Elements;
+}
+
+int FinleyDomain::getReducedFunctionCode() const
+{
+    return ReducedElements;
+}
+
+int FinleyDomain::getFunctionOnBoundaryCode() const
+{
+    return FaceElements;
+}
+
+int FinleyDomain::getReducedFunctionOnBoundaryCode() const
+{
+    return ReducedFaceElements;
+}
+
+int FinleyDomain::getFunctionOnContactZeroCode() const
+{
+    return ContactElementsZero;
+}
+
+int FinleyDomain::getReducedFunctionOnContactZeroCode() const
+{
+    return ReducedContactElementsZero;
+}
+
+int FinleyDomain::getFunctionOnContactOneCode() const
+{
+    return ContactElementsOne;
+}
+
+int FinleyDomain::getReducedFunctionOnContactOneCode() const
+{
+    return ReducedContactElementsOne;
+}
+
+int FinleyDomain::getSolutionCode() const
+{
+    return DegreesOfFreedom;
+}
+
+int FinleyDomain::getReducedSolutionCode() const
+{
+    return ReducedDegreesOfFreedom;
+}
+
+int FinleyDomain::getDiracDeltaFunctionsCode() const
+{
+    return Points;
+}
+
+//
+// Return the number of data points summed across all MPI processes
+//
+dim_t FinleyDomain::getNumDataPointsGlobal() const
+{
+    return m_nodes->getGlobalNumNodes();
+}
+
+//
+// Return the number of data points per sample and the number of samples
+// needed to represent data on parts of the mesh.
+//
+pair<int,dim_t> FinleyDomain::getDataShape(int functionSpaceCode) const
+{
+    int numDataPointsPerSample = 0;
+    dim_t numSamples = 0;
+    switch (functionSpaceCode) {
+        case Nodes:
+            numDataPointsPerSample = 1;
+            numSamples = m_nodes->getNumNodes();
+        break;
+        case ReducedNodes:
+            numDataPointsPerSample = 1;
+            numSamples = m_nodes->getNumReducedNodes();
+        break;
+        case Elements:
+            if (m_elements) {
+                numSamples = m_elements->numElements;
+                numDataPointsPerSample = m_elements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
+            }
+        break;
+        case ReducedElements:
+            if (m_elements) {
+                numSamples = m_elements->numElements;
+                numDataPointsPerSample = m_elements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
+            }
+        break;
+        case FaceElements:
+            if (m_faceElements) {
+                numSamples = m_faceElements->numElements;
+                numDataPointsPerSample = m_faceElements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
+            }
+        break;
+        case ReducedFaceElements:
+            if (m_faceElements) {
+                numSamples = m_faceElements->numElements;
+                numDataPointsPerSample = m_faceElements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
+            }
+        break;
+        case Points:
+            if (m_points) {
+                numSamples = m_points->numElements;
+                numDataPointsPerSample = 1;
+            }
+        break;
+        case ContactElementsZero:
+        case ContactElementsOne:
+            if (m_contactElements) {
+                numSamples = m_contactElements->numElements;
+                numDataPointsPerSample = m_contactElements->referenceElementSet->referenceElement->Parametrization->numQuadNodes;
+            }
+            break;
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            if (m_contactElements) {
+                numSamples = m_contactElements->numElements;
+                numDataPointsPerSample = m_contactElements->referenceElementSet->referenceElementReducedQuadrature->Parametrization->numQuadNodes;
+            }
+            break;
+        case DegreesOfFreedom:
+            if (m_nodes) {
+                numSamples = m_nodes->getNumDegreesOfFreedom();
+                numDataPointsPerSample = 1;
+            }
+        break;
+        case ReducedDegreesOfFreedom:
+            if (m_nodes) {
+                numSamples = m_nodes->getNumReducedDegreesOfFreedom();
+                numDataPointsPerSample = 1;
+            }
+        break;
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceCode
+                << " for domain " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return pair<int,dim_t>(numDataPointsPerSample, numSamples);
+}
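+
+// A minimal usage sketch for getDataShape() (illustrative only; `dom` stands
+// for an existing FinleyDomain instance):
+//     std::pair<int, dim_t> shape = dom.getDataShape(Elements);
+//     dim_t localDataPoints = shape.first * shape.second; // points/sample * samples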
+
+//
+// adds a second-order linear PDE to a given stiffness matrix and
+// right hand side
+//
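+// The volume coefficients A, B, C, D, X and Y are assembled over the interior
+// elements, d and y over the face elements, d_contact and y_contact over the
+// contact elements, and d_dirac and y_dirac over the Dirac points.
+//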
+void FinleyDomain::addPDEToSystem(
+        escript::AbstractSystemMatrix& mat, escript::Data& rhs,
+        const escript::Data& A, const escript::Data& B, const escript::Data& C,
+        const escript::Data& D, const escript::Data& X, const escript::Data& Y,
+        const escript::Data& d, const escript::Data& y,
+        const escript::Data& d_contact, const escript::Data& y_contact,
+        const escript::Data& d_dirac, const escript::Data& y_dirac) const
+{
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(&mat);
+    if (tm) {
+        tm->resumeFill();
+    }
+#endif
+
+    Assemble_PDE(m_nodes, m_elements, mat.getPtr(), rhs, A, B, C, D, X, Y);
+    Assemble_PDE(m_nodes, m_faceElements, mat.getPtr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(), d,
+                 escript::Data(), y);
+    Assemble_PDE(m_nodes, m_contactElements, mat.getPtr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(), d_contact,
+                 escript::Data(), y_contact);
+    Assemble_PDE(m_nodes, m_points, mat.getPtr(), rhs, escript::Data(),
+                 escript::Data(), escript::Data(), d_dirac,
+                 escript::Data(), y_dirac);
+
+#ifdef ESYS_HAVE_TRILINOS
+    if (tm) {
+        tm->fillComplete(true);
+    }
+#endif
+}
+
+void FinleyDomain::addPDEToLumpedSystem(escript::Data& mat,
+                                        const escript::Data& D,
+                                        const escript::Data& d,
+                                        const escript::Data& d_dirac,
+                                        bool useHRZ) const
+{
+    Assemble_LumpedSystem(m_nodes, m_elements, mat, D, useHRZ);
+    Assemble_LumpedSystem(m_nodes, m_faceElements, mat, d, useHRZ);
+    Assemble_LumpedSystem(m_nodes, m_points, mat, d_dirac, useHRZ);
+}
+
+//
+// adds a second-order linear PDE to the right hand side only
+//
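+// X and Y are assembled over the interior elements, y over the face elements,
+// y_contact over the contact elements and y_dirac over the Dirac points.
+//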
+void FinleyDomain::addPDEToRHS(escript::Data& rhs, const escript::Data& X,
+          const escript::Data& Y, const escript::Data& y,
+          const escript::Data& y_contact, const escript::Data& y_dirac) const
+{
+    Assemble_PDE(m_nodes, m_elements, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), X, Y);
+
+    Assemble_PDE(m_nodes, m_faceElements, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), escript::Data(), y);
+
+    Assemble_PDE(m_nodes, m_contactElements, escript::ASM_ptr(),
+                 rhs, escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), escript::Data(), y_contact);
+
+    Assemble_PDE(m_nodes, m_points, escript::ASM_ptr(), rhs,
+                 escript::Data(), escript::Data(), escript::Data(),
+                 escript::Data(), escript::Data(), y_dirac);
+}
+
+//
+// adds a second-order PDE to a transport problem
+//
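+// M is assembled into the mass matrix; A, B, C, D, X and Y (interior elements),
+// d and y (face elements), d_contact and y_contact (contact elements) as well
+// as d_dirac and y_dirac (Dirac points) are assembled into the transport matrix.
+//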
+void FinleyDomain::addPDEToTransportProblem(
+        escript::AbstractTransportProblem& tp, escript::Data& source,
+        const escript::Data& M, const escript::Data& A, const escript::Data& B,
+        const escript::Data& C, const escript::Data& D, const escript::Data& X,
+        const escript::Data& Y, const escript::Data& d, const escript::Data& y,
+        const escript::Data& d_contact, const escript::Data& y_contact,
+        const escript::Data& d_dirac, const escript::Data& y_dirac) const
+{
+#ifdef ESYS_HAVE_PASO
+    paso::TransportProblem* ptp = dynamic_cast<paso::TransportProblem*>(&tp);
+    if (!ptp)
+        throw ValueError("Finley only supports Paso transport problems.");
+
+    source.expand();
+
+    escript::ASM_ptr mm(boost::static_pointer_cast<escript::AbstractSystemMatrix>(
+                ptp->borrowMassMatrix()));
+    escript::ASM_ptr tm(boost::static_pointer_cast<escript::AbstractSystemMatrix>(
+                ptp->borrowTransportMatrix()));
+
+    Assemble_PDE(m_nodes, m_elements, mm, source, escript::Data(),
+                 escript::Data(), escript::Data(), M, escript::Data(),
+                 escript::Data());
+    Assemble_PDE(m_nodes, m_elements, tm, source, A, B, C, D, X, Y);
+    Assemble_PDE(m_nodes, m_faceElements, tm, source, escript::Data(),
+                 escript::Data(), escript::Data(), d, escript::Data(), y);
+    Assemble_PDE(m_nodes, m_contactElements, tm, source,
+                 escript::Data(), escript::Data(), escript::Data(), d_contact,
+                 escript::Data(), y_contact);
+    Assemble_PDE(m_nodes, m_points, tm, source, escript::Data(),
+                 escript::Data(), escript::Data(), d_dirac, escript::Data(),
+                 y_dirac);
+#else
+    throw FinleyException("Transport problems require the Paso library which "
+                          "is not available.");
+#endif
+}
+
+//
+// interpolates data between different function spaces
+//
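+// The source function space determines the strategy: nodal and DOF data is
+// copied or interpolated onto element-type spaces, element data can only be
+// copied (or averaged onto the reduced version) within its own element family,
+// and under MPI DOF-based data may first be expanded or moved to nodes.
+//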
+void FinleyDomain::interpolateOnDomain(escript::Data& target,
+                                      const escript::Data& in) const
+{
+    if (*in.getFunctionSpace().getDomain() != *this)
+        throw ValueError("Illegal domain of interpolant.");
+    if (*target.getFunctionSpace().getDomain() != *this)
+        throw ValueError("Illegal domain of interpolation target.");
+
+    switch (in.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            switch (target.getFunctionSpace().getTypeCode()) {
+                case Nodes:
+                case ReducedNodes:
+                case DegreesOfFreedom:
+                case ReducedDegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                break;
+                case Elements:
+                case ReducedElements:
+                    Assemble_interpolate(m_nodes, m_elements, in, target);
+                break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                break;
+                case Points:
+                    Assemble_interpolate(m_nodes, m_points, in, target);
+                break;
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    Assemble_interpolate(m_nodes, m_contactElements, in, target);
+                break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Finley does not know anything "
+                          "about function space type "
+                          << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+        break;
+        case ReducedNodes:
+            switch(target.getFunctionSpace().getTypeCode()) {
+                case Nodes:
+                case ReducedNodes:
+                case DegreesOfFreedom:
+                case ReducedDegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                break;
+                case Elements:
+                case ReducedElements:
+                    Assemble_interpolate(m_nodes, m_elements, in, target);
+                break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                break;
+                case Points:
+                    Assemble_interpolate(m_nodes, m_points, in, target);
+                break;
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    Assemble_interpolate(m_nodes, m_contactElements, in, target);
+                break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Finley does not know anything "
+                          "about function space type "
+                          << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+        break;
+        case Elements:
+            if (target.getFunctionSpace().getTypeCode() == Elements) {
+                Assemble_CopyElementData(m_elements, target, in);
+            } else if (target.getFunctionSpace().getTypeCode()==ReducedElements) {
+                Assemble_AverageElementData(m_elements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on elements possible.");
+            }
+            break;
+        case ReducedElements:
+            if (target.getFunctionSpace().getTypeCode() == ReducedElements) {
+                Assemble_CopyElementData(m_elements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on elements "
+                                 "with reduced integration order possible.");
+            }
+            break;
+        case FaceElements:
+            if (target.getFunctionSpace().getTypeCode() == FaceElements) {
+                Assemble_CopyElementData(m_faceElements, target, in);
+            } else if (target.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+                Assemble_AverageElementData(m_faceElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on face elements possible.");
+            }
+            break;
+        case ReducedFaceElements:
+            if (target.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+                Assemble_CopyElementData(m_faceElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on face "
+                         "elements with reduced integration order possible.");
+            }
+            break;
+        case Points:
+            if (target.getFunctionSpace().getTypeCode() == Points) {
+                Assemble_CopyElementData(m_points, target, in);
+            } else {
+                throw ValueError("No interpolation with data on points possible.");
+            }
+            break;
+        case ContactElementsZero:
+        case ContactElementsOne:
+            if (target.getFunctionSpace().getTypeCode()==ContactElementsZero || target.getFunctionSpace().getTypeCode()==ContactElementsOne) {
+                Assemble_CopyElementData(m_contactElements, target, in);
+            } else if (target.getFunctionSpace().getTypeCode()==ReducedContactElementsZero || target.getFunctionSpace().getTypeCode()==ReducedContactElementsOne) {
+                Assemble_AverageElementData(m_contactElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on contact elements possible.");
+            }
+            break;
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            if (target.getFunctionSpace().getTypeCode()==ReducedContactElementsZero || target.getFunctionSpace().getTypeCode()==ReducedContactElementsOne) {
+                Assemble_CopyElementData(m_contactElements, target, in);
+            } else {
+                throw ValueError("No interpolation with data on contact elements with reduced integration order possible.");
+            }
+            break;
+        case DegreesOfFreedom:
+            switch (target.getFunctionSpace().getTypeCode()) {
+                case ReducedDegreesOfFreedom:
+                case DegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                break;
+
+                case Nodes:
+                case ReducedNodes:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in);
+                        temp.expand();
+                        Assemble_CopyNodalData(m_nodes, target, temp);
+                    } else {
+                        Assemble_CopyNodalData(m_nodes, target, in);
+                    }
+                break;
+                case Elements:
+                case ReducedElements:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in, continuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_elements, temp, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_elements, in, target);
+                    }
+                break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in, continuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_faceElements, temp, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                    }
+                break;
+                case Points:
+                    if (getMPISize() > 1) {
+                        // interpolation onto Points is currently not
+                        // implemented for the MPI-parallel case
+                        //escript::Data temp(in, continuousFunction(*this));
+                    } else {
+                        Assemble_interpolate(m_nodes, m_points, in, target);
+                    }
+                break;
+                case ContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsZero:
+                case ReducedContactElementsOne:
+                    if (getMPISize() > 1) {
+                        escript::Data temp(in, continuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_contactElements, temp, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_contactElements, in, target);
+                    }
+                    break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Finley does not know anything "
+                          "about function space type "
+                       << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+            break;
+        case ReducedDegreesOfFreedom:
+            switch (target.getFunctionSpace().getTypeCode()) {
+                case Nodes:
+                    throw ValueError("Finley does not support interpolation from reduced degrees of freedom to mesh nodes.");
+                case ReducedNodes:
+                    if (getMPISize() > 1) {
+                        escript::Data in2(in);
+                        in2.expand();
+                        Assemble_CopyNodalData(m_nodes, target, in2);
+                    } else {
+                        Assemble_CopyNodalData(m_nodes, target, in);
+                    }
+                    break;
+                case DegreesOfFreedom:
+                    throw ValueError("Finley does not support interpolation from reduced degrees of freedom to degrees of freedom");
+                case ReducedDegreesOfFreedom:
+                    Assemble_CopyNodalData(m_nodes, target, in);
+                    break;
+                case Elements:
+                case ReducedElements:
+                    if (getMPISize() > 1) {
+                        escript::Data in2(in, reducedContinuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_elements, in2, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_elements, in, target);
+                    }
+                    break;
+                case FaceElements:
+                case ReducedFaceElements:
+                    if (getMPISize() > 1) {
+                        escript::Data in2(in, reducedContinuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_faceElements, in2, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_faceElements, in, target);
+                    }
+                    break;
+                case Points:
+                    if (getMPISize() > 1) {
+                        escript::Data in2(in, reducedContinuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_points, in2, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_points, in, target);
+                    }
+                    break;
+                case ContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsZero:
+                case ReducedContactElementsOne:
+                    if (getMPISize()>1) {
+                        escript::Data in2(in, reducedContinuousFunction(*this));
+                        Assemble_interpolate(m_nodes, m_contactElements, in2, target);
+                    } else {
+                        Assemble_interpolate(m_nodes, m_contactElements, in, target);
+                    }
+                    break;
+                default:
+                    stringstream ss;
+                    ss << "interpolateOnDomain: Finley does not know anything about function space type " << target.getFunctionSpace().getTypeCode();
+                    throw ValueError(ss.str());
+            }
+            break;
+        default:
+            stringstream ss;
+            ss << "interpolateOnDomain: Finley does not know anything about "
+                "function space type " << in.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// copies the locations of sample points into x
+//
+void FinleyDomain::setToX(escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToX: Illegal domain of data point locations");
+
+    // in case of appropriate function space we can do the job directly:
+    if (arg.getFunctionSpace().getTypeCode() == Nodes) {
+        Assemble_NodeCoordinates(m_nodes, arg);
+    } else {
+        escript::Data tmp_data = Vector(0., continuousFunction(*this), true);
+        Assemble_NodeCoordinates(m_nodes, tmp_data);
+        // this is then interpolated onto arg:
+        interpolateOnDomain(arg, tmp_data);
+    }
+}
+
+//
+// return the normal vectors at the location of data points as a Data object
+//
+void FinleyDomain::setToNormal(escript::Data& normal) const
+{
+    if (*normal.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToNormal: Illegal domain of normal locations");
+
+    if (normal.getFunctionSpace().getTypeCode() == FaceElements ||
+            normal.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
+        Assemble_getNormal(m_nodes, m_faceElements, normal);
+    } else if (normal.getFunctionSpace().getTypeCode() == ContactElementsOne ||
+            normal.getFunctionSpace().getTypeCode() == ContactElementsZero ||
+            normal.getFunctionSpace().getTypeCode() == ReducedContactElementsOne ||
+            normal.getFunctionSpace().getTypeCode() == ReducedContactElementsZero) {
+        Assemble_getNormal(m_nodes, m_contactElements, normal);
+    } else {
+        stringstream ss;
+        ss << "setToNormal: Illegal function space type "
+           << normal.getFunctionSpace().getTypeCode();
+        throw ValueError(ss.str());
+    }
+}
+
+//
+// interpolates data to another domain
+//
+void FinleyDomain::interpolateAcross(escript::Data& /*target*/,
+                                    const escript::Data& /*source*/) const
+{
+    throw NotImplementedError("Finley does not allow interpolation across "
+                              "domains.");
+}
+
+//
+// calculates the integral of a function defined on arg
+//
+void FinleyDomain::setToIntegrals(vector<double>& integrals,
+                                  const escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToIntegrals: Illegal domain of integration kernel");
+
+    switch (arg.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+        case ReducedNodes:
+        case DegreesOfFreedom:
+        case ReducedDegreesOfFreedom:
+        {
+            escript::Data temp(arg, escript::function(*this));
+            Assemble_integrate(m_nodes, m_elements, temp, &integrals[0]);
+        }
+        break;
+        case Elements:
+        case ReducedElements:
+            Assemble_integrate(m_nodes, m_elements, arg, &integrals[0]);
+        break;
+        case FaceElements:
+        case ReducedFaceElements:
+            Assemble_integrate(m_nodes, m_faceElements, arg, &integrals[0]);
+        break;
+        case Points:
+            throw ValueError("Integral of data on points is not supported.");
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            Assemble_integrate(m_nodes, m_contactElements, arg, &integrals[0]);
+        break;
+        default:
+            stringstream ss;
+            ss << "setToIntegrals: Finley does not know anything about "
+                "function space type " << arg.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// calculates the gradient of arg
+//
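+// When running on more than one MPI rank, DOF-based arguments are first
+// converted to nodal data before the element-wise gradient is assembled.
+//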
+void FinleyDomain::setToGradient(escript::Data& grad, const escript::Data& arg) const
+{
+    if (*arg.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToGradient: Illegal domain of gradient argument");
+    if (*grad.getFunctionSpace().getDomain() != *this)
+        throw ValueError("setToGradient: Illegal domain of gradient");
+
+    escript::Data nodeData;
+    if (getMPISize() > 1) {
+        if (arg.getFunctionSpace().getTypeCode() == DegreesOfFreedom) {
+            nodeData = escript::Data(arg, continuousFunction(*this));
+        } else if(arg.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
+            nodeData = escript::Data(arg, reducedContinuousFunction(*this));
+        } else {
+            nodeData = arg;
+        }
+    } else {
+        nodeData = arg;
+    }
+    switch (grad.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            throw ValueError("Gradient at nodes is not supported.");
+        case ReducedNodes:
+            throw ValueError("Gradient at reduced nodes is not supported.");
+        case Elements:
+        case ReducedElements:
+            Assemble_gradient(m_nodes, m_elements, grad, nodeData);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            Assemble_gradient(m_nodes, m_faceElements, grad, nodeData);
+            break;
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            Assemble_gradient(m_nodes, m_contactElements, grad, nodeData);
+        break;
+        case Points:
+            throw ValueError("Gradient at points is not supported.");
+        case DegreesOfFreedom:
+            throw ValueError("Gradient at degrees of freedom is not supported.");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("Gradient at reduced degrees of freedom is not supported.");
+        default:
+            stringstream ss;
+            ss << "Gradient: Finley does not know anything about function "
+                  "space type " << arg.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// returns the size of elements
+//
+void FinleyDomain::setToSize(escript::Data& size) const
+{
+    switch (size.getFunctionSpace().getTypeCode()) {
+        case Nodes:
+            throw ValueError("Size of nodes is not supported.");
+        case ReducedNodes:
+            throw ValueError("Size of reduced nodes is not supported.");
+        case Elements:
+        case ReducedElements:
+            Assemble_getSize(m_nodes, m_elements, size);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            Assemble_getSize(m_nodes, m_faceElements, size);
+            break;
+        case ContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            Assemble_getSize(m_nodes, m_contactElements, size);
+            break;
+        case Points:
+            throw ValueError("Size of point elements is not supported.");
+        case DegreesOfFreedom:
+            throw ValueError("Size of degrees of freedom is not supported.");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("Size of reduced degrees of freedom is not supported.");
+        default:
+            stringstream ss;
+            ss << "setToSize: Finley does not know anything about function "
+                  "space type " << size.getFunctionSpace().getTypeCode();
+            throw ValueError(ss.str());
+    }
+}
+
+//
+// sets the location of nodes
+//
+void FinleyDomain::setNewX(const escript::Data& newX)
+{
+    if (*newX.getFunctionSpace().getDomain() != *this)
+        throw ValueError("Illegal domain of new point locations");
+
+    if (newX.getFunctionSpace() == continuousFunction(*this)) {
+        m_nodes->setCoordinates(newX);
+    } else {
+        throw ValueError("As of escript version 3.3 setNewX only accepts "
+                         "ContinuousFunction arguments. Please interpolate.");
+    }
+}
+
+bool FinleyDomain::ownSample(int fs_code, index_t id) const
+{
+#ifdef ESYS_MPI
+    if (getMPISize() > 1 && fs_code != FINLEY_DEGREES_OF_FREEDOM &&
+            fs_code != FINLEY_REDUCED_DEGREES_OF_FREEDOM) {
+        /*
+         * this method is only used by saveDataCSV, which would misinterpret
+         * the returned values for reduced nodes, so this case is disabled for now
+        if (fs_code == FINLEY_REDUCED_NODES) {
+            myFirstNode = NodeFile_getFirstReducedNode(mesh_p->Nodes);
+            myLastNode = NodeFile_getLastReducedNode(mesh_p->Nodes);
+            globalNodeIndex = NodeFile_borrowGlobalReducedNodesIndex(mesh_p->Nodes);
+        } else
+        */
+        if (fs_code == Nodes) {
+            const index_t myFirstNode = m_nodes->getFirstNode();
+            const index_t myLastNode = m_nodes->getLastNode();
+            const index_t k = m_nodes->borrowGlobalNodesIndex()[id];
+            return (myFirstNode <= k && k < myLastNode);
+        } else {
+            throw ValueError("ownSample: unsupported function space type");
+        }
+    }
+#endif
+    return true;
+}
+
+//
+// creates a stiffness matrix and initializes it with zeros
+//
+escript::ASM_ptr FinleyDomain::newSystemMatrix(int row_blocksize,
+                            const escript::FunctionSpace& row_functionspace,
+                            int column_blocksize,
+                            const escript::FunctionSpace& column_functionspace,
+                            int type) const
+{
+    // is the domain right?
+    if (*row_functionspace.getDomain() != *this)
+        throw ValueError("domain of row function space does not match the domain of matrix generator.");
+    if (*column_functionspace.getDomain() != *this)
+        throw ValueError("domain of column function space does not match the domain of matrix generator.");
+
+    bool reduceRowOrder = false;
+    bool reduceColOrder = false;
+    // is the function space type right?
+    if (row_functionspace.getTypeCode() == ReducedDegreesOfFreedom) {
+        reduceRowOrder = true;
+    } else if (row_functionspace.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for system matrix rows.");
+    }
+    if (column_functionspace.getTypeCode() == ReducedDegreesOfFreedom) {
+        reduceColOrder = true;
+    } else if (column_functionspace.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for system matrix columns.");
+    }
+
+    // generate matrix
+    if (type & (int)SMT_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        if (reduceRowOrder != reduceColOrder)
+            throw ValueError("element order of matrix rows and columns must "
+                             "match when using Trilinos");
+        const_TrilinosGraph_ptr graph(getTrilinosGraph(reduceRowOrder));
+        bool isComplex = (type & (int)SMT_COMPLEX);
+        bool unroll = (type & (int)SMT_UNROLL);
+        escript::ASM_ptr sm(new TrilinosMatrixAdapter(m_mpiInfo, row_blocksize,
+                    row_functionspace, graph, isComplex, unroll));
+        return sm;
+#else
+        throw FinleyException("newSystemMatrix: finley was not compiled "
+                "with Trilinos support so the Trilinos solver stack cannot be "
+                "used.");
+#endif
+    } else if (type & (int)SMT_PASO) {
+#ifdef ESYS_HAVE_PASO
+        paso::SystemMatrixPattern_ptr pattern(getPasoPattern(
+                                            reduceRowOrder, reduceColOrder));
+        paso::SystemMatrix_ptr sm(new paso::SystemMatrix(type, pattern,
+                  row_blocksize, column_blocksize, false, row_functionspace,
+                  column_functionspace));
+        return sm;
+#else
+        throw FinleyException("newSystemMatrix: finley was not compiled "
+                "with Paso support so the Paso solver stack cannot be used.");
+#endif
+    } else {
+        throw FinleyException("newSystemMatrix: unknown matrix type ID");
+    }
+}
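+
+// Illustrative call sequence for newSystemMatrix() (sketch only; `dom`,
+// `solverOptions` and `rowFS`/`colFS` are assumed to be an existing
+// FinleyDomain, a Python SolverBuddy object and DegreesOfFreedom-based
+// function spaces on that domain):
+//     int type = dom.getSystemMatrixTypeId(solverOptions);
+//     escript::ASM_ptr A = dom.newSystemMatrix(1, rowFS, 1, colFS, type);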
+
+//
+// creates a TransportProblem
+//
+escript::ATP_ptr FinleyDomain::newTransportProblem(int blocksize,
+                                             const escript::FunctionSpace& fs,
+                                             int type) const
+{
+    // is the domain right?
+    if (*fs.getDomain() != *this)
+        throw ValueError("domain of function space does not match the domain of transport problem generator.");
+
+#ifdef ESYS_HAVE_PASO
+    // is the function space type right
+    bool reduceOrder = false;
+    if (fs.getTypeCode() == ReducedDegreesOfFreedom) {
+        reduceOrder = true;
+    } else if (fs.getTypeCode() != DegreesOfFreedom) {
+        throw ValueError("illegal function space type for transport problem.");
+    }
+
+    // generate transport problem
+    paso::SystemMatrixPattern_ptr pattern(getPasoPattern(
+                                                  reduceOrder, reduceOrder));
+    paso::TransportProblem_ptr transportProblem(new paso::TransportProblem(
+                                              pattern, blocksize, fs));
+    return transportProblem;
+#else
+    throw FinleyException("Transport problems require the Paso library which "
+                          "is not available.");
+#endif
+}
+
+//
+// returns true if data on functionSpaceCode is considered to be cell centered
+//
+bool FinleyDomain::isCellOriented(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+        case DegreesOfFreedom:
+        case ReducedDegreesOfFreedom:
+            return false;
+        case Elements:
+        case FaceElements:
+        case Points:
+        case ContactElementsZero:
+        case ContactElementsOne:
+        case ReducedElements:
+        case ReducedFaceElements:
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            return true;
+    }
+    stringstream ss;
+    ss << "isCellOriented: Finley does not know anything about "
+          "function space type " << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+bool
+FinleyDomain::commonFunctionSpace(const vector<int>& fs, int& resultcode) const
+{
+    if (fs.empty())
+        return false;
+    // The idea is to use equivalence classes, i.e. types which can be
+    // interpolated back and forth
+    //    class 1: DOF <-> Nodes
+    //    class 2: ReducedDOF <-> ReducedNodes
+    //    class 3: Points
+    //    class 4: Elements
+    //    class 5: ReducedElements
+    //    class 6: FaceElements
+    //    class 7: ReducedFaceElements
+    //    class 8: ContactElementZero <-> ContactElementOne
+    //    class 9: ReducedContactElementZero <-> ReducedContactElementOne
+
+    // There is also a set of lines. Interpolation is possible down a line but
+    // not between lines.
+    // class 1 and 2 belong to all lines so aren't considered.
+    //    line 0: class 3
+    //    line 1: class 4,5
+    //    line 2: class 6,7
+    //    line 3: class 8,9
+
+    // For classes with multiple members (e.g. class 2) we keep flags recording
+    // whether at least one instance is present,
+    // e.g. hasnodes is true if we have at least one instance of Nodes.
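+    //
+    // For example, {Nodes, ReducedElements} lies entirely on line 1 and
+    // resolves to ReducedElements, whereas {Elements, FaceElements} spans
+    // lines 1 and 2, so no common function space exists.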
+    vector<int> hasclass(10);
+    vector<int> hasline(4);
+    bool hasnodes = false;
+    bool hasrednodes = false;
+    bool hascez = false;
+    bool hasrcez = false;
+    for (int i = 0; i < fs.size(); ++i) {
+        switch (fs[i]) {
+            case Nodes:
+                hasnodes = true; // fall through
+            case DegreesOfFreedom:
+                hasclass[1] = 1;
+                break;
+            case ReducedNodes:
+                hasrednodes = true; // fall through
+            case ReducedDegreesOfFreedom:
+                hasclass[2] = 1;
+                break;
+            case Points:
+                hasline[0] = 1;
+                hasclass[3] = 1;
+                break;
+            case Elements:
+                hasclass[4] = 1;
+                hasline[1] = 1;
+                break;
+            case ReducedElements:
+                hasclass[5] = 1;
+                hasline[1] = 1;
+                break;
+            case FaceElements:
+                hasclass[6] = 1;
+                hasline[2] = 1;
+                break;
+            case ReducedFaceElements:
+                hasclass[7] = 1;
+                hasline[2] = 1;
+                break;
+            case ContactElementsZero:
+                hascez = true; // fall through
+            case ContactElementsOne:
+                hasclass[8] = 1;
+                hasline[3] = 1;
+                break;
+            case ReducedContactElementsZero:
+                hasrcez = true; // fall through
+            case ReducedContactElementsOne:
+                hasclass[9] = 1;
+                hasline[3] = 1;
+                break;
+            default:
+                return false;
+        }
+    }
+    int totlines = hasline[0]+hasline[1]+hasline[2]+hasline[3];
+
+    // fail if the requested function spaces span more than one line
+    if (totlines > 1)
+        // there are at least two branches we can't interpolate between
+        return false;
+    else if (totlines == 1) {
+        if (hasline[0] == 1) // we have points
+            resultcode = Points;
+        else if (hasline[1] == 1) {
+            if (hasclass[5] == 1)
+                resultcode=ReducedElements;
+            else
+                resultcode=Elements;
+        } else if (hasline[2] == 1) {
+            if (hasclass[7] == 1)
+                resultcode=ReducedFaceElements;
+            else
+                resultcode=FaceElements;
+        } else { // so we must be in line3
+            if (hasclass[9] == 1) {
+                // need something from class 9
+                resultcode = (hasrcez ? ReducedContactElementsZero : ReducedContactElementsOne);
+            } else {
+                // something from class 8
+                resultcode = (hascez ? ContactElementsZero : ContactElementsOne);
+            }
+        }
+    } else { // totlines==0
+        if (hasclass[2] == 1) {
+            // something from class 2
+            resultcode = (hasrednodes ? ReducedNodes : ReducedDegreesOfFreedom);
+        } else { 
+            // something from class 1
+            resultcode = (hasnodes ? Nodes : DegreesOfFreedom);
+        }
+    }
+    return true;
+}
+
+bool FinleyDomain::probeInterpolationOnDomain(int functionSpaceType_source,
+                                              int functionSpaceType_target) const
+{
+    switch(functionSpaceType_source) {
+        case Nodes:
+            switch (functionSpaceType_target) {
+                case Nodes:
+                case ReducedNodes:
+                case ReducedDegreesOfFreedom:
+                case DegreesOfFreedom:
+                case Elements:
+                case ReducedElements:
+                case FaceElements:
+                case ReducedFaceElements:
+                case Points:
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    return true;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Finley does not know "
+                          "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+        case ReducedNodes:
+            switch(functionSpaceType_target) {
+                case ReducedNodes:
+                case ReducedDegreesOfFreedom:
+                case Elements:
+                case ReducedElements:
+                case FaceElements:
+                case ReducedFaceElements:
+                case Points:
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    return true;
+                case Nodes:
+                case DegreesOfFreedom:
+                    return false;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Finley does not know "
+                          "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+        case Elements:
+            return (functionSpaceType_target == Elements ||
+                    functionSpaceType_target == ReducedElements);
+        case ReducedElements:
+            return (functionSpaceType_target == ReducedElements);
+        case FaceElements:
+            return (functionSpaceType_target == FaceElements ||
+                    functionSpaceType_target == ReducedFaceElements);
+        case ReducedFaceElements:
+            return (functionSpaceType_target == ReducedFaceElements);
+        case Points:
+            return (functionSpaceType_target == Points);
+        case ContactElementsZero:
+        case ContactElementsOne:
+            return (functionSpaceType_target == ContactElementsZero ||
+                    functionSpaceType_target == ContactElementsOne ||
+                    functionSpaceType_target == ReducedContactElementsZero ||
+                    functionSpaceType_target == ReducedContactElementsOne);
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            return (functionSpaceType_target == ReducedContactElementsZero ||
+                    functionSpaceType_target == ReducedContactElementsOne);
+        case DegreesOfFreedom:
+            switch (functionSpaceType_target) {
+                case ReducedDegreesOfFreedom:
+                case DegreesOfFreedom:
+                case Nodes:
+                case ReducedNodes:
+                case Elements:
+                case ReducedElements:
+                case Points:
+                case FaceElements:
+                case ReducedFaceElements:
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    return true;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Finley does not know "
+                          "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+        case ReducedDegreesOfFreedom:
+            switch(functionSpaceType_target) {
+                case ReducedDegreesOfFreedom:
+                case ReducedNodes:
+                case Elements:
+                case ReducedElements:
+                case FaceElements:
+                case ReducedFaceElements:
+                case Points:
+                case ContactElementsZero:
+                case ReducedContactElementsZero:
+                case ContactElementsOne:
+                case ReducedContactElementsOne:
+                    return true;
+                case Nodes:
+                case DegreesOfFreedom:
+                    return false;
+                default:
+                    stringstream ss;
+                    ss << "Interpolation On Domain: Finley does not know "
+                          "anything about function space type "
+                       << functionSpaceType_target;
+                    throw ValueError(ss.str());
+            }
+    }
+    stringstream ss;
+    ss << "Interpolation On Domain: Finley does not know anything "
+          "about function space type " << functionSpaceType_source;
+    throw ValueError(ss.str());
+}
+
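+// returns 1 if interpolation from the source onto the target function space is
+// possible, -1 if only the reverse direction is, and 0 if neither is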
+signed char FinleyDomain::preferredInterpolationOnDomain(
+        int functionSpaceType_source, int functionSpaceType_target) const
+{
+    if (probeInterpolationOnDomain(functionSpaceType_source, functionSpaceType_target))
+        return 1;
+    if (probeInterpolationOnDomain(functionSpaceType_target, functionSpaceType_source))
+        return -1;
+
+    return 0;
+}
+
+bool FinleyDomain::probeInterpolationAcross(int /*source*/,
+        const AbstractDomain& /*targetDomain*/, int /*target*/) const
+{
+    return false;
+}
+
+bool FinleyDomain::operator==(const AbstractDomain& other) const
+{
+    const FinleyDomain* temp = dynamic_cast<const FinleyDomain*>(&other);
+    if (temp) {
+        return (m_nodes == temp->m_nodes &&
+                m_elements == temp->m_elements &&
+                m_faceElements == temp->m_faceElements &&
+                m_contactElements == temp->m_contactElements &&
+                m_points == temp->m_points);
+    }
+    return false;
+}
+
+bool FinleyDomain::operator!=(const AbstractDomain& other) const
+{
+    return !(operator==(other));
+}
+
+int FinleyDomain::getSystemMatrixTypeId(const bp::object& options) const
+{
+    const escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy>(options);
+
+    int package = sb.getPackage();
+    escript::SolverOptions method = sb.getSolverMethod();
+#ifdef ESYS_HAVE_TRILINOS
+    bool isDirect = escript::isDirectSolver(method);
+#endif
+
+    // the finley build configuration ensures that at least one of Paso or
+    // Trilinos is available, so prioritize as follows
+#if defined(ESYS_HAVE_PASO) && defined(ESYS_HAVE_TRILINOS)
+    // we have Paso & Trilinos so use Trilinos for parallel direct solvers and
+    // for complex problems
+    if (package == escript::SO_DEFAULT) {
+        if ((method == escript::SO_METHOD_DIRECT && getMPISize() > 1)
+                || isDirect
+                || sb.isComplex()) {
+            package = escript::SO_PACKAGE_TRILINOS;
+        }
+    }
+#endif
+#ifdef ESYS_HAVE_PASO
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_PASO;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_TRILINOS;
+#endif
+    if (package == escript::SO_PACKAGE_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        int type = (int)SMT_TRILINOS;
+        if (sb.isComplex())
+            type |= (int)SMT_COMPLEX;
+        // This is required because MueLu (AMG) and Amesos2 (direct) do not
+        // support block matrices at this point. Remove if they ever do...
+        if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_AMG ||
+                sb.getPreconditioner() == escript::SO_PRECONDITIONER_ILUT ||
+                isDirect) {
+            type |= (int)SMT_UNROLL;
+        }
+        return type;
+#else
+        throw FinleyException("Trilinos requested but not built with Trilinos.");
+#endif
+    }
+#ifdef ESYS_HAVE_PASO
+    if (sb.isComplex()) {
+        throw NotImplementedError("Paso does not support complex-valued matrices");
+    }
+    return (int)SMT_PASO | paso::SystemMatrix::getSystemMatrixTypeId(
+                method, sb.getPreconditioner(), sb.getPackage(),
+                sb.isSymmetric(), m_mpiInfo);
+#else
+    throw FinleyException("Unable to find a working solver library!");
+#endif
+}
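+
+// Illustrative example for getSystemMatrixTypeId() (sketch): with Trilinos
+// available and a complex-valued problem requested via the default package,
+// the returned id is SMT_TRILINOS with SMT_COMPLEX set; SMT_UNROLL is added
+// when AMG, ILUT or a direct method is selected.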
+
+int FinleyDomain::getTransportTypeId(int solver, int preconditioner,
+                                    int package, bool symmetry) const
+{
+#ifdef ESYS_HAVE_PASO
+    return paso::TransportProblem::getTypeId(solver, preconditioner, package,
+                                             symmetry, getMPI());
+#else
+    throw FinleyException("Transport solvers require Paso but finley was not "
+                          "compiled with Paso!");
+#endif
+}
+
+escript::Data FinleyDomain::getX() const
+{
+    return continuousFunction(*this).getX();
+}
+
+escript::Data FinleyDomain::getNormal() const
+{
+    return functionOnBoundary(*this).getNormal();
+}
+
+escript::Data FinleyDomain::getSize() const
+{
+    return escript::function(*this).getSize();
+}
+
+const index_t* FinleyDomain::borrowSampleReferenceIDs(int functionSpaceType) const
+{
+    index_t* out = NULL;
+    switch (functionSpaceType) {
+        case Nodes:
+            out = m_nodes->Id;
+            break;
+        case ReducedNodes:
+            out = m_nodes->reducedNodesId;
+            break;
+        case Elements:
+        case ReducedElements:
+            out = m_elements->Id;
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            out = m_faceElements->Id;
+            break;
+        case Points:
+            out = m_points->Id;
+            break;
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            out = m_contactElements->Id;
+            break;
+        case DegreesOfFreedom:
+            out = m_nodes->degreesOfFreedomId;
+            break;
+        case ReducedDegreesOfFreedom:
+            out = m_nodes->reducedDegreesOfFreedomId;
+            break;
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceType
+               << " for domain: " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return out;
+}
+int FinleyDomain::getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const
+{
+    int out = 0;
+    switch (functionSpaceType) {
+        case Nodes:
+            out = m_nodes->Tag[sampleNo];
+            break;
+        case ReducedNodes:
+            throw ValueError("ReducedNodes does not support tags.");
+        case Elements:
+        case ReducedElements:
+            out = m_elements->Tag[sampleNo];
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            out = m_faceElements->Tag[sampleNo];
+            break;
+        case Points:
+            out = m_points->Tag[sampleNo];
+            break;
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            out = m_contactElements->Tag[sampleNo];
+            break;
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags.");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("ReducedDegreesOfFreedom does not support tags.");
+        default:
+            stringstream ss;
+            ss << "Invalid function space type: " << functionSpaceType
+               << " for domain: " << getDescription();
+            throw ValueError(ss.str());
+    }
+    return out;
+}
+
+
+void FinleyDomain::setTags(int functionSpaceType, int newTag, const escript::Data& mask) const
+{
+    switch (functionSpaceType) {
+        case Nodes:
+            m_nodes->setTags(newTag, mask);
+            break;
+        case ReducedNodes:
+            throw ValueError("ReducedNodes does not support tags");
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("ReducedDegreesOfFreedom does not support tags");
+        case Elements:
+        case ReducedElements:
+            m_elements->setTags(newTag, mask);
+            break;
+        case FaceElements:
+        case ReducedFaceElements:
+            m_faceElements->setTags(newTag, mask);
+            break;
+        case Points:
+            m_points->setTags(newTag, mask);
+            break;
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            m_contactElements->setTags(newTag, mask);
+            break;
+        default:
+            stringstream ss;
+            ss << "Finley does not know anything about function space type "
+               << functionSpaceType;
+            throw ValueError(ss.str());
+    }
+}
+
+void FinleyDomain::setTagMap(const string& name, int tag)
+{
+    m_tagMap[name] = tag;
+}
+
+int FinleyDomain::getTag(const string& name) const
+{
+    TagMap::const_iterator it = m_tagMap.find(name);
+    if (it == m_tagMap.end()) {
+        stringstream ss;
+        ss << "getTag: unknown tag name " << name << ".";
+        throw escript::ValueError(ss.str());
+    }
+    return it->second;
+}
+
+bool FinleyDomain::isValidTagName(const string& name) const
+{
+    return (m_tagMap.count(name) > 0);
+}
+
+string FinleyDomain::showTagNames() const
+{
+    stringstream ss;
+    TagMap::const_iterator it = m_tagMap.begin();
+    while (it != m_tagMap.end()) {
+        ss << it->first;
+        ++it;
+        if (it != m_tagMap.end())
+            ss << ", ";
+    }
+    return ss.str();
+}
+
+int FinleyDomain::getNumberOfTagsInUse(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+            return m_nodes->tagsInUse.size();
+        case ReducedNodes:
+            throw ValueError("ReducedNodes does not support tags");
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("ReducedDegreesOfFreedom does not support tags");
+        case Elements:
+        case ReducedElements:
+            return m_elements->tagsInUse.size();
+        case FaceElements:
+        case ReducedFaceElements:
+            return m_faceElements->tagsInUse.size();
+        case Points:
+            return m_points->tagsInUse.size();
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            return m_contactElements->tagsInUse.size();
+    }
+    stringstream ss;
+    ss << "Finley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+const int* FinleyDomain::borrowListOfTagsInUse(int functionSpaceCode) const
+{
+    switch (functionSpaceCode) {
+        case Nodes:
+            if (m_nodes->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_nodes->tagsInUse[0];
+        case ReducedNodes:
+            throw ValueError("ReducedNodes does not support tags");
+        case DegreesOfFreedom:
+            throw ValueError("DegreesOfFreedom does not support tags");
+        case ReducedDegreesOfFreedom:
+            throw ValueError("ReducedDegreesOfFreedom does not support tags");
+        case Elements:
+        case ReducedElements:
+            if (m_elements->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_elements->tagsInUse[0];
+        case FaceElements:
+        case ReducedFaceElements:
+            if (m_faceElements->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_faceElements->tagsInUse[0];
+        case Points:
+            if (m_points->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_points->tagsInUse[0];
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            if (m_contactElements->tagsInUse.empty())
+                return NULL;
+            else
+                return &m_contactElements->tagsInUse[0];
+    }
+    stringstream ss;
+    ss << "Finley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+bool FinleyDomain::canTag(int functionSpaceCode) const
+{
+    switch(functionSpaceCode) {
+        case Nodes:
+        case Elements:
+        case ReducedElements:
+        case FaceElements:
+        case ReducedFaceElements:
+        case Points:
+        case ContactElementsZero:
+        case ReducedContactElementsZero:
+        case ContactElementsOne:
+        case ReducedContactElementsOne:
+            return true;
+        default:
+            return false;
+    }
+}
+
+FinleyDomain::StatusType FinleyDomain::getStatus() const
+{
+    return m_nodes->status;
+}
+
+int FinleyDomain::getApproximationOrder(int functionSpaceCode) const
+{
+    switch(functionSpaceCode) {
+        case Nodes:
+        case DegreesOfFreedom:
+            return approximationOrder;
+        case ReducedNodes:
+        case ReducedDegreesOfFreedom:
+            return reducedApproximationOrder;
+        case Elements:
+        case FaceElements:
+        case Points:
+        case ContactElementsZero:
+        case ContactElementsOne:
+            return integrationOrder;
+        case ReducedElements:
+        case ReducedFaceElements:
+        case ReducedContactElementsZero:
+        case ReducedContactElementsOne:
+            return reducedIntegrationOrder;
+    }
+    stringstream ss;
+    ss << "Finley does not know anything about function space type "
+       << functionSpaceCode;
+    throw ValueError(ss.str());
+}
+
+escript::Data FinleyDomain::randomFill(
+                                const escript::DataTypes::ShapeType& shape,
+                                const escript::FunctionSpace& what, long seed,
+                                const bp::tuple& filter) const
+{
+    escript::Data towipe(0, shape, what, true);
+    // since we just made this object, no sharing is possible and we don't
+    // need to check for exclusive write
+    escript::DataTypes::RealVectorType& dv(towipe.getExpandedVectorReference());
+    escript::randomFillArray(seed, &dv[0], dv.size());
+    return towipe;
+}
+
+/// prepares the mesh for further use
+void FinleyDomain::prepare(bool optimize)
+{
+    setOrders();
+
+    // first step is to distribute the elements according to a global
+    // distribution of DOF
+    IndexVector distribution(m_mpiInfo->size + 1);
+
+    // first we create dense labeling for the DOFs
+    dim_t newGlobalNumDOFs = m_nodes->createDenseDOFLabeling();
+
+    // create a distribution of the global DOFs and determine the MPI rank
+    // controlling the DOFs on this processor
+    m_mpiInfo->setDistribution(0, newGlobalNumDOFs - 1, &distribution[0]);
+
+    // now the mesh is re-distributed according to the distribution vector
+    // this will redistribute the Nodes and Elements including overlap and
+    // will create an element colouring but will not create any mappings
+    // (see later in this function)
+    distributeByRankOfDOF(distribution);
+
+    // at this stage we are able to start an optimization of the DOF
+    // distribution using ParMETIS. On return, distribution is altered and
+    // new DOF IDs have been assigned
+    if (optimize && m_mpiInfo->size > 1) {
+        optimizeDOFDistribution(distribution);
+        distributeByRankOfDOF(distribution);
+    }
+    // the local labelling of the degrees of freedom is optimized
+    if (optimize) {
+        optimizeDOFLabeling(distribution);
+    }
+
+    // rearrange elements with the aim of bringing elements closer to memory
+    // locations of the nodes (distributed shared memory!):
+    optimizeElementOrdering();
+
+    // create the global indices
+    std::vector<short> maskReducedNodes(m_nodes->getNumNodes(), -1);
+    IndexVector nodeDistribution(m_mpiInfo->size + 1);
+    markNodes(maskReducedNodes, 0, true);
+    IndexVector indexReducedNodes = util::packMask(maskReducedNodes);
+
+    m_nodes->createDenseNodeLabeling(nodeDistribution, distribution);
+    // create reduced DOF labeling
+    m_nodes->createDenseReducedLabeling(maskReducedNodes, false);
+    // create reduced node labeling
+    m_nodes->createDenseReducedLabeling(maskReducedNodes, true);
+    // create the missing mappings
+    m_nodes->createNodeMappings(indexReducedNodes, distribution, nodeDistribution);
+
+    updateTagList();
+}
+
+/// redistributes the Nodes and Elements including overlap
+/// according to the DOF distribution. It will create an element colouring
+/// but will not create any mappings.
+void FinleyDomain::distributeByRankOfDOF(const std::vector<index_t>& dof_distribution)
+{
+    std::vector<int> mpiRankOfDOF(m_nodes->getNumNodes());
+    m_nodes->assignMPIRankToDOFs(mpiRankOfDOF, dof_distribution);
+
+    // first, the elements are redistributed according to mpiRankOfDOF
+    // at the input the Node tables refer to the local labeling of the nodes
+    // while at the output they refer to the global labeling which is rectified
+    // in the next step
+    m_elements->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+    m_faceElements->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+    m_contactElements->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+    m_points->distributeByRankOfDOF(mpiRankOfDOF, m_nodes->Id);
+
+    resolveNodeIds();
+
+    // create a local labeling of the DOFs
+    const std::pair<index_t,index_t> dof_range(m_nodes->getDOFRange());
+    const index_t len = dof_range.second-dof_range.first+1;
+    // local mask for used nodes
+    std::vector<index_t> localDOF_mask(len, -1);
+    std::vector<index_t> localDOF_map(m_nodes->getNumNodes(), -1);
+
+#pragma omp parallel for
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+#ifdef BOUNDS_CHECK
+        ESYS_ASSERT(m_nodes->globalDegreesOfFreedom[n]-dof_range.first < len, "BOUNDS_CHECK");
+        ESYS_ASSERT(m_nodes->globalDegreesOfFreedom[n]-dof_range.first >= 0, "BOUNDS_CHECK");
+#endif
+        localDOF_mask[m_nodes->globalDegreesOfFreedom[n]-dof_range.first] = n;
+    }
+
+    index_t numDOFs = 0;
+    for (index_t n = 0; n < len; n++) {
+        const index_t k = localDOF_mask[n];
+        if (k >= 0) {
+            localDOF_mask[n] = numDOFs;
+            numDOFs++;
+        }
+    }
+#pragma omp parallel for
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+        const index_t k = localDOF_mask[m_nodes->globalDegreesOfFreedom[n]-dof_range.first];
+        localDOF_map[n] = k;
+    }
+    // create element coloring
+    createColoring(localDOF_map);
+}
+
+/// optimizes the labeling of the DOFs on each processor
+void FinleyDomain::optimizeDOFLabeling(const IndexVector& distribution)
+{
+    // this method relies on Pattern::reduceBandwidth so requires PASO
+    // at the moment
+#ifdef ESYS_HAVE_PASO
+    const int myRank = getMPIRank();
+    const int mpiSize = getMPISize();
+    const index_t myFirstVertex = distribution[myRank];
+    const index_t myLastVertex = distribution[myRank+1];
+    const dim_t myNumVertices = myLastVertex-myFirstVertex;
+    dim_t len = 0;
+    for (int p = 0; p < mpiSize; ++p)
+        len=std::max(len, distribution[p+1]-distribution[p]);
+
+    boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
+    boost::scoped_array<index_t> newGlobalDOFID(new index_t[len]);
+
+    // create the adjacency structure xadj and adjncy
+#pragma omp parallel
+    {
+        // insert contributions from element matrices into columns index
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                myFirstVertex, myLastVertex, m_elements,
+                m_nodes->globalDegreesOfFreedom,
+                m_nodes->globalDegreesOfFreedom);
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                myFirstVertex, myLastVertex, m_faceElements,
+                m_nodes->globalDegreesOfFreedom,
+                m_nodes->globalDegreesOfFreedom);
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                myFirstVertex, myLastVertex, m_contactElements,
+                m_nodes->globalDegreesOfFreedom,
+                m_nodes->globalDegreesOfFreedom);
+        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
+                myFirstVertex, myLastVertex, m_points,
+                m_nodes->globalDegreesOfFreedom,
+                m_nodes->globalDegreesOfFreedom);
+    }
+    // create the local matrix pattern
+    paso::Pattern_ptr pattern = paso::Pattern::fromIndexListArray(0,
+            myNumVertices, index_list.get(), myFirstVertex, myLastVertex,
+            -myFirstVertex);
+
+    pattern->reduceBandwidth(&newGlobalDOFID[0]);
+
+    // shift new labeling to create a global id
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumVertices; ++i)
+        newGlobalDOFID[i] += myFirstVertex;
+
+    // distribute new labeling to other processors
+#ifdef ESYS_MPI
+    const int dest = m_mpiInfo->mod_rank(myRank + 1);
+    const int source = m_mpiInfo->mod_rank(myRank - 1);
+#endif
+    int current_rank = myRank;
+    for (int p = 0; p < mpiSize; ++p) {
+        const index_t firstVertex = distribution[current_rank];
+        const index_t lastVertex = distribution[current_rank + 1];
+#pragma omp parallel for
+        for (index_t i = 0; i < m_nodes->getNumNodes(); ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i];
+            if (firstVertex <= k && k < lastVertex) {
+                m_nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k-firstVertex];
+            }
+        }
+
+        if (p < mpiSize - 1) { // the final send can be skipped
+#ifdef ESYS_MPI
+            MPI_Status status;
+            MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
+                                 dest, m_mpiInfo->counter(), source,
+                                 m_mpiInfo->counter(), m_mpiInfo->comm, &status);
+            m_mpiInfo->incCounter();
+#endif
+            current_rank = m_mpiInfo->mod_rank(current_rank - 1);
+        }
+    }
+#endif // ESYS_HAVE_PASO
+}
+
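
The loop above in optimizeDOFLabeling() uses a ring shift: each rank applies the relabelling block it currently holds to its local globalDegreesOfFreedom entries, then passes that block to the next rank with MPI_Sendrecv_replace while receiving the previous rank's block, so after mpiSize-1 shifts every rank has seen every block. A minimal standalone sketch of that pattern follows (hypothetical data, not the finley structures):

    // ring_shift_sketch.cpp - illustrates the ring shift used in optimizeDOFLabeling().
    // Hedged sketch with made-up data; build with e.g. "mpicxx ring_shift_sketch.cpp".
    #include <mpi.h>
    #include <vector>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        // each rank owns one block of data (here just filled with its own rank id)
        std::vector<long> block(4, rank);
        const int dest = (rank + 1) % size;            // neighbour we send to
        const int source = (rank - 1 + size) % size;   // neighbour we receive from

        int current = rank;                            // owner of the block we hold
        for (int p = 0; p < size; ++p) {
            // ...apply the block owned by 'current' to local data here...
            std::printf("rank %d applies block of rank %d\n", rank, current);
            if (p < size - 1) {                        // the final send can be skipped
                MPI_Status status;
                MPI_Sendrecv_replace(block.data(), (int)block.size(), MPI_LONG,
                                     dest, 0, source, 0, MPI_COMM_WORLD, &status);
                current = (current - 1 + size) % size; // we now hold the previous rank's block
            }
        }
        MPI_Finalize();
        return 0;
    }
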
+void FinleyDomain::resolveNodeIds()
+{
+    // find the minimum and maximum id used by elements
+    index_t min_id = escript::DataTypes::index_t_max();
+    index_t max_id = -escript::DataTypes::index_t_max();
+    std::pair<index_t,index_t> range(m_elements->getNodeRange());
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+    range = m_faceElements->getNodeRange();
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+    range = m_contactElements->getNodeRange();
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+    range = m_points->getNodeRange();
+    max_id = std::max(max_id, range.second);
+    min_id = std::min(min_id, range.first);
+#ifdef Finley_TRACE
+    index_t global_min_id, global_max_id;
+#ifdef ESYS_MPI
+    index_t id_range[2], global_id_range[2];
+    id_range[0] = -min_id;
+    id_range[1] = max_id;
+    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, m_mpiInfo->comm);
+    global_min_id = -global_id_range[0];
+    global_max_id = global_id_range[1];
+#else
+    global_min_id = min_id;
+    global_max_id = max_id;
+#endif
+    printf("Node id range used by elements is %d:%d\n", global_min_id, global_max_id);
+#endif
+    if (min_id > max_id) {
+        max_id = -1;
+        min_id = 0;
+    }
+
+    // allocate mappings for new local node labeling to global node labeling
+    // (newLocalToGlobalNodeLabels) and global node labeling to the new local
+    // node labeling (globalToNewLocalNodeLabels[i-min_id] is the new local id
+    // of global node i)
+    index_t len = (max_id >= min_id) ? max_id - min_id + 1 : 0;
+
+    // mark the nodes referred by elements in usedMask
+    std::vector<short> usedMask(len, -1);
+    markNodes(usedMask, min_id, false);
+
+    // create a local labeling newLocalToGlobalNodeLabels of the local nodes
+    // by packing the mask usedMask
+    std::vector<index_t> newLocalToGlobalNodeLabels = util::packMask(usedMask);
+    const dim_t newNumNodes = newLocalToGlobalNodeLabels.size();
+
+    usedMask.clear();
+
+    // invert the new labeling and shift the index newLocalToGlobalNodeLabels
+    // to global node IDs
+    std::vector<index_t> globalToNewLocalNodeLabels(len, -1);
+
+#pragma omp parallel for
+    for (index_t n = 0; n < newNumNodes; n++) {
+#ifdef BOUNDS_CHECK
+        ESYS_ASSERT(newLocalToGlobalNodeLabels[n] < len, "BOUNDS_CHECK");
+        ESYS_ASSERT(newLocalToGlobalNodeLabels[n] >= 0, "BOUNDS_CHECK");
+#endif
+        globalToNewLocalNodeLabels[newLocalToGlobalNodeLabels[n]] = n;
+        newLocalToGlobalNodeLabels[n] += min_id;
+    }
+    // create a new node file
+    NodeFile* newNodeFile = new NodeFile(getDim(), m_mpiInfo);
+    newNodeFile->allocTable(newNumNodes);
+    if (len)
+        newNodeFile->gather_global(&newLocalToGlobalNodeLabels[0], m_nodes);
+    else
+        newNodeFile->gather_global(NULL, m_nodes);
+
+    delete m_nodes;
+    m_nodes = newNodeFile;
+    // relabel nodes of the elements
+    relabelElementNodes(globalToNewLocalNodeLabels, min_id);
+}
+
+/// tries to reduce the number of colours for all element files
+void FinleyDomain::createColoring(const IndexVector& dofMap)
+{
+    m_elements->createColoring(dofMap);
+    m_faceElements->createColoring(dofMap);
+    m_points->createColoring(dofMap);
+    m_contactElements->createColoring(dofMap);
+}
+
+/// redistributes elements to minimize communication during assemblage
+void FinleyDomain::optimizeElementOrdering()
+{
+    m_elements->optimizeOrdering();
+    m_faceElements->optimizeOrdering();
+    m_points->optimizeOrdering();
+    m_contactElements->optimizeOrdering();
+}
+
+/// regenerates list of tags in use for node file and element files
+void FinleyDomain::updateTagList()
+{
+    m_nodes->updateTagList();
+    m_elements->updateTagList();
+    m_faceElements->updateTagList();
+    m_points->updateTagList();
+    m_contactElements->updateTagList();
+}
+
+
+}  // end of namespace
+
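
Both prepare() and resolveNodeIds() above rely on a mask/pack idiom: a mask marks which entries are in use, util::packMask compacts the mask into a list of used indices, and the inverse mapping is rebuilt by scattering the packed positions. A minimal sketch of that idiom with a stand-in helper (assuming packMask returns the indices of all non-negative mask entries; the real finley::util::packMask may differ in detail):

    #include <cstdio>
    #include <vector>
    typedef long index_t;

    // stand-in for finley::util::packMask: indices of all used (non-negative) entries
    static std::vector<index_t> packMaskSketch(const std::vector<short>& mask)
    {
        std::vector<index_t> packed;
        for (index_t i = 0; i < (index_t)mask.size(); ++i)
            if (mask[i] >= 0)
                packed.push_back(i);
        return packed;
    }

    int main()
    {
        // -1 = unused, >= 0 = used (as markNodes would have flagged them)
        std::vector<short> mask = { -1, 0, 0, -1, 0 };
        std::vector<index_t> localToGlobal = packMaskSketch(mask);   // {1, 2, 4}

        // invert: globalToLocal[g] is the compact new label of global entry g, or -1
        std::vector<index_t> globalToLocal(mask.size(), -1);
        for (index_t n = 0; n < (index_t)localToGlobal.size(); ++n)
            globalToLocal[localToGlobal[n]] = n;

        for (std::size_t g = 0; g < globalToLocal.size(); ++g)
            std::printf("global %ld -> local %ld\n", (long)g, (long)globalToLocal[g]);
        return 0;
    }
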
diff --git a/finley/src/FinleyDomain.h b/finley/src/FinleyDomain.h
new file mode 100644
index 0000000..445872a
--- /dev/null
+++ b/finley/src/FinleyDomain.h
@@ -0,0 +1,897 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __FINLEY_DOMAIN_H__
+#define __FINLEY_DOMAIN_H__
+
+/****************************************************************************
+
+   Finley: Domain
+
+   A mesh is built from nodes and elements which describe the domain, surface,
+   and point sources (the latter are needed to establish links with other
+   codes, in particular particle codes). The nodes are stored in a NodeFile
+   and elements in ElementFiles. Finley domains have four ElementFiles
+   containing the elements, surface, contact and point sources, respectively.
+   Notice that the surface elements do not necessarily cover the entire
+   surface of the domain.
+
+   The element type is fixed by the reference element, see ReferenceElement.h.
+   The numbering of the nodes starts with 0.
+
+   Important: it is assumed that every node appears in at least one element or
+   surface element and that any node used in an element, surface element or as
+   a point is specified in the NodeFile, see also resolveNodeIds.
+
+   In some cases it is useful to refer to a mesh entirely built from
+   order 1 (=linear) elements. The linear version of the mesh can be
+   accessed by referring to the first few nodes of each element
+   (thanks to the way the nodes are ordered). As the numbering of
+   these nodes is not continuous a relabeling vector is introduced
+   in the NodeFile. This feature is not fully implemented yet.
+
+   All nodes and elements are tagged. Tags allow nodes and elements to be
+   grouped. A typical application is to mark surface elements on a
+   certain portion of the domain with the same tag. All these surface
+   elements can then be assigned the same value e.g. for the pressure.
+
+   The spatial dimensionality is determined by the type of elements
+   used and can be queried using getDim(). Notice that the element type
+   also determines the type of surface elements to be used.
+
+*****************************************************************************/
+
+#include <finley/Finley.h>
+#include <finley/ElementFile.h>
+#include <finley/NodeFile.h>
+#include <finley/Util.h>
+
+#include <escript/AbstractContinuousDomain.h>
+#include <escript/FunctionSpace.h>
+#include <escript/FunctionSpaceFactory.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrixPattern.h>
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/types.h>
+#endif
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace finley {
+
+typedef std::map<std::string, int> TagMap;
+
+enum SystemMatrixType {
+    SMT_PASO = 1<<8,
+    SMT_TRILINOS = 1<<10,
+    SMT_COMPLEX = 1<<16,
+    SMT_UNROLL = 1<<17
+};
+
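
The SystemMatrixType values above are distinct bits, which suggests the matrix type id returned by getSystemMatrixTypeId() is meant to be an OR-combination of a backend flag and option flags. That usage is an assumption drawn from the values, not stated in this diff; a self-contained sketch:

    #include <cstdio>

    // local copy of the flag values above, purely for illustration
    enum SystemMatrixTypeSketch {
        SMT_PASO = 1 << 8, SMT_TRILINOS = 1 << 10,
        SMT_COMPLEX = 1 << 16, SMT_UNROLL = 1 << 17
    };

    int main()
    {
        int type = SMT_TRILINOS | SMT_COMPLEX;   // hypothetical combined type id
        std::printf("trilinos=%d complex=%d\n",
                    (type & SMT_TRILINOS) != 0, (type & SMT_COMPLEX) != 0);
        return 0;
    }
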
+/**
+    \brief
+    FinleyDomain implements the AbstractContinuousDomain interface for the
+    Finley library.
+*/
+class FinleyDomain : public escript::AbstractContinuousDomain
+{
+public:
+    /**
+     \brief
+     recovers domain from a dump file
+     \param filename the name of the file
+    */
+    static escript::Domain_ptr load(const std::string& filename);
+
+    /**
+     \brief
+     reads a mesh from a fly file. For MPI parallel runs it fans out the mesh
+     to multiple processes.
+     \param mpiInfo the MPI information structure
+     \param fileName the name of the file
+     \param integrationOrder order of the quadrature scheme.
+                             If <0 the order is selected automatically.
+     \param reducedIntegrationOrder order of the reduced quadrature scheme.
+                                    If <0 the order is selected automatically.
+     \param optimize whether to optimize the node labels
+    */
+    static escript::Domain_ptr read(escript::JMPI mpiInfo,
+                                    const std::string& fileName,
+                                    int integrationOrder = -1,
+                                    int reducedIntegrationOrder = -1,
+                                    bool optimize = false);
+
+    /**
+     \brief
+     reads a gmsh mesh file.
+     \param mpiInfo the MPI information structure
+     \param filename the name of the gmsh file
+     \param numDim spatial dimensionality
+     \param integrationOrder order of the quadrature scheme.
+                             If <0 the order is selected automatically.
+     \param reducedIntegrationOrder order of the reduced quadrature scheme.
+                                    If <0 the order is selected automatically.
+     \param optimize whether to optimize the node labels 
+     \param useMacroElements whether to use first order macro elements
+    */
+    static escript::Domain_ptr readGmsh(escript::JMPI mpiInfo,
+                                        const std::string& filename, 
+                                        int numDim, int integrationOrder = -1,
+                                        int reducedIntegrationOrder = -1,
+                                        bool optimize = false,
+                                        bool useMacroElements = false);
+
+    /**
+     \brief
+     Creates a 2-dimensional rectangular domain with first order (Rec4)
+     elements in the rectangle [0,L0] x [0,L1].
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param L0 Input - length of domain in first dimension (width)
+     \param L1 Input - length of domain in second dimension (height)
+     \param periodic0 Input - use periodic boundary in first dimension?
+     \param periodic1 Input - use periodic boundary in second dimension?
+     \param order Input - accuracy of integration scheme (order 1 or 2)
+     \param reducedOrder Input - reduced integration order (1 or 2)
+     \param useElementsOnFace Input - whether to use rich face elements
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr createRec4(dim_t NE0, dim_t NE1,
+                                    double L0, double L1,
+                                    bool periodic0, bool periodic1, int order,
+                                    int reducedOrder, bool useElementsOnFace,
+                                    bool optimize, escript::JMPI jmpi);
+
+    /**
+     \brief
+     Creates a 2-dimensional rectangular domain with second order (Rec8 or
+     Rec9) elements in the rectangle [0,L0] x [0,L1].
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param l0 Input - length of domain in first dimension (width)
+     \param l1 Input - length of domain in second dimension (height)
+     \param periodic0 Input - use periodic boundary in first dimension?
+     \param periodic1 Input - use periodic boundary in second dimension?
+     \param order Input - accuracy of integration scheme (order 1 or 2)
+     \param reducedOrder Input - reduced integration order (1 or 2)
+     \param useElementsOnFace Input - whether to use rich face elements
+     \param useFullElementOrder Input - if true the main element type will
+                                        be Rec9
+     \param useMacroElements Input - whether to use Macro element type
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr createRec8(dim_t NE0, dim_t NE1,
+                                    double l0, double l1,
+                                    bool periodic0, bool periodic1, int order,
+                                    int reducedOrder, bool useElementsOnFace,
+                                    bool useFullElementOrder,
+                                    bool useMacroElements, bool optimize,
+                                    escript::JMPI jmpi);
+
+    /**
+     \brief
+     Creates a 3-dimensional rectangular domain with first order (Hex8)
+     elements.
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param NE2 Input - number of elements in third dimension
+     \param l0 Input - length of domain in first dimension (width)
+     \param l1 Input - length of domain in second dimension (height)
+     \param l2 Input - length of domain in third dimension (depth)
+     \param periodic0 Input - use periodic boundary in first dimension?
+     \param periodic1 Input - use periodic boundary in second dimension?
+     \param periodic2 Input - use periodic boundary in third dimension?
+     \param order Input - integration order (1 or 2)
+     \param reducedOrder Input - reduced integration order (1 or 2)
+     \param useElementsOnFace Input - whether to use rich face elements
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr createHex8(dim_t NE0, dim_t NE1, dim_t NE2,
+                                double l0, double l1, double l2,
+                                bool periodic0, bool periodic1, bool periodic2,
+                                int order, int reducedOrder,
+                                bool useElementsOnFace,
+                                bool optimize, escript::JMPI jmpi);
+
+    /**
+     \brief
+     Creates a 3-dimensional rectangular domain with second order (Hex20 or
+     Hex27) elements.
+
+     \param NE0 Input - number of elements in first dimension
+     \param NE1 Input - number of elements in second dimension
+     \param NE2 Input - number of elements in third dimension
+     \param l0 Input - length of domain in first dimension (width)
+     \param l1 Input - length of domain in second dimension (height)
+     \param l2 Input - length of domain in third dimension (depth)
+     \param periodic0 Input - use periodic boundary in first dimension?
+     \param periodic1 Input - use periodic boundary in second dimension?
+     \param periodic2 Input - use periodic boundary in third dimension?
+     \param order Input - integration order (1 or 2)
+     \param reducedOrder Input - reduced integration order (1 or 2)
+     \param useElementsOnFace Input - whether to use rich face elements
+     \param useFullElementOrder Input - ignored
+     \param useMacroElements Input - whether to use Macro element type
+     \param optimize Input - whether to optimize node/DOF labelling
+     \param jmpi Input - Shared pointer to MPI Information to be used
+    */
+    static escript::Domain_ptr createHex20(dim_t NE0, dim_t NE1, dim_t NE2,
+                                double l0, double l1, double l2,
+                                bool periodic0, bool periodic1, bool periodic2,
+                                int order, int reducedOrder,
+                                bool useElementsOnFace,
+                                bool useFullElementOrder,
+                                bool useMacroElements, bool optimize,
+                                escript::JMPI jmpi);
+
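
Putting the factory functions above together, a hedged usage sketch (the .fly file name and mesh sizes are made up; escript::makeInfo is assumed to be the helper that wraps an MPI communicator into a JMPI, as in earlier esysUtils releases, and error handling is omitted):

    #include <finley/FinleyDomain.h>
    #include <escript/EsysMPI.h>    // assumed location of escript::makeInfo / JMPI
    #include <iostream>

    int main(int argc, char** argv)
    {
        // hedged: exact initialisation depends on the escript build (MPI or not)
        escript::JMPI mpi = escript::makeInfo(MPI_COMM_WORLD);

        // read an existing mesh from a fly file (hypothetical file name)
        escript::Domain_ptr fromFile = finley::FinleyDomain::read(mpi, "mesh.fly");

        // or build a structured 10 x 10 Rec4 rectangle on [0,1] x [0,1]
        escript::Domain_ptr rect = finley::FinleyDomain::createRec4(
                10, 10,            // NE0, NE1
                1.0, 1.0,          // L0, L1
                false, false,      // no periodic boundaries
                1, 1,              // integration order, reduced order
                false,             // no rich face elements
                true, mpi);        // optimise labelling, MPI info
        std::cout << rect->getDescription() << std::endl;
        return 0;
    }
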
+    /**
+     \brief
+     Constructor for FinleyDomain
+
+     \param name a descriptive name for the domain
+     \param numDim dimensionality of the domain (2 or 3)
+     \param jmpi shared pointer to MPI Information to be used
+    */
+    FinleyDomain(const std::string& name, int numDim, escript::JMPI jmpi);
+
+    /**
+     \brief
+     Copy constructor.
+    */
+    FinleyDomain(const FinleyDomain& in);
+
+    /**
+     \brief
+     Destructor for FinleyDomain
+    */
+    ~FinleyDomain();
+
+    /**
+     \brief adds Dirac delta points.
+      Do NOT call this at any time other than construction!
+      Calling it later creates consistency problems.
+    */
+    void addDiracPoints(const std::vector<double>& points,
+                        const std::vector<int>& tags);
+
+    /**
+     \brief
+     returns a pointer to this domain's node file
+    */
+    NodeFile* getNodes() const { return m_nodes; }
+
+    /**
+     \brief
+     replaces the element file by `elements`
+    */
+    void setElements(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's element file
+    */
+    ElementFile* getElements() const { return m_elements; }
+
+    /**
+     \brief
+     replaces the face element file by `elements`
+    */
+    void setFaceElements(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's face element file
+    */
+    ElementFile* getFaceElements() const { return m_faceElements; }
+
+    /**
+     \brief
+     replaces the contact element file by `elements`
+    */
+    void setContactElements(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's contact element file
+    */
+    ElementFile* getContactElements() const { return m_contactElements; }
+
+    /**
+     \brief
+     replaces the point element file by `elements`
+    */
+    void setPoints(ElementFile* elements);
+
+    /**
+     \brief
+     returns a pointer to this domain's point (nodal) element file
+    */
+    ElementFile* getPoints() const { return m_points; }
+
+    /**
+     \brief
+     returns a reference to the MPI information wrapper for this domain
+    */
+    virtual escript::JMPI getMPI() const { return m_mpiInfo; }
+
+    /**
+     \brief
+     returns the number of processors used for this domain
+    */
+    virtual int getMPISize() const { return m_mpiInfo->size; }
+
+    /**
+     \brief
+     returns the MPI rank of this processor
+    */
+    virtual int getMPIRank() const { return m_mpiInfo->rank; }
+
+    /**
+     \brief
+     If compiled for MPI then execute an MPI_Barrier, else do nothing
+    */
+    virtual void MPIBarrier() const;
+
+    /**
+     \brief
+     returns true if on MPI processor 0, else false
+    */
+    virtual bool onMasterProcessor() const { return getMPIRank() == 0; }
+
+    MPI_Comm getMPIComm() const { return m_mpiInfo->comm; }
+
+    /**
+     \brief
+     writes the current mesh to a file with the given name in the fly file
+     format.
+     \param fileName Input - The name of the file to write to.
+    */
+    void write(const std::string& fileName) const;
+
+    /**
+     \brief prints information about the mesh to standard output
+     \param full whether to include coordinate values and ids
+    */
+    void Print_Mesh_Info(bool full=false) const;
+
+    /**
+     \brief
+     dumps the mesh to a file with the given name.
+     \param fileName Input - The name of the file
+    */
+    void dump(const std::string& fileName) const;
+
+    /**
+     \brief
+     Return the tag key for the given sample number.
+     \param functionSpaceType Input - The function space type.
+     \param sampleNo Input - The sample number.
+    */
+    int getTagFromSampleNo(int functionSpaceType, index_t sampleNo) const;
+
+    /**
+     \brief
+     Return the sample reference numbers for the given function space type.
+     \param functionSpaceType Input - The function space type.
+    */
+    const index_t* borrowSampleReferenceIDs(int functionSpaceType) const;
+
+    /**
+     \brief
+     Returns true if the given integer is a valid function space type
+     for this domain.
+    */
+    virtual bool isValidFunctionSpaceType(int functionSpaceType) const;
+
+    /**
+     \brief
+     Return a description for this domain
+    */
+    virtual std::string getDescription() const;
+
+    /**
+     \brief
+     Return a description for the given function space type code
+    */
+    virtual std::string functionSpaceTypeAsString(int functionSpaceType) const;
+
+    /**
+     \brief
+     Build the table of function space type names
+    */
+    void setFunctionSpaceTypeNames();
+
+    /**
+     \brief
+     Return a continuous FunctionSpace code
+    */
+    virtual int getContinuousFunctionCode() const;
+
+    /**
+     \brief
+     Return a continuous on reduced order nodes FunctionSpace code
+    */
+    virtual int getReducedContinuousFunctionCode() const;
+
+    /**
+     \brief
+     Return a function FunctionSpace code
+    */
+    virtual int getFunctionCode() const;
+
+    /**
+     \brief
+     Return a function with reduced integration order FunctionSpace code
+    */
+    virtual int getReducedFunctionCode() const;
+
+    /**
+     \brief
+     Return a function on boundary FunctionSpace code
+    */
+    virtual int getFunctionOnBoundaryCode() const;
+
+    /**
+     \brief
+     Return a function on boundary with reduced integration order FunctionSpace code
+    */
+    virtual int getReducedFunctionOnBoundaryCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactZero code
+    */
+    virtual int getFunctionOnContactZeroCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactZero code with reduced integration order
+    */
+    virtual int getReducedFunctionOnContactZeroCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactOne code
+    */
+    virtual int getFunctionOnContactOneCode() const;
+
+    /**
+     \brief
+     Return a FunctionOnContactOne code with reduced integration order
+    */
+    virtual int getReducedFunctionOnContactOneCode() const;
+
+    /**
+     \brief
+     Return a Solution code
+    */
+    virtual int getSolutionCode() const;
+
+    /**
+     \brief
+     Return a ReducedSolution code
+    */
+    virtual int getReducedSolutionCode() const;
+
+    /**
+     \brief
+     Return a DiracDeltaFunctions code
+    */
+    virtual int getDiracDeltaFunctionsCode() const;
+
+    /**
+     \brief map from a function space type code to its name
+    */
+    typedef std::map<int, std::string> FunctionSpaceNamesMapType;
+
+    /**
+     \brief returns the dimensionality of this domain
+    */
+    virtual int getDim() const { return m_nodes->numDim; }
+
+    /**
+     \brief
+      Returns a status indicator of the domain. The status identifier should be unique
+      over the lifetime of the object but may be updated if changes to the domain
+      happen, e.g. modifications to its geometry.
+    */
+    virtual StatusType getStatus() const;
+
+    /**
+     \brief
+     Return the number of data points summed across all MPI processes
+    */
+    virtual dim_t getNumDataPointsGlobal() const;
+
+    /**
+     \brief
+     Return the number of data points per sample, and the number of samples as a pair.
+     \param functionSpaceCode Input - The function space type code.
+    */
+    virtual std::pair<int,dim_t> getDataShape(int functionSpaceCode) const;
+
+    /**
+     \brief
+     copies the locations of the data points into arg. The domain of arg has to
+     match this domain.
+    */
+    virtual void setToX(escript::Data& arg) const;
+
+    /**
+     \brief
+     sets a map from a clear tag name to a tag key
+     \param name Input - tag name.
+     \param tag Input - tag key.
+    */
+    virtual void setTagMap(const std::string& name, int tag);
+
+    /**
+     \brief
+     Return the tag key for tag name.
+     \param name Input - tag name
+    */
+    virtual int getTag(const std::string& name) const;
+
+    /**
+     \brief
+     Returns true if name is a defined tag name.
+     \param name Input - tag name to be checked.
+    */
+    virtual bool isValidTagName(const std::string& name) const;
+
+    /**
+     \brief
+     Returns all tag names in a single string separated by commas
+    */
+    virtual std::string showTagNames() const;
+
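
A short sketch of the tag naming interface above, as it could be driven from C++ (tag name and key are made up; the domain would come from one of the factory functions shown earlier):

    #include <finley/FinleyDomain.h>
    #include <iostream>

    // associate a name with a tag key and look it up again (hedged sketch)
    void tagSketch(finley::FinleyDomain& dom)
    {
        dom.setTagMap("top_boundary", 2);          // name -> tag key
        if (dom.isValidTagName("top_boundary"))
            std::cout << "tag key: " << dom.getTag("top_boundary") << std::endl;
        std::cout << "known tags: " << dom.showTagNames() << std::endl;
    }
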
+    /**
+     \brief
+     assigns new location to the domain
+    */
+    virtual void setNewX(const escript::Data& arg);
+
+    /**
+     \brief
+     interpolates data given on source onto target where source and target have to be given on the same domain.
+    */
+    virtual void interpolateOnDomain(escript::Data& target,
+                                     const escript::Data& source) const;
+
+    virtual bool probeInterpolationOnDomain(int functionSpaceType_source,
+                                           int functionSpaceType_target) const;
+
+    virtual signed char preferredInterpolationOnDomain(int functionSpaceType_source, int functionSpaceType_target) const;
+
+    /**
+    \brief given a vector of FunctionSpace typecodes, pass back a code to which they can all be interpolated.
+    \return true if the result is valid, false if not
+    */
+    bool commonFunctionSpace(const std::vector<int>& fs, int& resultcode) const;
+
+    /**
+     \brief
+     interpolates data given on source onto target where source and target are given on different domains.
+    */
+    virtual void interpolateAcross(escript::Data& target, const escript::Data& source) const;
+
+    /**
+     \brief determines whether interpolation from source to target is possible.
+    */
+    virtual bool probeInterpolationAcross(int functionSpaceType_source,
+                                  const escript::AbstractDomain& targetDomain,
+                                  int functionSpaceType_target) const;
+
+    /**
+     \brief
+     copies the surface normals at data points into out. The actual function space to be considered
+     is defined by out. out has to be defined on this.
+    */
+    virtual void setToNormal(escript::Data& out) const;
+
+    /**
+     \brief
+     copies the size of samples into out. The actual function space to be considered
+     is defined by out. out has to be defined on this.
+    */
+    virtual void setToSize(escript::Data& out) const;
+
+    /**
+     \brief
+     copies the gradient of arg into grad. The actual function space to be considered
+     for the gradient is defined by grad. arg and grad have to be defined on this.
+    */
+    virtual void setToGradient(escript::Data& grad, const escript::Data& arg) const;
+
+    /**
+     \brief
+     copies the integrals of the function defined by arg into integrals.
+     arg has to be defined on this.
+    */
+    virtual void setToIntegrals(std::vector<double>& integrals, const escript::Data& arg) const;
+
+    /**
+     \brief
+     return the identifier of the matrix type to be used for the global
+     stiffness matrix when a particular solver, package, preconditioner,
+     and symmetric matrix is used.
+
+     \param options a SolverBuddy instance with the desired options set
+    */
+    virtual int getSystemMatrixTypeId(const boost::python::object& options) const;
+
+    /**
+     \brief
+     return the identifier of the transport problem type to be used when a particular solver, preconditioner, package
+     and symmetric matrix is used.
+     \param solver
+     \param preconditioner
+     \param package
+     \param symmetry
+    */
+    virtual int getTransportTypeId(int solver, int preconditioner, int package,
+                                   bool symmetry) const;
+
+    /**
+     \brief
+     returns true if data on this domain with a function space of type functionSpaceCode has to
+     be considered as cell centered data.
+    */
+    virtual bool isCellOriented(int functionSpaceCode) const;
+
+    virtual bool ownSample(int fsCode, index_t id) const;
+
+    /**
+     \brief
+     adds a PDE onto the stiffness matrix mat and a rhs
+    */
+    virtual void addPDEToSystem(
+                     escript::AbstractSystemMatrix& mat, escript::Data& rhs,
+                     const escript::Data& A, const escript::Data& B,
+                     const escript::Data& C, const escript::Data& D,
+                     const escript::Data& X, const escript::Data& Y,
+                     const escript::Data& d, const escript::Data& y,
+                     const escript::Data& d_contact,
+                     const escript::Data& y_contact,
+                     const escript::Data& d_dirac,
+                     const escript::Data& y_dirac) const;
+
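
For orientation, the coefficients A, B, C, D, X, Y, d and y passed to addPDEToSystem parameterise the escript linear PDE; in the scalar case (paraphrased from the escript user guide, so treat the exact index conventions as a sketch) the interior equation and natural boundary condition read, in LaTeX:

    -\left(A_{jl}\,u_{,l} + B_{j}\,u\right)_{,j} + C_{l}\,u_{,l} + D\,u = -X_{j,j} + Y

    n_{j}\left(A_{jl}\,u_{,l} + B_{j}\,u + X_{j}\right) + d\,u = y

with the analogous pairs d_contact/y_contact contributing on the contact elements and d_dirac/y_dirac at the Dirac delta points.
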
+    /**
+     \brief
+     adds a PDE onto the lumped stiffness matrix matrix
+    */
+    virtual void addPDEToLumpedSystem(escript::Data& mat,
+                                      const escript::Data& D,
+                                      const escript::Data& d,
+                                      const escript::Data& d_dirac,
+                                      bool useHRZ) const;
+
+    /**
+     \brief
+     adds a PDE onto the stiffness matrix mat and a rhs
+    */
+    virtual void addPDEToRHS(escript::Data& rhs, const escript::Data& X,
+                             const escript::Data& Y, const escript::Data& y,
+                             const escript::Data& y_contact,
+                             const escript::Data& y_dirac) const;
+
+    /**
+     \brief
+     adds a PDE onto a transport problem
+    */
+    virtual void addPDEToTransportProblem(
+                     escript::AbstractTransportProblem& tp,
+                     escript::Data& source, const escript::Data& M,
+                     const escript::Data& A, const escript::Data& B,
+                     const escript::Data& C, const escript::Data& D,
+                     const escript::Data& X, const escript::Data& Y,
+                     const escript::Data& d, const escript::Data& y,
+                     const escript::Data& d_contact,
+                     const escript::Data& y_contact,
+                     const escript::Data& d_dirac,
+                     const escript::Data& y_dirac) const;
+
+    /**
+     \brief
+     creates a stiffness matrix and initializes it with zeros
+    */
+    escript::ASM_ptr newSystemMatrix(
+                      int row_blocksize,
+                      const escript::FunctionSpace& row_functionspace,
+                      int column_blocksize,
+                      const escript::FunctionSpace& column_functionspace,
+                      int type) const;
+
+    /**
+     \brief
+      creates a TransportProblem
+    */
+    escript::ATP_ptr newTransportProblem(int blocksize,
+                                   const escript::FunctionSpace& functionspace,
+                                   int type) const;
+
+    /**
+     \brief returns locations in the FEM nodes
+    */
+    virtual escript::Data getX() const;
+
+    /**
+     \brief returns boundary normals at the quadrature point on the face
+            elements
+    */
+    virtual escript::Data getNormal() const;
+
+    /**
+     \brief returns the element size
+    */
+    virtual escript::Data getSize() const;
+
+    /**
+     \brief comparison operators
+    */
+    virtual bool operator==(const escript::AbstractDomain& other) const;
+    virtual bool operator!=(const escript::AbstractDomain& other) const;
+
+    /**
+     \brief assigns new tag newTag to all samples of functionspace with a
+            positive value of mask for any of its sample points.
+    */
+    virtual void setTags(int functionSpaceType, int newTag,
+                         const escript::Data& mask) const;
+
+    /**
+      \brief
+       returns the number of tags in use for the given function space code;
+       borrowListOfTagsInUse below returns a pointer to an array of those tags
+    */
+    virtual int getNumberOfTagsInUse(int functionSpaceCode) const;
+
+    virtual const int* borrowListOfTagsInUse(int functionSpaceCode) const;
+
+    /**
+     \brief Checks if this domain allows tags for the specified
+            functionSpace code.
+    */
+    virtual bool canTag(int functionSpaceCode) const;
+
+    /**
+     \brief returns the approximation order used for a function space functionSpaceCode
+    */
+    virtual int getApproximationOrder(int functionSpaceCode) const;
+
+    virtual bool supportsContactElements() const { return true; }
+
+    virtual escript::Data randomFill(const escript::DataTypes::ShapeType& shape,
+                                const escript::FunctionSpace& what, long seed,
+                                const boost::python::tuple& filter) const;
+
+    /**
+     \brief
+     returns a reference to the tag name->value map
+    */
+    const TagMap& getTagMap() const { return m_tagMap; }
+
+    void createMappings(const IndexVector& dofDistribution,
+                        const IndexVector& nodeDistribution);
+
+#ifdef ESYS_HAVE_PASO
+    /// returns a reference to the paso matrix pattern
+    paso::SystemMatrixPattern_ptr getPasoPattern(bool reducedRowOrder,
+                                                 bool reducedColOrder) const;
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// returns a Trilinos CRS graph suitable to build a sparse matrix.
+    esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph(bool reducedOrder) const;
+#endif
+
+    void glueFaces(double safetyFactor, double tolerance, bool optimize);
+
+    void joinFaces(double safetyFactor, double tolerance, bool optimize);
+
+    /// takes nodes, elements, etc. of all input meshes and copies them into
+    /// a new mesh. Ids of output are shifted by the maximum Id of inputs.
+    static FinleyDomain* merge(const std::vector<const FinleyDomain*>& meshes);
+
+private:
+    void prepare(bool optimize);
+
+    void setOrders();
+
+    /// Initially the element nodes refer to the numbering defined by the
+    /// global id assigned to the nodes in the NodeFile. It is also not ensured
+    /// that all nodes referred by an element are actually available on the
+    /// process. At the output, a local node labeling is used and all nodes are
+    /// available. In particular the numbering of the element nodes is between
+    /// 0 and Nodes->numNodes.
+    /// The function does not create a distribution of the degrees of freedom.
+    void resolveNodeIds();
+
+    /// assigns new node reference numbers to all element files.
+    /// If k is the old node, the new node is newNode[k-offset].
+    void relabelElementNodes(const IndexVector& newNode, index_t offset);
+
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrixPattern_ptr makePasoPattern(bool reducedRowOrder,
+                                                  bool reducedColOrder) const;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    esys_trilinos::GraphType* createTrilinosGraph(bool reducedOrder) const;
+#endif
+    void createColoring(const IndexVector& dofMap);
+    void distributeByRankOfDOF(const IndexVector& distribution);
+    void markNodes(std::vector<short>& mask, index_t offset, bool useLinear) const;
+    void optimizeDOFDistribution(IndexVector& distribution);
+    void optimizeDOFLabeling(const IndexVector& distribution);
+    void optimizeElementOrdering();
+    void findMatchingFaces(double safetyFactor, double tolerance, int* numPairs,
+                           int* elem0, int* elem1, int* matchingNodes) const;
+    void updateTagList();
+    void printElementInfo(const ElementFile* e, const std::string& title,
+                          const std::string& defaultType, bool full) const;
+
+    void writeElementInfo(std::ostream& stream, const ElementFile* e,
+                          const std::string& defaultType) const;
+
+    /// MPI information
+    escript::JMPI m_mpiInfo;
+    /// domain description
+    std::string m_name;
+    int approximationOrder;
+    int reducedApproximationOrder;
+    int integrationOrder;
+    int reducedIntegrationOrder;
+    /// the table of the nodes
+    NodeFile* m_nodes;
+    /// the table of the elements
+    ElementFile* m_elements;
+    /// the table of face elements
+    ElementFile* m_faceElements;
+    /// the table of contact elements
+    ElementFile* m_contactElements;
+    /// the table of points (treated as elements of dimension 0)
+    ElementFile* m_points;
+    /// the tag map mapping names to tag keys
+    TagMap m_tagMap;
+#ifdef ESYS_HAVE_PASO
+    // pointers to the sparse matrix patterns
+    mutable paso::SystemMatrixPattern_ptr FullFullPattern;
+    mutable paso::SystemMatrixPattern_ptr FullReducedPattern;
+    mutable paso::SystemMatrixPattern_ptr ReducedFullPattern;
+    mutable paso::SystemMatrixPattern_ptr ReducedReducedPattern;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    mutable esys_trilinos::TrilinosGraph_ptr m_fullGraph;
+    mutable esys_trilinos::TrilinosGraph_ptr m_reducedGraph;
+#endif
+
+    static FunctionSpaceNamesMapType m_functionSpaceTypeNames;
+};
+
+} // end of namespace
+
+#endif // __FINLEY_DOMAIN_H__
+
diff --git a/escriptcore/test/DataBlocks2DTestCase.h b/finley/src/FinleyException.h
similarity index 66%
rename from escriptcore/test/DataBlocks2DTestCase.h
rename to finley/src/FinleyException.h
index 9d278a5..8efdc39 100644
--- a/escriptcore/test/DataBlocks2DTestCase.h
+++ b/finley/src/FinleyException.h
@@ -15,19 +15,20 @@
 *****************************************************************************/
 
 
-#if !defined  DataBlocks2DTestCase_20040405_H
-#define  DataBlocks2DTestCase_20040405_H
+#ifndef __FINLEY_EXCEPTION_H__
+#define __FINLEY_EXCEPTION_H__
 
-#include <cppunit/TestFixture.h>
-#include <cppunit/TestSuite.h>
+#include <escript/EsysException.h>
 
-class DataBlocks2DTestCase : public CppUnit::TestFixture
+namespace finley {
+
+class FinleyException : public escript::EsysException
 {
 public:
-  void testAll();
-
-  static CppUnit::TestSuite* suite();
+    FinleyException(const std::string& str) : escript::EsysException(str) {}
 };
 
-#endif
+} // end of namespace
+
+#endif // __FINLEY_EXCEPTION_H__
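
Since FinleyException simply derives from escript::EsysException, existing escript error handling keeps working; a minimal sketch (assuming EsysException ultimately derives from std::exception so that what() is available):

    #include <finley/FinleyException.h>
    #include <iostream>

    void exceptionSketch()
    {
        try {
            throw finley::FinleyException("something went wrong in finley");
        } catch (const escript::EsysException& e) {
            std::cerr << e.what() << std::endl;   // assumes a std::exception base
        }
    }
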
 
diff --git a/finley/src/FinleyVersion.h b/finley/src/FinleyVersion.h
index 98c4f4c..a354e52 100644
--- a/finley/src/FinleyVersion.h
+++ b/finley/src/FinleyVersion.h
@@ -18,6 +18,6 @@
 #ifndef INC_FINLEYVERSION
 #define INC_FINLEYVERSION
 
-char Finley_Version[]="$Revision: 6109 $";
+char Finley_Version[]="$Revision: 6112 $";
 
 #endif 
diff --git a/finley/src/IndexList.cpp b/finley/src/IndexList.cpp
index dceede4..b9bbf53 100644
--- a/finley/src/IndexList.cpp
+++ b/finley/src/IndexList.cpp
@@ -21,23 +21,17 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include "IndexList.h"
 #include "ElementFile.h"
 
-/* Translate from distributed/local array indices to global indices */
-
-/****************************************************************************/
-/* inserts the contributions from the element matrices of elements
-   into the row index col. If symmetric is set, only the upper
-   triangle of the matrix is stored.
-*/
+#include <escript/index.h>
 
 namespace finley {
 
+/* Translate from distributed/local array indices to global indices */
+
+/// inserts the contributions from the element matrices of elements
+/// into the row index col.
 void IndexList_insertElements(IndexList* index_list, ElementFile* elements,
                               bool reduce_row_order, const index_t* row_map,
                               bool reduce_col_order, const index_t* col_map)
@@ -47,7 +41,7 @@ void IndexList_insertElements(IndexList* index_list, ElementFile* elements,
     if (!elements)
         return;
 
-    const int NN=elements->numNodes;
+    const int NN = elements->numNodes;
     const_ReferenceElement_ptr refElement(elements->referenceElementSet->
                                             borrowReferenceElement(false));
 
@@ -100,17 +94,17 @@ void IndexList_insertElementsWithRowRangeNoMainDiagonal(
         return;
 
     // this does not resolve macro elements
-    const int NN=elements->numNodes;
-    for (int color=elements->minColor; color<=elements->maxColor; color++) {
+    const int NN = elements->numNodes;
+    for (index_t color = elements->minColor; color <= elements->maxColor; color++) {
 #pragma omp for
-        for (index_t e=0; e<elements->numElements; e++) {
-            if (elements->Color[e]==color) {
-                for (int kr=0; kr<NN; kr++) {
-                    const index_t irow=row_map[elements->Nodes[INDEX2(kr,e,NN)]];
-                    if (firstRow<=irow && irow<lastRow) {
-                        const index_t irow_loc=irow-firstRow;
-                        for (int kc=0; kc<NN; kc++) {
-                            const index_t icol=col_map[elements->Nodes[INDEX2(kc,e,NN)]];
+        for (index_t e = 0; e < elements->numElements; e++) {
+            if (elements->Color[e] == color) {
+                for (int kr = 0; kr < NN; kr++) {
+                    const index_t irow = row_map[elements->Nodes[INDEX2(kr, e, NN)]];
+                    if (firstRow <= irow && irow < lastRow) {
+                        const index_t irow_loc = irow - firstRow;
+                        for (int kc = 0; kc < NN; kc++) {
+                            const index_t icol = col_map[elements->Nodes[INDEX2(kc, e, NN)]];
                             if (icol != irow)
                                 index_list[irow_loc].insertIndex(icol);
                         }
diff --git a/finley/src/IndexList.h b/finley/src/IndexList.h
index b858261..8cbc145 100644
--- a/finley/src/IndexList.h
+++ b/finley/src/IndexList.h
@@ -26,13 +26,13 @@
 
 #include "Finley.h"
 
-#include "esysUtils/IndexList.h"
+#include <escript/IndexList.h>
 
 // helpers to build system matrix
 
 namespace finley {
 
-using esysUtils::IndexList;
+using escript::IndexList;
 
 class ElementFile;
 
diff --git a/finley/src/Mesh.cpp b/finley/src/Mesh.cpp
deleted file mode 100644
index da1dd6e..0000000
--- a/finley/src/Mesh.cpp
+++ /dev/null
@@ -1,638 +0,0 @@
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************
-
-  Finley: Mesh
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Mesh.h"
-#include "IndexList.h"
-#include <boost/scoped_array.hpp>
-
-#include "CPPAdapter/FinleyAdapterException.h"
-
-namespace finley {
-
-/// Constructor.
-/// Allocates a Mesh with given name and dimensionality
-Mesh::Mesh(const std::string name, int numDim, esysUtils::JMPI& mpi_info) :
-    m_name(name),
-    approximationOrder(-1),
-    reducedApproximationOrder(-1),
-    integrationOrder(-1),
-    reducedIntegrationOrder(-1),
-    Elements(NULL),
-    FaceElements(NULL),
-    ContactElements(NULL),
-    Points(NULL)
-{
-    MPIInfo = mpi_info;
-
-    // allocate node table
-    Nodes = new NodeFile(numDim, mpi_info);
-}
-
-/// destructor
-Mesh::~Mesh()
-{
-    delete Nodes;
-    delete FaceElements;
-    delete Elements;
-    delete ContactElements;
-    delete Points;
-    tagMap.clear();
-}
-
-void Mesh::setElements(ElementFile *elements)
-{
-    delete Elements;
-    Elements=elements;
-}
-
-void Mesh::setFaceElements(ElementFile *elements)
-{
-    delete FaceElements;
-    FaceElements=elements;
-}
-
-void Mesh::setContactElements(ElementFile *elements)
-{
-    delete ContactElements;
-    ContactElements=elements;
-}
-
-void Mesh::setPoints(ElementFile *elements)
-{
-    delete Points;
-    Points=elements;
-}
-
-void Mesh::setOrders() 
-{
-    const int ORDER_MAX=9999999;
-    int locals[4] = { ORDER_MAX, ORDER_MAX, ORDER_MAX, ORDER_MAX };
-
-    if (Elements != NULL && Elements->numElements > 0) {
-        locals[0]=std::min(locals[0], Elements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
-        locals[1]=std::min(locals[1], Elements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
-        locals[2]=std::min(locals[2], Elements->referenceElementSet->referenceElement->integrationOrder);
-        locals[3]=std::min(locals[3], Elements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
-    }
-    if (FaceElements != NULL && FaceElements->numElements > 0) {
-        locals[0]=std::min(locals[0], FaceElements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
-        locals[1]=std::min(locals[1], FaceElements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
-        locals[2]=std::min(locals[2], FaceElements->referenceElementSet->referenceElement->integrationOrder);
-        locals[3]=std::min(locals[3], FaceElements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
-    }
-    if (ContactElements != NULL && ContactElements->numElements > 0) {
-        locals[0]=std::min(locals[0], ContactElements->referenceElementSet->referenceElement->BasisFunctions->Type->numOrder);
-        locals[1]=std::min(locals[1], ContactElements->referenceElementSet->referenceElement->LinearBasisFunctions->Type->numOrder);
-        locals[2]=std::min(locals[2], ContactElements->referenceElementSet->referenceElement->integrationOrder);
-        locals[3]=std::min(locals[3], ContactElements->referenceElementSet->referenceElementReducedQuadrature->integrationOrder);
-    }
-
-#ifdef ESYS_MPI
-    int globals[4];
-    MPI_Allreduce(locals, globals, 4, MPI_INT, MPI_MIN, MPIInfo->comm);
-    approximationOrder=(globals[0] < ORDER_MAX ? globals[0] : -1);
-    reducedApproximationOrder=(globals[1] < ORDER_MAX ? globals[1] : -1);
-    integrationOrder=(globals[2] < ORDER_MAX ? globals[2] : -1);
-    reducedIntegrationOrder=(globals[3] < ORDER_MAX ? globals[3] : -1);
-#else
-    approximationOrder=(locals[0] < ORDER_MAX ? locals[0] : -1);
-    reducedApproximationOrder=(locals[1] < ORDER_MAX ? locals[1] : -1);
-    integrationOrder=(locals[2] < ORDER_MAX ? locals[2] : -1);
-    reducedIntegrationOrder=(locals[3] < ORDER_MAX ? locals[3] : -1);
-#endif
-}
-
-/// creates node mappings without (re-)distributing anything
-void Mesh::createMappings(const std::vector<index_t>& dofDistribution,
-                          const std::vector<index_t>& nodeDistribution)
-{
-    std::vector<short> maskReducedNodes(Nodes->numNodes, -1);
-    markNodes(maskReducedNodes, 0, true);
-    std::vector<index_t> indexReducedNodes = util::packMask(maskReducedNodes);
-    if (noError())
-        Nodes->createNodeMappings(indexReducedNodes, dofDistribution,
-                                  nodeDistribution);
-}
-
-/// redistributes the Nodes and Elements including overlap
-/// according to the DOF distribution. It will create an element colouring
-/// but will not create any mappings.
-void Mesh::distributeByRankOfDOF(const std::vector<index_t>& dof_distribution)
-{
-    std::vector<int> mpiRankOfDOF(Nodes->numNodes);
-    Nodes->assignMPIRankToDOFs(mpiRankOfDOF, dof_distribution);
-
-    // first, the elements are redistributed according to mpiRankOfDOF
-    // at the input the Node tables refer to the local labeling of the nodes
-    // while at the output they refer to the global labeling which is rectified
-    // in the next step
-    if (noError())
-        Elements->distributeByRankOfDOF(mpiRankOfDOF, Nodes->Id);
-    if (noError())
-        FaceElements->distributeByRankOfDOF(mpiRankOfDOF, Nodes->Id);
-    if (noError())
-        ContactElements->distributeByRankOfDOF(mpiRankOfDOF, Nodes->Id);
-    if (noError())
-        Points->distributeByRankOfDOF(mpiRankOfDOF, Nodes->Id);
-
-    // resolve the node ids
-    if (noError())
-        resolveNodeIds();
-
-    // create a local labeling of the DOFs
-    const std::pair<index_t,index_t> dof_range(Nodes->getDOFRange());
-    const index_t len=dof_range.second-dof_range.first+1;
-    // local mask for used nodes
-    std::vector<index_t> localDOF_mask(len, -1);
-    std::vector<index_t> localDOF_map(Nodes->numNodes, -1);
-
-#pragma omp parallel for
-    for (index_t n=0; n<Nodes->numNodes; n++) {
-#ifdef BOUNDS_CHECK
-        if ((Nodes->globalDegreesOfFreedom[n]-dof_range.first) >= len ||
-                (Nodes->globalDegreesOfFreedom[n]-dof_range.first) < 0) {
-            printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
-            exit(1);
-        }
-#endif
-        localDOF_mask[Nodes->globalDegreesOfFreedom[n]-dof_range.first]=n;
-    }
-
-    index_t numDOFs=0;
-    for (int n=0; n<len; n++) {
-        const index_t k=localDOF_mask[n];
-        if (k>=0) {
-             localDOF_mask[n]=numDOFs;
-             numDOFs++;
-          }
-    }
-#pragma omp parallel for
-    for (index_t n=0; n<Nodes->numNodes; n++) {
-        const index_t k=localDOF_mask[Nodes->globalDegreesOfFreedom[n]-dof_range.first];
-        localDOF_map[n]=k;
-    }
-    // create element coloring
-    if (noError())
-        createColoring(localDOF_map);
-}
-
-/// prints the mesh details to standard output
-void Mesh::print()
-{
-    // write header
-    printf("Mesh name: %s\n", m_name.c_str());
-  
-    // write nodes
-    Nodes->print();
-  
-    // write elements
-    if (Elements) {
-        std::cout << "=== "
-                 << Elements->referenceElementSet->referenceElement->Type->Name
-                 << ":\nnumber of elements=" << Elements->numElements
-                 << "\ncolor range=[" << Elements->minColor << ","
-                 << Elements->maxColor << "]\n";
-        if (Elements->numElements > 0) {
-            const int NN=Elements->referenceElementSet->referenceElement->Type->numNodes;
-            const int NN2=Elements->numNodes;
-            std::cout << "Id,Tag,Owner,Color,Nodes" << std::endl;
-            for (index_t i=0; i<Elements->numElements; i++) {
-                std::cout << Elements->Id[i] << "," << Elements->Tag[i] << ","
-                    << Elements->Owner[i] << "," << Elements->Color[i] << ",";
-                for (int j=0; j<NN; j++)
-                    std::cout << " " << Nodes->Id[Elements->Nodes[INDEX2(j,i,NN2)]];
-                std::cout << std::endl;
-            }
-        }
-    }
-
-    // write face elements
-    if (FaceElements) {
-        std::cout << "=== "
-                 << FaceElements->referenceElementSet->referenceElement->Type->Name
-                 << ":\nnumber of elements=" << FaceElements->numElements
-                 << "\ncolor range=[" << FaceElements->minColor << ","
-                 << FaceElements->maxColor << "]\n";
-        if (FaceElements->numElements > 0) {
-            const int NN=FaceElements->referenceElementSet->referenceElement->Type->numNodes;
-            const int NN2=FaceElements->numNodes;
-            std::cout << "Id,Tag,Owner,Color,Nodes" << std::endl;
-            for (index_t i=0; i<FaceElements->numElements; i++) {
-                std::cout << FaceElements->Id[i] << "," << FaceElements->Tag[i]
-                    << "," << FaceElements->Owner[i] << ","
-                    << FaceElements->Color[i] << ",";
-                for (int j=0; j<NN; j++)
-                    std::cout << " " << Nodes->Id[FaceElements->Nodes[INDEX2(j,i,NN2)]];
-                std::cout << std::endl;
-            }
-        }
-    }
-
-    // write Contact elements
-    if (ContactElements) {
-        std::cout << "=== "
-                 << ContactElements->referenceElementSet->referenceElement->Type->Name
-                 << ":\nnumber of elements=" << ContactElements->numElements
-                 << "\ncolor range=[" << ContactElements->minColor << ","
-                 << ContactElements->maxColor << "]\n";
-        if (ContactElements->numElements > 0) {
-            const int NN=ContactElements->referenceElementSet->referenceElement->Type->numNodes;
-            const int NN2=ContactElements->numNodes;
-            std::cout << "Id,Tag,Owner,Color,Nodes" << std::endl;
-            for (index_t i=0; i<ContactElements->numElements; i++) {
-                std::cout << ContactElements->Id[i] << ","
-                    << ContactElements->Tag[i] << ","
-                    << ContactElements->Owner[i] << ","
-                    << ContactElements->Color[i] << ",";
-                for (int j=0; j<NN; j++)
-                    std::cout << " " << Nodes->Id[ContactElements->Nodes[INDEX2(j,i,NN2)]];
-                std::cout << std::endl;
-            }
-        }
-    }
-  
-    // write points
-    if (Points) {
-        std::cout << "=== "
-                 << Points->referenceElementSet->referenceElement->Type->Name
-                 << ":\nnumber of elements=" << Points->numElements
-                 << "\ncolor range=[" << Points->minColor << ","
-                 << Points->maxColor << "]\n";
-        if (Points->numElements > 0) {
-            const int NN=Points->referenceElementSet->referenceElement->Type->numNodes;
-            const int NN2=Points->numNodes;
-            std::cout << "Id,Tag,Owner,Color,Nodes" << std::endl;
-            for (index_t i=0; i<Points->numElements; i++) {
-                std::cout << Points->Id[i] << "," << Points->Tag[i] << ","
-                    << Points->Owner[i] << "," << Points->Color[i] << ",";
-                for (int j=0; j<NN; j++)
-                    std::cout << " " << Nodes->Id[Points->Nodes[INDEX2(j,i,NN2)]];
-                std::cout << std::endl;
-            }
-        }
-    }
-}
-
-void Mesh::markNodes(std::vector<short>& mask, int offset, bool useLinear)
-{
-    Elements->markNodes(mask, offset, useLinear);
-    FaceElements->markNodes(mask, offset, useLinear);
-    ContactElements->markNodes(mask, offset, useLinear);
-    Points->markNodes(mask, offset, useLinear);
-}
-
-void Mesh::markDOFsConnectedToRange(int* mask, int offset, int marker, 
-                                    index_t firstDOF, index_t lastDOF,
-                                    bool useLinear)
-{
-    const index_t *dofIndex = (useLinear ? Nodes->globalReducedDOFIndex
-                                     : Nodes->globalDegreesOfFreedom);
-    Elements->markDOFsConnectedToRange(mask, offset, marker, firstDOF, lastDOF,
-            dofIndex, useLinear);
-    FaceElements->markDOFsConnectedToRange(mask, offset, marker, firstDOF,
-            lastDOF, dofIndex, useLinear);
-    ContactElements->markDOFsConnectedToRange(mask, offset, marker, firstDOF,
-            lastDOF, dofIndex, useLinear);
-    Points->markDOFsConnectedToRange(mask, offset, marker, firstDOF, lastDOF,
-            dofIndex, useLinear);
-}
-
-/// optimizes the labeling of the DOFs on each processor
-void Mesh::optimizeDOFLabeling(const std::vector<index_t>& distribution)
-{
-    const int myRank=MPIInfo->rank;
-    const int mpiSize=MPIInfo->size;
-    const index_t myFirstVertex=distribution[myRank];
-    const index_t myLastVertex=distribution[myRank+1];
-    const dim_t myNumVertices=myLastVertex-myFirstVertex;
-    index_t len=0;
-    for (int p=0; p<mpiSize; ++p)
-        len=std::max(len, distribution[p+1]-distribution[p]);
-
-    boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
-    std::vector<index_t> newGlobalDOFID(len);
-    // create the adjacency structure xadj and adjncy
-#pragma omp parallel
-    {
-        // insert contributions from element matrices into columns index
-        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, Elements,
-                Nodes->globalDegreesOfFreedom,
-                Nodes->globalDegreesOfFreedom);
-        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, FaceElements,
-                Nodes->globalDegreesOfFreedom,
-                Nodes->globalDegreesOfFreedom);
-        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, ContactElements,
-                Nodes->globalDegreesOfFreedom,
-                Nodes->globalDegreesOfFreedom);
-        IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                myFirstVertex, myLastVertex, Points,
-                Nodes->globalDegreesOfFreedom,
-                Nodes->globalDegreesOfFreedom);
-    }
-    // create the local matrix pattern
-    paso::Pattern_ptr pattern=paso::Pattern::fromIndexListArray(0,
-            myNumVertices, index_list.get(), myFirstVertex, myLastVertex,
-            -myFirstVertex);
-
-    if (noError())
-        pattern->reduceBandwidth(&newGlobalDOFID[0]); 
-
-    esysUtils::Esys_MPIInfo_noError(MPIInfo);
-
-    if (noError()) {
-        // shift new labeling to create a global id
-#pragma omp parallel for
-        for (int i=0; i<myNumVertices; ++i)
-            newGlobalDOFID[i]+=myFirstVertex;
-
-        // distribute new labeling to other processors
-#ifdef ESYS_MPI
-        const int dest=esysUtils::mod_rank(mpiSize, myRank + 1);
-        const int source=esysUtils::mod_rank(mpiSize, myRank - 1);
-#endif
-        int current_rank=myRank;
-        for (int p=0; p<mpiSize; ++p) {
-            const index_t firstVertex=distribution[current_rank];
-            const index_t lastVertex=distribution[current_rank+1];
-#pragma omp parallel for
-            for (index_t i=0; i<Nodes->numNodes; ++i) {
-                const index_t k=Nodes->globalDegreesOfFreedom[i];
-                if (firstVertex<=k && k<lastVertex) {
-                    Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
-                }
-            }
-   
-            if (p<mpiSize-1) { // the final send can be skipped
-#ifdef ESYS_MPI
-                MPI_Status status;
-                MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
-                                     dest, MPIInfo->msg_tag_counter,
-                                     source, MPIInfo->msg_tag_counter,
-                                     MPIInfo->comm, &status);
-#endif
-                MPIInfo->msg_tag_counter++;
-                current_rank=esysUtils::mod_rank(mpiSize, current_rank-1);
-            }
-        }
-    }
-}
-
-/// prepares the mesh for further use
-void Mesh::prepare(bool optimize)
-{
-    setOrders();
-
-    // first step is to distribute the elements according to a global
-    // distribution of DOF
-    std::vector<index_t> distribution(MPIInfo->size+1);
-
-    // first we create dense labeling for the DOFs
-    index_t newGlobalNumDOFs=Nodes->createDenseDOFLabeling();
-
-    // create a distribution of the global DOFs and determine the MPI rank
-    // controlling the DOFs on this processor
-    MPIInfo->setDistribution(0, newGlobalNumDOFs-1, &distribution[0]);
-
-    // now the mesh is re-distributed according to the distribution vector
-    // this will redistribute the Nodes and Elements including overlap and
-    // will create an element coloring but will not create any mappings
-    // (see later in this function)
-    if (noError())
-        distributeByRankOfDOF(distribution);
-
-    // at this stage we are able to start an optimization of the DOF
-    // distribution using ParMetis. On return distribution is altered and
-    // new DOF IDs have been assigned
-    if (noError() && optimize && MPIInfo->size>1) {
-        optimizeDOFDistribution(distribution); 
-        if (noError())
-            distributeByRankOfDOF(distribution);
-    }
-    // the local labelling of the degrees of freedom is optimized
-    if (noError() && optimize) {
-        optimizeDOFLabeling(distribution); 
-    }
-    // rearrange elements with the aim of bringing elements closer to memory
-    // locations of the nodes (distributed shared memory!):
-    optimizeElementOrdering();
-
-    // create the global indices
-    if (noError()) {
-        std::vector<short> maskReducedNodes(Nodes->numNodes, -1);
-        std::vector<index_t> nodeDistribution(MPIInfo->size+1);
-        markNodes(maskReducedNodes, 0, true);
-        std::vector<index_t> indexReducedNodes = util::packMask(maskReducedNodes);
-
-        Nodes->createDenseNodeLabeling(nodeDistribution, distribution); 
-        // created reduced DOF labeling
-        Nodes->createDenseReducedLabeling(maskReducedNodes, false); 
-        // created reduced node labeling
-        Nodes->createDenseReducedLabeling(maskReducedNodes, true);
-
-        // create the missing mappings
-        if (noError())
-            Nodes->createNodeMappings(indexReducedNodes, distribution, nodeDistribution);
-    }
-
-    updateTagList();
-}
-
-/// tries to reduce the number of colours for all element files
-void Mesh::createColoring(const std::vector<index_t>& dofMap)
-{
-    if (noError())
-        Elements->createColoring(dofMap);
-    if (noError())
-        FaceElements->createColoring(dofMap);
-    if (noError())
-        Points->createColoring(dofMap);
-    if (noError())
-        ContactElements->createColoring(dofMap);
-}
-
-/// redistributes elements to minimize communication during assemblage
-void Mesh::optimizeElementOrdering()
-{
-    if (noError())
-        Elements->optimizeOrdering();
-    if (noError())
-        FaceElements->optimizeOrdering();
-    if (noError())
-        Points->optimizeOrdering();
-    if (noError())
-        ContactElements->optimizeOrdering();
-}
-
-/// regenerates list of tags in use for node file and element files
-void Mesh::updateTagList()
-{
-    if (noError()) Nodes->updateTagList();
-    if (noError()) Elements->updateTagList();
-    if (noError()) FaceElements->updateTagList();
-    if (noError()) Points->updateTagList();
-    if (noError()) ContactElements->updateTagList();
-}
-
-/// assigns new node reference numbers to all element files
-void Mesh::relabelElementNodes(const std::vector<index_t>& newNode, index_t offset)
-{
-    Elements->relabelNodes(newNode, offset);
-    FaceElements->relabelNodes(newNode, offset);
-    ContactElements->relabelNodes(newNode, offset);
-    Points->relabelNodes(newNode, offset);
-}
-
-void Mesh::resolveNodeIds()
-{
-    // Initially the element nodes refer to the numbering defined by the global
-    // id assigned to the nodes in the NodeFile. It is also not ensured that
-    // all nodes referred by an element are actually available on the process.
-    // At the output, a local node labeling is used and all nodes are
-    // available. In particular the numbering of the element nodes is between
-    // 0 and NodeFile->numNodes.
-    // The function does not create a distribution of the degrees of freedom.
-
-    // find the minimum and maximum id used by elements
-    index_t min_id=std::numeric_limits<index_t>::max();
-    index_t max_id=std::numeric_limits<index_t>::min();
-    std::pair<index_t,index_t> range(Elements->getNodeRange());
-    max_id=std::max(max_id,range.second);
-    min_id=std::min(min_id,range.first);
-    range=FaceElements->getNodeRange();
-    max_id=std::max(max_id,range.second);
-    min_id=std::min(min_id,range.first);
-    range=ContactElements->getNodeRange();
-    max_id=std::max(max_id,range.second);
-    min_id=std::min(min_id,range.first);
-    range=Points->getNodeRange();
-    max_id=std::max(max_id,range.second);
-    min_id=std::min(min_id,range.first);
-#ifdef Finley_TRACE
-    index_t global_min_id, global_max_id;
-#ifdef ESYS_MPI
-    index_t id_range[2], global_id_range[2];
-    id_range[0]=-min_id;
-    id_range[1]=max_id;
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, MPIInfo->comm);
-    global_min_id=-global_id_range[0];
-    global_max_id=global_id_range[1];
-#else
-    global_min_id=min_id;
-    global_max_id=max_id;
-#endif
-    printf("Node id range used by elements is %d:%d\n",global_min_id,global_max_id);
-#endif
-    if (min_id>max_id) {
-        max_id=-1;
-        min_id=0;
-    }
-  
-    // allocate mappings for new local node labeling to global node labeling
-    // (newLocalToGlobalNodeLabels) and global node labeling to the new local
-    // node labeling (globalToNewLocalNodeLabels[i-min_id] is the new local id
-    // of global node i)
-    index_t len=(max_id>=min_id) ? max_id-min_id+1 : 0;
-
-    // mark the nodes referred by elements in usedMask
-    std::vector<short> usedMask(len, -1);
-    markNodes(usedMask, min_id, false);
-
-    // create a local labeling newLocalToGlobalNodeLabels of the local nodes
-    // by packing the mask usedMask
-    std::vector<index_t> newLocalToGlobalNodeLabels=util::packMask(usedMask);
-    const dim_t newNumNodes=newLocalToGlobalNodeLabels.size();
-    usedMask.clear();
-
-    // invert the new labeling and shift the index newLocalToGlobalNodeLabels
-    // to global node ids
-    std::vector<index_t> globalToNewLocalNodeLabels(len, -1);
-
-#pragma omp parallel for
-    for (index_t n=0; n<newNumNodes; n++) {
-#ifdef BOUNDS_CHECK
-        if (newLocalToGlobalNodeLabels[n] >= len || newLocalToGlobalNodeLabels[n] < 0) {
-            printf("BOUNDS_CHECK %s %d n=%d\n", __FILE__, __LINE__, n);
-            exit(1);
-        }
-#endif
-        globalToNewLocalNodeLabels[newLocalToGlobalNodeLabels[n]]=n;
-        newLocalToGlobalNodeLabels[n]+=min_id;
-    }
-
-    // create a new table
-    NodeFile *newNodeFile=new NodeFile(getDim(), MPIInfo);
-    if (noError()) {
-        newNodeFile->allocTable(newNumNodes);
-    }
-    if (noError()) {
-        if (len)
-            newNodeFile->gather_global(&newLocalToGlobalNodeLabels[0], Nodes);
-        else
-            newNodeFile->gather_global(NULL, Nodes);
-    }
-    if (noError()) {
-        delete Nodes;
-        Nodes=newNodeFile;
-        // relabel nodes of the elements
-        relabelElementNodes(globalToNewLocalNodeLabels, min_id);
-    } else
-        throw FinleyAdapterException("Errors occurred during node resolution");
-}
-
-/// sets new coordinates for the nodes
-void Mesh::setCoordinates(const escript::Data& newX)
-{
-    Nodes->setCoordinates(newX);
-}
-
-void Mesh::addTagMap(const char* name, int tag_key) 
-{
-   tagMap[std::string(name)]=tag_key;
-}
-
-int Mesh::getTag(const char* name) const
-{
-    TagMap::const_iterator it = tagMap.find(name);
-    if (it == tagMap.end()) {
-        std::stringstream ss;
-        ss << "getTag: unknown tag name " << name << ".";
-        const std::string errorMsg(ss.str());
-        setError(VALUE_ERROR, errorMsg.c_str());
-        return -1;
-    }
-    return it->second;
-}
-
-bool Mesh::isValidTagName(const char* name) const
-{
-   return (tagMap.count(std::string(name)) > 0);
-}
-
-} // namespace finley
-
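The node-relabeling functions removed above (createMappings() and resolveNodeIds()) share one idiom: mark the wanted nodes in a mask, pack the mask into an index list, then invert that list into a lookup table. Below is a minimal standalone C++ sketch of that idiom; the packMask helper only mirrors how util::packMask is used here, and the sample data is made up for illustration.

    // mask/pack/invert sketch (illustrative data, C++03-style)
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Collect the indices of all marked entries (mask value >= 0),
    // mirroring how util::packMask is used in the code above.
    static std::vector<int> packMask(const std::vector<short>& mask)
    {
        std::vector<int> index;
        for (std::size_t i = 0; i < mask.size(); ++i)
            if (mask[i] >= 0)
                index.push_back(static_cast<int>(i));
        return index;
    }

    int main()
    {
        // -1 = node not used by any (linear) element
        std::vector<short> mask(5, -1);
        mask[1] = 0; mask[2] = 0; mask[4] = 0;

        // pack: new compact label -> old index (cf. newLocalToGlobalNodeLabels)
        std::vector<int> newToOld = packMask(mask);              // {1, 2, 4}

        // invert: old index -> new compact label (cf. globalToNewLocalNodeLabels)
        std::vector<int> oldToNew(mask.size(), -1);
        for (std::size_t n = 0; n < newToOld.size(); ++n)
            oldToNew[newToOld[n]] = static_cast<int>(n);

        for (std::size_t i = 0; i < oldToNew.size(); ++i)
            std::printf("%d -> %d\n", static_cast<int>(i), oldToNew[i]);
        return 0;
    }
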
diff --git a/finley/src/Mesh.h b/finley/src/Mesh.h
deleted file mode 100644
index 8eb788e..0000000
--- a/finley/src/Mesh.h
+++ /dev/null
@@ -1,189 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#ifndef __FINLEY_MESH_H__
-#define __FINLEY_MESH_H__
-
-/****************************************************************************
-
-   Finley: Mesh
-
-   A mesh is built from nodes and elements which are describing the
-   domain, the surface and point sources (the latter are needed to
-   establish links with other codes, in particular to particle
-   codes). The nodes are stored in a NodeFile and elements in an
-   ElementFile. Four ElementFiles containing the elements
-   describe the domain, surface, contact and point sources, respectively.
-   Notice that the surface elements do not necessarily cover the entire
-   surface of the domain.
-
-   The element type is fixed by the reference element, see
-   ReferenceElement.h. The numbering of the nodes starts with 0.
-
-   Important: it is assumed that every node appears in at least
-   one element or surface element and that any node used in an
-   element, surface element or as a point is specified in the
-   NodeFile, see also resolveNodeIds.
-
-   In some cases it is useful to refer to a mesh entirely built from
-   order 1 (=linear) elements. The linear version of the mesh can be
-   accessed by referring to the first few nodes of each element
-   (thanks to the way the nodes are ordered). As the numbering of
-   these nodes is not continuous a relabeling vector is introduced
-   in the NodeFile. This feature is not fully implemented yet.
-
-   All nodes and elements are tagged. The tag allows nodes and elements to
-   be grouped. A typical application is to mark surface elements on a
-   certain portion of the domain with the same tag. All these surface
-   elements can then be assigned the same value, e.g. for the pressure.
-
-   The spatial dimensionality is determined by the type of elements
-   used and can be queried using getDim(). Notice that the element type
-   also determines the type of surface elements to be used.
-
-*****************************************************************************/
-
-#include "Finley.h"
-#include "NodeFile.h"
-#include "ElementFile.h"
-#include "Util.h"
-#include "paso/SystemMatrixPattern.h"
-
-#include <map>
-#include <string>
-
-namespace escript {
-    class Data;
-}
-
-namespace finley {
-
-typedef std::map<std::string, int> TagMap;
-
-/****************************************************************************/
-
-class Mesh
-{
-public:
-    Mesh(const std::string name, int numDim, esysUtils::JMPI& mpi_info);
-    ~Mesh();
-
-    static Mesh* load(esysUtils::JMPI& mpi_info, const std::string fname);
-    static Mesh* read(esysUtils::JMPI& mpi_info, const std::string fname,
-                      int order, int reducedOrder, bool optimize);
-    static Mesh* readGmsh(esysUtils::JMPI& mpi_info, const std::string fname,
-                          int numDim, int order, int reducedOrder,
-                          bool optimize, bool useMacroElements);
-
-    void write(const std::string fname) const;
-
-    int getDim() const { return Nodes->numDim; }
-    int getStatus() const { return Nodes->status; }
-
-    void addPoints(int numPoints, const double *points_ptr, const int *tags_ptr);
-    void addTagMap(const char* name, int tag_key);
-    int getTag(const char* name) const;
-    bool isValidTagName(const char* name) const;
-    paso::SystemMatrixPattern_ptr getPattern(bool reduce_row_order, bool reduce_col_order);
-    paso::SystemMatrixPattern_ptr makePattern(bool reduce_row_order, bool reduce_col_order);
-    void printInfo(bool);
-
-    void setCoordinates(const escript::Data& newX);
-    void setElements(ElementFile *elements);
-    void setFaceElements(ElementFile *elements);
-    void setContactElements(ElementFile *elements);
-    void setPoints(ElementFile *elements);
-
-    void prepare(bool optimize);
-    void resolveNodeIds();
-    void createMappings(const std::vector<index_t>& dofDistribution,
-                        const std::vector<index_t>& nodeDistribution);
-    void markDOFsConnectedToRange(int* mask, int offset, int marker,
-                                  index_t firstDOF, index_t lastDOF, bool useLinear);
-    
-    void relabelElementNodes(const std::vector<index_t>&, index_t offset);
-
-    void glueFaces(double safetyFactor, double tolerance, bool);
-    void joinFaces(double safetyFactor, double tolerance, bool);
-
-    void findMatchingFaces(double, double, int*, int*, int*, int*);
-    void print();
-
-private:
-    void createColoring(const std::vector<index_t>& dofMap);
-    void distributeByRankOfDOF(const std::vector<index_t>& distribution);
-    void markNodes(std::vector<short>& mask, int offset, bool useLinear);
-    void optimizeDOFDistribution(std::vector<index_t>& distribution);
-    void optimizeDOFLabeling(const std::vector<index_t>& distribution);
-    void optimizeElementOrdering();
-    void setOrders();
-    void updateTagList();
-    void printElementInfo(const ElementFile* e, const std::string title,
-                          const std::string defaultType, bool full) const;
-
-    void writeElementInfo(std::ostream& stream, const ElementFile* e,
-                          const std::string defaultType) const;
-
-    static Mesh* readGmshSlave(esysUtils::JMPI& mpi_info, const std::string fname,
-                               int numDim, int order, int reducedOrder,
-                               bool optimize, bool useMacroElements);
-    static Mesh* readGmshMaster(esysUtils::JMPI& mpi_info, const std::string fname,
-                                int numDim, int order, int reducedOrder,
-                                bool optimize, bool useMacroElements);
-
-public:
-    // the name of the mesh
-    std::string m_name;
-    int approximationOrder;
-    int reducedApproximationOrder;
-    int integrationOrder;
-    int reducedIntegrationOrder;
-    // the table of the nodes
-    NodeFile* Nodes;
-    // the table of the elements
-    ElementFile* Elements;
-    // the table of the face elements
-    ElementFile* FaceElements;
-    // the table of the contact elements
-    ElementFile* ContactElements;
-    // the table of points (treated as elements of dimension 0)
-    ElementFile* Points;
-    // the tag map mapping names to tag keys
-    TagMap tagMap;
-
-    // pointers to the sparse matrix patterns
-    paso::SystemMatrixPattern_ptr FullFullPattern;
-    paso::SystemMatrixPattern_ptr FullReducedPattern;
-    paso::SystemMatrixPattern_ptr ReducedFullPattern;
-    paso::SystemMatrixPattern_ptr ReducedReducedPattern;
-    esysUtils::JMPI MPIInfo;
-};
-
-// this structure is used for matching surface elements
-struct FaceCenter
-{
-   int refId;
-   std::vector<double> x;
-};
-
-
-Mesh* Mesh_merge(const std::vector<Mesh*>& meshes);
-
-
-} // namespace finley
-
-#endif // __FINLEY_MESH_H__
-
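The TagMap removed with this header (together with addTagMap()/getTag()/isValidTagName() in Mesh.cpp above) is a plain name-to-integer lookup. A minimal sketch of its use, independent of the finley classes; the tag name "top" is made up for illustration.

    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>

    typedef std::map<std::string, int> TagMap;   // same typedef as in Mesh.h

    // Look up a tag key by name, failing loudly for unknown names,
    // as Mesh::getTag() does above.
    int getTag(const TagMap& tags, const std::string& name)
    {
        TagMap::const_iterator it = tags.find(name);
        if (it == tags.end())
            throw std::runtime_error("getTag: unknown tag name " + name);
        return it->second;
    }

    int main()
    {
        TagMap tags;
        tags["top"] = 2;                                       // addTagMap("top", 2)
        std::cout << getTag(tags, "top") << std::endl;         // prints 2
        std::cout << (tags.count("bottom") > 0) << std::endl;  // isValidTagName -> 0
        return 0;
    }
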
diff --git a/finley/src/Mesh_addPoints.cpp b/finley/src/Mesh_addPoints.cpp
index adb6817..6943198 100644
--- a/finley/src/Mesh_addPoints.cpp
+++ b/finley/src/Mesh_addPoints.cpp
@@ -21,22 +21,22 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include "FinleyDomain.h"
 
+#include <escript/index.h>
 
-#include "Mesh.h"
+using escript::ValueError;
 
 namespace finley {
 
 #ifdef ESYS_MPI
-void MPI_minimizeDistance(void *invec_p, void *inoutvec_p, int *len,
-                           MPI_Datatype *dtype)
+void MPI_minimizeDistance(void* invec_p, void* inoutvec_p, int *len,
+                          MPI_Datatype* dtype)
 {
-    const int numPoints = (*len)/2;
-    double *invec = reinterpret_cast<double*>(invec_p);
-    double *inoutvec = reinterpret_cast<double*>(inoutvec_p);
-    for (int i=0; i<numPoints; i++) {
+    const int numPoints = (*len) / 2;
+    double* invec = reinterpret_cast<double*>(invec_p);
+    double* inoutvec = reinterpret_cast<double*>(inoutvec_p);
+    for (int i = 0; i < numPoints; i++) {
         if (invec[2*i] < inoutvec[2*i]) {
             inoutvec[2*i] = invec[2*i];
             inoutvec[2*i+1] = invec[2*i+1];
@@ -45,51 +45,70 @@ void MPI_minimizeDistance(void *invec_p, void *inoutvec_p, int *len,
 }
 #endif
 
-void Mesh::addPoints(int numPoints, const double* points_ptr,
-                     const int* tags_ptr)
+void FinleyDomain::addDiracPoints(const std::vector<double>& points,
+                                  const std::vector<int>& tags)
 {
-    if (numPoints==0) {
-        return;
+    const int numDim = getDim();
+    if (points.size() % numDim != 0) {
+        std::stringstream ss;
+        unsigned long size = points.size();
+        ss << "Number of coordinates in dirac points is " << size
+           << " - this should be a multiple of the domain's dimensionality "
+           << numDim;
+        throw ValueError(ss.str());
     }
-    ElementFile *oldPoints=Points;
+
+    // points will be flattened
+    int numPoints = points.size() / numDim;
+    int numTags = tags.size();
+
+    if (numPoints != numTags)
+        throw ValueError("Number of dirac tags must match number of dirac points.");
+
+    if (numPoints == 0)
+        return;
+
+    const escript::DataTypes::real_t LARGE_POSITIVE_FLOAT =
+                                         escript::DataTypes::real_t_max();
+    ElementFile* oldPoints = m_points;
     const_ReferenceElementSet_ptr refPoints;
     int numOldPoints;
     if (oldPoints == NULL) {
         refPoints.reset(new ReferenceElementSet(Point1, integrationOrder,
                     reducedIntegrationOrder));
-        numOldPoints=0;
+        numOldPoints = 0;
     } else {
-        refPoints=oldPoints->referenceElementSet;
-        numOldPoints=oldPoints->numElements;
+        refPoints = oldPoints->referenceElementSet;
+        numOldPoints = oldPoints->numElements;
     }
-    ElementFile *newPoints=new ElementFile(refPoints, MPIInfo);
+    ElementFile* newPoints = new ElementFile(refPoints, m_mpiInfo);
 
     // first we find the node which is the closest on this processor:
     double *dist_p = new double[numPoints];
     int *node_id_p = new int[numPoints];
-    int *point_index_p = new int[numPoints];    // the code below does actually initialise this before using it
+    int *point_index_p = new int[numPoints];
+    // the code below does actually initialise this before using it
 
-    for (int i=0; i<numPoints; ++i) {
-        dist_p[i]=LARGE_POSITIVE_FLOAT;
-        node_id_p[i]=-1;
+    for (int i = 0; i < numPoints; ++i) {
+        dist_p[i] = LARGE_POSITIVE_FLOAT;
+        node_id_p[i] = -1;
     }
 
-    const double *coords = Nodes->Coordinates;
-    const int numDim = getDim();
+    const double* coords = m_nodes->Coordinates;
     if (numDim == 3) {
 #pragma omp parallel
         {
-            for (int i=0; i<numPoints; ++i) {
-                const double X0=points_ptr[INDEX2(0,i,numDim)];
-                const double X1=points_ptr[INDEX2(1,i,numDim)];
-                const double X2=points_ptr[INDEX2(2,i,numDim)];
-                double dist_local=LARGE_POSITIVE_FLOAT;
-                int node_id_local=-1;
-#pragma omp for schedule(static)
-                for (int n=0; n<Nodes->numNodes; n++) {
-                    const double D0=coords[INDEX2(0,n,numDim)] - X0;
-                    const double D1=coords[INDEX2(1,n,numDim)] - X1;
-                    const double D2=coords[INDEX2(2,n,numDim)] - X2;
+            for (int i = 0; i < numPoints; ++i) {
+                const double X0 = points[INDEX2(0,i,numDim)];
+                const double X1 = points[INDEX2(1,i,numDim)];
+                const double X2 = points[INDEX2(2,i,numDim)];
+                double dist_local = LARGE_POSITIVE_FLOAT;
+                int node_id_local = -1;
+#pragma omp for
+                for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+                    const double D0 = coords[INDEX2(0,n,numDim)] - X0;
+                    const double D1 = coords[INDEX2(1,n,numDim)] - X1;
+                    const double D2 = coords[INDEX2(2,n,numDim)] - X2;
                     const double d = D0*D0 + D1*D1 + D2*D2;
                     if (d < dist_local) {
                         dist_local = d;
@@ -98,7 +117,7 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
                 }
 #pragma omp critical
                 {
-                    if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) {
+                    if (dist_local < dist_p[i] || (dist_local == dist_p[i] && node_id_p[i]>node_id_local)) {
                         dist_p[i] = dist_local;
                         node_id_p[i] = node_id_local;
                     }
@@ -108,15 +127,15 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
     } else if (numDim == 2) {
 #pragma omp parallel
         {
-            for (int i=0; i<numPoints; ++i) {
-                const double X0=points_ptr[INDEX2(0,i,numDim)];
-                const double X1=points_ptr[INDEX2(1,i,numDim)];
-                double dist_local=LARGE_POSITIVE_FLOAT;
-                int node_id_local=-1;
-#pragma omp for schedule(static)
-                for (int n=0; n<Nodes->numNodes; n++) {
-                    const double D0=coords[INDEX2(0,n,numDim)] - X0;
-                    const double D1=coords[INDEX2(1,n,numDim)] - X1;
+            for (int i = 0; i < numPoints; ++i) {
+                const double X0 = points[INDEX2(0,i,numDim)];
+                const double X1 = points[INDEX2(1,i,numDim)];
+                double dist_local = LARGE_POSITIVE_FLOAT;
+                int node_id_local = -1;
+#pragma omp for
+                for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+                    const double D0 = coords[INDEX2(0,n,numDim)] - X0;
+                    const double D1 = coords[INDEX2(1,n,numDim)] - X1;
                     const double d = D0*D0 + D1*D1;
                     if (d < dist_local) {
                         dist_local = d;
@@ -125,7 +144,7 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
                 }
 #pragma omp critical
                 {
-                  if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) {
+                  if (dist_local < dist_p[i] || (dist_local == dist_p[i] && node_id_p[i]>node_id_local)) {
                       dist_p[i] = dist_local;
                       node_id_p[i] = node_id_local;
                   }
@@ -135,13 +154,13 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
     } else { // numDim==1
 #pragma omp parallel
         {
-            for (int i=0; i<numPoints; ++i) {
-                const double X0=points_ptr[INDEX2(0,i,numDim)];
-                double dist_local=LARGE_POSITIVE_FLOAT;
-                int node_id_local=-1;
-#pragma omp for schedule(static)
-                for (int n=0; n<Nodes->numNodes; n++) {
-                    const double D0=coords[INDEX2(0,n,numDim)] - X0;
+            for (int i = 0; i < numPoints; ++i) {
+                const double X0 = points[INDEX2(0,i,numDim)];
+                double dist_local = LARGE_POSITIVE_FLOAT;
+                int node_id_local = -1;
+#pragma omp for
+                for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+                    const double D0 = coords[INDEX2(0,n,numDim)] - X0;
                     const double d = D0*D0;
                     if (d < dist_local) {
                         dist_local = d;
@@ -150,7 +169,7 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
                 }
 #pragma omp critical
                 {
-                    if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) {
+                    if (dist_local < dist_p[i] || (dist_local == dist_p[i] && node_id_p[i]>node_id_local)) {
                         dist_p[i] = dist_local;
                         node_id_p[i] = node_id_local;
                     }
@@ -162,22 +181,22 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
 #ifdef ESYS_MPI
     // now we need to reduce this across all processors
     const int count = 2*numPoints;
-    double *sendbuf=new double[count];
-    double *recvbuf=new double[count];
+    double *sendbuf = new double[count];
+    double *recvbuf = new double[count];
 
-    for (int i=0; i<numPoints; ++i) {
-        sendbuf[2*i  ]=dist_p[i];
-        sendbuf[2*i+1]=static_cast<double>(Nodes->Id[node_id_p[i]]);
+    for (int i = 0; i < numPoints; ++i) {
+        sendbuf[2*i  ] = dist_p[i];
+        sendbuf[2*i+1] = static_cast<double>(m_nodes->Id[node_id_p[i]]);
     }
     MPI_Op op;
     MPI_Op_create(MPI_minimizeDistance, true, &op);
-    MPI_Allreduce(sendbuf, recvbuf, count, MPI_DOUBLE, op, MPIInfo->comm);
+    MPI_Allreduce(sendbuf, recvbuf, count, MPI_DOUBLE, op, m_mpiInfo->comm);
     MPI_Op_free(&op);
     // if the node id has changed we found another node which is closer
     // elsewhere
-    for (int i=0; i<numPoints; ++i) {
+    for (int i = 0; i < numPoints; ++i) {
         const int best_fit_Id = static_cast<int>(recvbuf[2*i+1]+0.5);
-        if (best_fit_Id != Nodes->Id[node_id_p[i]]) {
+        if (best_fit_Id != m_nodes->Id[node_id_p[i]]) {
             node_id_p[i] = -1;
         }
     }
@@ -187,22 +206,22 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
     delete[] dist_p;
 
     // we pick the points to be used on this processor
-    int numNewPoints=0;
-    const int firstDOF=Nodes->degreesOfFreedomDistribution->getFirstComponent();
-    const int lastDOF=Nodes->degreesOfFreedomDistribution->getLastComponent();
+    int numNewPoints = 0;
+    const index_t firstDOF = m_nodes->degreesOfFreedomDistribution->getFirstComponent();
+    const index_t lastDOF = m_nodes->degreesOfFreedomDistribution->getLastComponent();
 
-    for (int i=0; i<numPoints; ++i) {
-        if (node_id_p[i]>-1) {
+    for (int i = 0; i < numPoints; ++i) {
+        if (node_id_p[i] > -1) {
             // this processor uses a node which is identical to point i
-            if (Nodes->globalReducedDOFIndex[node_id_p[i]] > -1) {
+            if (m_nodes->globalReducedDOFIndex[node_id_p[i]] > -1) {
                 // the point is also used in the reduced mesh
-                const int global_id=Nodes->globalDegreesOfFreedom[node_id_p[i]];
-                if (firstDOF<=global_id && global_id<lastDOF) {
+                const index_t global_id = m_nodes->globalDegreesOfFreedom[node_id_p[i]];
+                if (firstDOF <= global_id && global_id < lastDOF) {
                     // is this point actually relevant
-                    bool notAnOldPoint=true;
+                    bool notAnOldPoint = true;
                     if (numOldPoints > 0) {
                         // is this point already in the Point table?
-                        for (int k=0; k<numOldPoints; ++k) {
+                        for (int k = 0; k < numOldPoints; ++k) {
                             if (global_id == oldPoints->Nodes[k]) {
                                 notAnOldPoint=false;
                                 break;
@@ -211,15 +230,15 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
                     }
                     if (notAnOldPoint) {
                         // is this point unique in the new list of points?
-                        bool notANewPoint=true;
-                        for (int k=0; k<numNewPoints; ++k) {
-                            if (global_id == Nodes->globalDegreesOfFreedom[node_id_p[point_index_p[k]]]) {
-                                notANewPoint=false;
+                        bool notANewPoint = true;
+                        for (int k = 0; k < numNewPoints; ++k) {
+                            if (global_id == m_nodes->globalDegreesOfFreedom[node_id_p[point_index_p[k]]]) {
+                                notANewPoint = false;
                                 break;
                             }
                         }
                         if (notANewPoint) {
-                            point_index_p[numNewPoints]=i;
+                            point_index_p[numNewPoints] = i;
                             numNewPoints++;
                         }
                     }
@@ -232,35 +251,31 @@ void Mesh::addPoints(int numPoints, const double* points_ptr,
     newPoints->allocTable(numOldPoints+numNewPoints);
     if (numOldPoints > 0) {
 #pragma omp parallel for schedule(static)
-        for (int n=0; n<numOldPoints; n++) {
-            newPoints->Owner[n]=oldPoints->Owner[n];
-            newPoints->Id[n]   =oldPoints->Id[n];
-            newPoints->Tag[n]  =oldPoints->Tag[n];
-            newPoints->Nodes[n]=oldPoints->Nodes[n];
-            newPoints->Color[n]=0;
+        for (int n = 0; n < numOldPoints; n++) {
+            newPoints->Owner[n] = oldPoints->Owner[n];
+            newPoints->Id[n]    = oldPoints->Id[n];
+            newPoints->Tag[n]   = oldPoints->Tag[n];
+            newPoints->Nodes[n] = oldPoints->Nodes[n];
+            newPoints->Color[n] = 0;
         }
     }
 #pragma omp parallel for schedule(static)
-    for (int n=0; n<numNewPoints; n++) {
+    for (int n = 0; n < numNewPoints; n++) {
         const int idx = point_index_p[n];
-        newPoints->Owner[numOldPoints+n]=MPIInfo->rank;
-        newPoints->Id[numOldPoints+n]   =0;
-        newPoints->Tag[numOldPoints+n]  =tags_ptr[idx];
-        newPoints->Nodes[numOldPoints+n]=node_id_p[idx];
-        newPoints->Color[numOldPoints+n]=0;
+        newPoints->Owner[numOldPoints+n] = m_mpiInfo->rank;
+        newPoints->Id[numOldPoints+n]    = 0;
+        newPoints->Tag[numOldPoints+n]   = tags[idx];
+        newPoints->Nodes[numOldPoints+n] = node_id_p[idx];
+        newPoints->Color[numOldPoints+n] = 0;
     }
-    newPoints->minColor=0;
-    newPoints->maxColor=0;
+    newPoints->minColor = 0;
+    newPoints->maxColor = 0;
 
     // all done, clean up
     delete[] node_id_p;
     delete[] point_index_p;
-    if (noError()) {
-        delete oldPoints;
-        Points=newPoints;
-    } else {
-        delete newPoints;
-    }
+    delete oldPoints;
+    m_points = newPoints;
 }
 
 } // namespace finley
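The global nearest-node search in addDiracPoints() above relies on a user-defined MPI reduction over (distance, node id) pairs: every rank contributes its local best candidate and MPI_Allreduce keeps, per point, the pair with the smaller distance. A minimal standalone sketch of that idiom, reduced to a single point with made-up local candidates (compile with an MPI C++ wrapper such as mpic++):

    #include <mpi.h>
    #include <cstdio>

    // Keep, for each point, the (distance, id) pair with the smaller distance;
    // the node id travels as a double next to its distance, as in the code above.
    static void minimizeDistance(void* in_p, void* inout_p, int* len, MPI_Datatype*)
    {
        double* in = static_cast<double*>(in_p);
        double* inout = static_cast<double*>(inout_p);
        const int numPoints = (*len) / 2;
        for (int i = 0; i < numPoints; ++i) {
            if (in[2*i] < inout[2*i]) {
                inout[2*i]   = in[2*i];      // distance
                inout[2*i+1] = in[2*i+1];    // node id
            }
        }
    }

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        // one point: this rank's best squared distance and the id of that node
        double send[2] = { 1.0 + rank, static_cast<double>(10 * rank) };
        double recv[2];

        MPI_Op op;
        MPI_Op_create(minimizeDistance, 1 /* commutative */, &op);
        MPI_Allreduce(send, recv, 2, MPI_DOUBLE, op, MPI_COMM_WORLD);
        MPI_Op_free(&op);

        if (rank == 0)
            std::printf("closest node %d at distance %g\n",
                        static_cast<int>(recv[1] + 0.5), recv[0]);
        MPI_Finalize();
        return 0;
    }
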
diff --git a/finley/src/Mesh_findMatchingFaces.cpp b/finley/src/Mesh_findMatchingFaces.cpp
index a1ae16e..1c6862e 100644
--- a/finley/src/Mesh_findMatchingFaces.cpp
+++ b/finley/src/Mesh_findMatchingFaces.cpp
@@ -17,28 +17,35 @@
 
 /****************************************************************************
 
-  Finley: Mesh
+  Finley: Domain
 
   searches for faces in the mesh which are matching.
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include "FinleyDomain.h"
 #include "Util.h"
-#include "Mesh.h"
+
+#include <escript/index.h>
 
 namespace finley {
 
-static double lockingGridSize=0.;
+static double lockingGridSize = 0.;
+
+// this structure is used for matching surface elements
+struct FaceCenter
+{
+   int refId;
+   std::vector<double> x;
+};
+
 
 /// comparison function for findMatchingFaces
 bool FaceCenterCompare(const FaceCenter& e1, const FaceCenter& e2)
 {
-    for (int i=0; i<e1.x.size(); i++) {
-        bool l=(e1.x[i] < e2.x[i]+lockingGridSize);
-        bool g=(e2.x[i] < e1.x[i]+lockingGridSize);
+    for (int i = 0; i < e1.x.size(); i++) {
+        bool l = (e1.x[i] < e2.x[i]+lockingGridSize);
+        bool g = (e2.x[i] < e1.x[i]+lockingGridSize);
         if (! (l && g)) {
             if (l) return true;
             if (g) return false;
@@ -50,62 +57,62 @@ bool FaceCenterCompare(const FaceCenter& e1, const FaceCenter& e2)
 inline double getDist(int e0, int i0, int e1, int i1, int numDim, int NN,
                       const double* X)
 {
-    double dist=0.;
-    for (int i=0; i<numDim; i++) {
-        dist=std::max(dist, std::abs(X[INDEX3(i, i0, e0, numDim, NN)]
+    double dist = 0.;
+    for (int i = 0; i < numDim; i++) {
+        dist = std::max(dist, std::abs(X[INDEX3(i, i0, e0, numDim, NN)]
                     - X[INDEX3(i, i1, e1, numDim, NN)]));
     }
     return dist;
 }
 
-void Mesh::findMatchingFaces(double safety_factor, double tolerance,
-                             int* numPairs, int* elem0, int* elem1,
-                             int* matching_nodes_in_elem1)
+void FinleyDomain::findMatchingFaces(double safety_factor, double tolerance,
+                                     int* numPairs, int* elem0, int* elem1,
+                                     int* matching_nodes_in_elem1) const
 {
-    const_ReferenceElement_ptr refElement(FaceElements->referenceElementSet->
+    const_ReferenceElement_ptr refElement(m_faceElements->referenceElementSet->
                                             borrowReferenceElement(false));
-    const int numDim=Nodes->numDim;
-    const int NN=FaceElements->numNodes;
-    const int numNodesOnFace=refElement->Type->numNodesOnFace;
-    const int* faceNodes=refElement->Type->faceNodes;
-    const int* shiftNodes=refElement->Type->shiftNodes;
-    const int* reverseNodes=refElement->Type->reverseNodes;
+    const int numDim = m_nodes->numDim;
+    const int NN = m_faceElements->numNodes;
+    const int numNodesOnFace = refElement->Type->numNodesOnFace;
+    const int* faceNodes = refElement->Type->faceNodes;
+    const int* shiftNodes = refElement->Type->shiftNodes;
+    const int* reverseNodes = refElement->Type->reverseNodes;
 
     if (numNodesOnFace <= 0) {
-        char error_msg[LenErrorMsg_MAX];
-        sprintf(error_msg, "Mesh::findMatchingFaces: matching faces cannot be applied to face elements of type %s",refElement->Type->Name);
-        setError(TYPE_ERROR, error_msg);
-        return;
+        std::stringstream ss;
+        ss << "Mesh::findMatchingFaces: matching faces cannot be applied to "
+            "face elements of type " << refElement->Type->Name;
+        throw escript::ValueError(ss.str());
     }
-    double* X = new double[NN*numDim*FaceElements->numElements];
-    std::vector<FaceCenter> center(FaceElements->numElements);
-    int* a1=new int[NN];
-    int* a2=new int[NN];
-    double h=std::numeric_limits<double>::max();
+    double* X = new double[NN * numDim * m_faceElements->numElements];
+    std::vector<FaceCenter> center(m_faceElements->numElements);
+    int* a1 = new int[NN];
+    int* a2 = new int[NN];
+    double h = std::numeric_limits<double>::max();
 
     // TODO: OMP
-    for (int e=0; e<FaceElements->numElements; e++) {
+    for (index_t e = 0; e < m_faceElements->numElements; e++) {
         // get the coordinates of the nodes
-        util::gather(NN, &(FaceElements->Nodes[INDEX2(0,e,NN)]), numDim,
-                     Nodes->Coordinates, &X[INDEX3(0,0,e,numDim,NN)]);
+        util::gather(NN, &(m_faceElements->Nodes[INDEX2(0,e,NN)]), numDim,
+                     m_nodes->Coordinates, &X[INDEX3(0,0,e,numDim,NN)]);
         // get the element center
-        center[e].refId=e;
+        center[e].refId = e;
         center[e].x.assign(numDim, 0);
-        for (int i0=0; i0<numNodesOnFace; i0++) {
-            for (int i=0; i<numDim; i++)
+        for (int i0 = 0; i0 < numNodesOnFace; i0++) {
+            for (int i = 0; i < numDim; i++)
                 center[e].x[i] += X[INDEX3(i,faceNodes[i0],e,numDim,NN)];
         }
-        for (int i=0; i<numDim; i++)
-            center[e].x[i]/=numNodesOnFace;
+        for (int i = 0; i < numDim; i++)
+            center[e].x[i] /= numNodesOnFace;
         // get the minimum distance between nodes in the element
-        for (int i0=0; i0<numNodesOnFace; i0++) {
-            for (int i1=i0+1; i1<numNodesOnFace; i1++) {
-                double h_local=getDist(e, faceNodes[i0], e, faceNodes[i1], numDim, NN, X);
-                h=std::min(h, h_local);
+        for (int i0 = 0; i0 < numNodesOnFace; i0++) {
+            for (int i1 = i0+1; i1 < numNodesOnFace; i1++) {
+                double h_local = getDist(e, faceNodes[i0], e, faceNodes[i1], numDim, NN, X);
+                h = std::min(h, h_local);
             }
         }
     }
-    lockingGridSize=h*std::max(safety_factor, 0.);
+    lockingGridSize = h*std::max(safety_factor, 0.);
 #ifdef Finley_TRACE
     printf("locking grid size is %e\n", lockingGridSize);
     printf("absolute tolerance is %e.\n", h * tolerance);
@@ -113,101 +120,101 @@ void Mesh::findMatchingFaces(double safety_factor, double tolerance,
     // sort the elements by center coordinates (lexicographical)
     std::sort(center.begin(), center.end(), FaceCenterCompare);
     // find elements with matching center
-    *numPairs=0;
+    *numPairs = 0;
 
     // TODO: OMP
-    for (int e=0; e<FaceElements->numElements-1 && noError(); e++) {
-        double dist=0.;
-        for (int i=0; i<numDim; i++)
-            dist=std::max(dist, std::abs(center[e].x[i]-center[e+1].x[i]));
+    for (index_t e = 0; e < m_faceElements->numElements-1; e++) {
+        double dist = 0.;
+        for (int i = 0; i < numDim; i++)
+            dist = std::max(dist, std::abs(center[e].x[i]-center[e+1].x[i]));
         if (dist < h * tolerance) {
-            const int e_0=center[e].refId;
-            const int e_1=center[e+1].refId;
-            elem0[*numPairs]=e_0;
-            elem1[*numPairs]=e_1;
+            const int e_0 = center[e].refId;
+            const int e_1 = center[e+1].refId;
+            elem0[*numPairs] = e_0;
+            elem1[*numPairs] = e_1;
             // now the element e_1 is rotated such that the first node in
             // element e_0 and e_1 have the same coordinates
-            int* perm=a1;
-            int* perm_tmp=a2;
-            for (int i=0; i<NN; i++)
-                perm[i]=i;
-            while (noError()) {
+            int* perm = a1;
+            int* perm_tmp = a2;
+            for (int i = 0; i < NN; i++)
+                perm[i] = i;
+            while (1) {
                 // if node 0 and perm[0] are the same we are ready
-                dist=getDist(e_0, 0, e_1, perm[0], numDim, NN, X);
+                dist = getDist(e_0, 0, e_1, perm[0], numDim, NN, X);
                 if (dist <= h*tolerance)
                     break;
-                if (shiftNodes[0]>=0) {
+                if (shiftNodes[0] >= 0) {
                     // rotate the nodes
-                    int* itmp_ptr=perm;
-                    perm=perm_tmp;
-                    perm_tmp=itmp_ptr;
+                    int* itmp_ptr = perm;
+                    perm = perm_tmp;
+                    perm_tmp = itmp_ptr;
                     #pragma ivdep
-                    for (int i=0; i<NN; i++)
-                        perm[i]=perm_tmp[shiftNodes[i]];
+                    for (int i = 0; i < NN; i++)
+                        perm[i] = perm_tmp[shiftNodes[i]];
                 }
                 // if the permutation is back at the identity, i.e. perm[0]=0,
                 // the faces don't match:
-                if (perm[0]==0) {
-                    char error_msg[LenErrorMsg_MAX];
-                    sprintf(error_msg, "Mesh_findMatchingFaces: couldn't match first node of element %d to touching element %d", e_0, e_1);
-                    setError(VALUE_ERROR, error_msg);
+                if (perm[0] == 0) {
+                    std::stringstream ss;
+                    ss << "Mesh::findMatchingFaces: couldn't match first node "
+                        "of element " << e_0 << " to touching element " << e_1;
+                    throw escript::ValueError(ss.str());
                 }
             }
             // now we check if the second nodes match
-            if (noError()) {
-                if (numNodesOnFace > 1) {
-                    dist=getDist(e_0, 1, e_1, perm[faceNodes[1]], numDim, NN, X);
-                    // if the second node does not match we reverse the
-                    // direction of the nodes
-                    if (dist > h*tolerance) {
-                        // rotate the nodes
-                        if (reverseNodes[0] < 0) {
-                            char error_msg[LenErrorMsg_MAX];
-                            sprintf(error_msg, "Mesh_findMatchingFaces: couldn't match the second node of element %d to touching element %d", e_0, e_1);
-                            setError(VALUE_ERROR, error_msg);
-                        } else {
-                            int* itmp_ptr=perm;
-                            perm=perm_tmp;
-                            perm_tmp=itmp_ptr;
-                            #pragma ivdep
-                            for (int i=0; i<NN; i++)
-                                perm[i]=perm_tmp[reverseNodes[i]];
-                            dist=getDist(e_0, 1, e_1, perm[faceNodes[1]], numDim, NN, X);
-                            if (dist > h*tolerance) {
-                                char error_msg[LenErrorMsg_MAX];
-                                sprintf(error_msg, "Mesh_findMatchingFaces: couldn't match the second node of element %d to touching element %d", e_0, e_1);
-                                setError(VALUE_ERROR, error_msg);
-                            }
+            if (numNodesOnFace > 1) {
+                dist = getDist(e_0, 1, e_1, perm[faceNodes[1]], numDim, NN, X);
+                // if the second node does not match we reverse the
+                // direction of the nodes
+                if (dist > h*tolerance) {
+                    // rotate the nodes
+                    if (reverseNodes[0] < 0) {
+                        std::stringstream ss;
+                        ss << "Mesh::findMatchingFaces: couldn't match the"
+                            " second node of element " << e_0
+                            << " to touching element " << e_1;
+                        throw escript::ValueError(ss.str());
+                    } else {
+                        int* itmp_ptr = perm;
+                        perm = perm_tmp;
+                        perm_tmp = itmp_ptr;
+                        #pragma ivdep
+                        for (int i = 0; i < NN; i++)
+                            perm[i] = perm_tmp[reverseNodes[i]];
+                        dist = getDist(e_0, 1, e_1, perm[faceNodes[1]], numDim, NN, X);
+                        if (dist > h*tolerance) {
+                            std::stringstream ss;
+                            ss << "Mesh::findMatchingFaces: couldn't match the"
+                                " second node of element " << e_0
+                                << " to touching element " << e_1;
+                            throw escript::ValueError(ss.str());
                         }
                     }
                 }
             }
             // we check if the rest of the face nodes match
-            if (noError()) {
-                for (int i=2; i<numNodesOnFace; i++) {
-                    const int n=faceNodes[i];
-                    dist=getDist(e_0, n, e_1, perm[n], numDim, NN, X);
-                    if (dist > h*tolerance) {
-                        char error_msg[LenErrorMsg_MAX];
-                        sprintf(error_msg, "Mesh_findMatchingFaces: couldn't match the %d-th node of element %d to touching element %d", i, e_0, e_1);
-                        setError(VALUE_ERROR, error_msg);
-                        break;
-                    }
+            for (int i = 2; i < numNodesOnFace; i++) {
+                const int n = faceNodes[i];
+                dist = getDist(e_0, n, e_1, perm[n], numDim, NN, X);
+                if (dist > h*tolerance) {
+                    std::stringstream ss;
+                    ss << "Mesh::findMatchingFaces: couldn't match the "
+                        << i << "-th node of element " << e_0
+                        << " to touching element " << e_1;
+                    throw escript::ValueError(ss.str());
                 }
             }
             // copy over the permuted nodes of e_1 into matching_nodes_in_elem1
-            if (noError()) {
-                for (int i=0; i<NN; i++)
-                    matching_nodes_in_elem1[INDEX2(i,*numPairs,NN)]=FaceElements->Nodes[INDEX2(perm[i],e_1,NN)];
-            }
+            for (int i = 0; i < NN; i++)
+                matching_nodes_in_elem1[INDEX2(i,*numPairs,NN)] =
+                    m_faceElements->Nodes[INDEX2(perm[i],e_1,NN)];
             (*numPairs)++;
         }
     }
 #ifdef Finley_TRACE
-    printf("number of pairs of matching faces %d\n",*numPairs);
+    printf("number of pairs of matching faces %d\n", *numPairs);
 #endif
 
-    /* clean up */
     delete[] X;
     delete[] a1;
     delete[] a2;
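The matching logic above sorts face centres with a fuzzy lexicographic comparison, so that centres which agree to within the locking grid size compare as equal and land next to each other after the sort. A standalone sketch of that comparator follows; the tolerance value and coordinates are illustrative only.

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Center { int refId; std::vector<double> x; };

    static const double tol = 1e-8;          // plays the role of lockingGridSize

    // Coordinates closer than tol compare as equal; otherwise the first
    // differing coordinate decides the order (cf. FaceCenterCompare above).
    static bool centerCompare(const Center& a, const Center& b)
    {
        for (std::size_t i = 0; i < a.x.size(); ++i) {
            const bool l = a.x[i] < b.x[i] + tol;
            const bool g = b.x[i] < a.x[i] + tol;
            if (!(l && g)) {
                if (l) return true;
                if (g) return false;
            }
        }
        return false;                        // all coordinates (nearly) equal
    }

    int main()
    {
        const double coords[3][2] = { {1.0, 2.0}, {0.0, 0.0}, {1.0, 2.0 + 1e-12} };
        std::vector<Center> c(3);
        for (int e = 0; e < 3; ++e) {
            c[e].refId = e;
            c[e].x.assign(coords[e], coords[e] + 2);
        }
        std::sort(c.begin(), c.end(), centerCompare);
        // centres 0 and 2 are now adjacent and would be paired by the matcher
        for (std::size_t e = 0; e < c.size(); ++e)
            std::printf("%d\n", c[e].refId);
        return 0;
    }
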
diff --git a/finley/src/Mesh_getPasoPattern.cpp b/finley/src/Mesh_getPasoPattern.cpp
new file mode 100644
index 0000000..49f584c
--- /dev/null
+++ b/finley/src/Mesh_getPasoPattern.cpp
@@ -0,0 +1,137 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifdef ESYS_HAVE_PASO
+#include "FinleyDomain.h"
+#include "IndexList.h"
+
+#include <boost/scoped_array.hpp>
+
+namespace finley {
+
+paso::SystemMatrixPattern_ptr FinleyDomain::getPasoPattern(
+                              bool reducedRowOrder, bool reducedColOrder) const
+{
+    paso::SystemMatrixPattern_ptr out;
+    // make sure that the requested pattern is available
+    if (reducedRowOrder) {
+        if (reducedColOrder) {
+            if (!ReducedReducedPattern)
+                ReducedReducedPattern = makePasoPattern(reducedRowOrder, reducedColOrder);
+        } else {
+            if (!ReducedFullPattern)
+                ReducedFullPattern = makePasoPattern(reducedRowOrder, reducedColOrder);
+        }
+    } else {
+        if (reducedColOrder) {
+            if (!FullReducedPattern)
+                FullReducedPattern = makePasoPattern(reducedRowOrder, reducedColOrder);
+        } else {
+            if (!FullFullPattern)
+                FullFullPattern = makePasoPattern(reducedRowOrder, reducedColOrder);
+        }
+    }
+    if (reducedRowOrder) {
+        if (reducedColOrder) {
+            out = ReducedReducedPattern;
+        } else {
+            out = ReducedFullPattern;
+        }
+    } else {
+        if (reducedColOrder) {
+            out = FullReducedPattern;
+        } else {
+            out = FullFullPattern;
+        }
+    }
+    return out;
+}
+
+paso::SystemMatrixPattern_ptr FinleyDomain::makePasoPattern(
+                              bool reducedRowOrder, bool reducedColOrder) const
+{
+    paso::Connector_ptr col_connector, row_connector;
+    escript::Distribution_ptr colDistribution, rowDistribution;
+  
+    dim_t myNumColTargets, myNumRowTargets;
+    dim_t numColTargets, numRowTargets;
+    const index_t *colTarget, *rowTarget;
+
+    if (reducedColOrder) {
+        myNumColTargets = m_nodes->getNumReducedDegreesOfFreedom();
+        numColTargets = m_nodes->getNumReducedDegreesOfFreedomTargets();
+        colTarget = m_nodes->borrowTargetReducedDegreesOfFreedom();
+        colDistribution = m_nodes->reducedDegreesOfFreedomDistribution;
+        col_connector = m_nodes->reducedDegreesOfFreedomConnector;
+    } else {
+        myNumColTargets = m_nodes->getNumDegreesOfFreedom();
+        numColTargets = m_nodes->getNumDegreesOfFreedomTargets();
+        colTarget = m_nodes->borrowTargetDegreesOfFreedom();
+        colDistribution = m_nodes->degreesOfFreedomDistribution;
+        col_connector = m_nodes->degreesOfFreedomConnector;
+    }
+
+    if (reducedRowOrder) {
+        myNumRowTargets = m_nodes->getNumReducedDegreesOfFreedom();
+        numRowTargets = m_nodes->getNumReducedDegreesOfFreedomTargets();
+        rowTarget = m_nodes->borrowTargetReducedDegreesOfFreedom();
+        rowDistribution = m_nodes->reducedDegreesOfFreedomDistribution;
+        row_connector = m_nodes->reducedDegreesOfFreedomConnector;
+    } else {
+        myNumRowTargets = m_nodes->getNumDegreesOfFreedom();
+        numRowTargets = m_nodes->getNumDegreesOfFreedomTargets();
+        rowTarget = m_nodes->borrowTargetDegreesOfFreedom();
+        rowDistribution = m_nodes->degreesOfFreedomDistribution;
+        row_connector = m_nodes->degreesOfFreedomConnector;
+    }
+    boost::scoped_array<IndexList> index_list(new IndexList[numRowTargets]);
+  
+#pragma omp parallel
+    {
+        // insert contributions from element matrices into columns in indexlist
+        IndexList_insertElements(index_list.get(), m_elements, reducedRowOrder,
+                                 rowTarget, reducedColOrder, colTarget);
+        IndexList_insertElements(index_list.get(), m_faceElements,
+                                 reducedRowOrder, rowTarget, reducedColOrder,
+                                 colTarget);
+        IndexList_insertElements(index_list.get(), m_contactElements,
+                                 reducedRowOrder, rowTarget, reducedColOrder,
+                                 colTarget);
+        IndexList_insertElements(index_list.get(), m_points, reducedRowOrder,
+                                 rowTarget, reducedColOrder, colTarget);
+    }
+
+    // create pattern
+    paso::Pattern_ptr mainPattern(paso::Pattern::fromIndexListArray(0,
+              myNumRowTargets, index_list.get(), 0, myNumColTargets, 0));
+    paso::Pattern_ptr colCouplePattern(paso::Pattern::fromIndexListArray(0,
+              myNumRowTargets, index_list.get(), myNumColTargets,
+              numColTargets, -myNumColTargets));
+    paso::Pattern_ptr rowCouplePattern(paso::Pattern::fromIndexListArray(
+              myNumRowTargets, numRowTargets, index_list.get(), 0,
+              myNumColTargets, 0));
+
+    paso::SystemMatrixPattern_ptr out(new paso::SystemMatrixPattern(
+                MATRIX_FORMAT_DEFAULT, rowDistribution, colDistribution,
+                mainPattern, colCouplePattern, rowCouplePattern,
+                col_connector, row_connector));
+    return out;
+}
+
+} // namespace finley
+
+#endif // ESYS_HAVE_PASO
+
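
The new Mesh_getPasoPattern.cpp above keeps one lazily built pattern per (row order, column order) combination and returns the cached pointer on later calls. A compilable sketch of that build-once-then-reuse caching, with stand-in types and names (Domain, PatternPtr, makePattern are illustrative, not the escript API):

    #include <iostream>
    #include <memory>
    #include <string>

    using PatternPtr = std::shared_ptr<std::string>;  // stand-in for SystemMatrixPattern_ptr

    class Domain {
    public:
        PatternPtr getPattern(bool reducedRow, bool reducedCol) const
        {
            // pick the cache slot for this combination and fill it on first use
            PatternPtr& slot = reducedRow ? (reducedCol ? rr : rf)
                                          : (reducedCol ? fr : ff);
            if (!slot)
                slot = makePattern(reducedRow, reducedCol);
            return slot;
        }

    private:
        PatternPtr makePattern(bool reducedRow, bool reducedCol) const
        {
            return std::make_shared<std::string>(
                    std::string(reducedRow ? "R" : "F") + (reducedCol ? "R" : "F"));
        }
        mutable PatternPtr rr, rf, fr, ff;   // the four cached combinations
    };

    int main()
    {
        Domain d;
        std::cout << *d.getPattern(true, false) << '\n';                              // builds "RF"
        std::cout << (d.getPattern(true, false) == d.getPattern(true, false)) << '\n'; // cached: 1
        return 0;
    }
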
diff --git a/finley/src/Mesh_getPattern.cpp b/finley/src/Mesh_getPattern.cpp
deleted file mode 100644
index 74e0e07..0000000
--- a/finley/src/Mesh_getPattern.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************
-
-  Finley: Mesh
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Mesh.h"
-#include "IndexList.h"
-#include <boost/scoped_array.hpp>
-
-namespace finley {
-
-/// returns a reference to the matrix pattern
-paso::SystemMatrixPattern_ptr Mesh::getPattern(bool reduce_row_order, bool reduce_col_order)
-{
-    paso::SystemMatrixPattern_ptr out;
-    resetError();
-    /* make sure that the requested pattern is available */
-    if (reduce_row_order) {
-        if (reduce_col_order) {
-            if (ReducedReducedPattern==NULL)
-                ReducedReducedPattern=makePattern(reduce_row_order,reduce_col_order);
-        } else {
-            if (ReducedFullPattern==NULL)
-                ReducedFullPattern=makePattern(reduce_row_order,reduce_col_order);
-        }
-    } else {
-        if (reduce_col_order) {
-            if (FullReducedPattern==NULL)
-                FullReducedPattern=makePattern(reduce_row_order,reduce_col_order);
-        } else {
-            if (FullFullPattern==NULL)
-                FullFullPattern=makePattern(reduce_row_order,reduce_col_order);
-        }
-    }
-    if (noError()) {
-        if (reduce_row_order) {
-            if (reduce_col_order) {
-                out = ReducedReducedPattern;
-            } else {
-                out = ReducedFullPattern;
-            }
-        } else {
-            if (reduce_col_order) {
-                out = FullReducedPattern;
-            } else {
-                out = FullFullPattern;
-            }
-        }
-    }  
-    return out;
-}
-
-paso::SystemMatrixPattern_ptr Mesh::makePattern(bool reduce_row_order, bool reduce_col_order)
-{
-    paso::SystemMatrixPattern_ptr out;
-    paso::Connector_ptr col_connector, row_connector;
-    paso::Distribution_ptr colDistribution, rowDistribution;
-  
-    resetError();
-
-    int myNumColTargets, myNumRowTargets;
-    int numColTargets, numRowTargets;
-    const index_t *colTarget, *rowTarget;
-
-    if (reduce_col_order) {
-        myNumColTargets=Nodes->getNumReducedDegreesOfFreedom();
-        numColTargets=Nodes->reducedDegreesOfFreedomMapping.getNumTargets();
-        colTarget=Nodes->borrowTargetReducedDegreesOfFreedom();
-        colDistribution=Nodes->reducedDegreesOfFreedomDistribution;
-        col_connector=Nodes->reducedDegreesOfFreedomConnector;
-    } else {
-        myNumColTargets=Nodes->getNumDegreesOfFreedom();
-        numColTargets=Nodes->degreesOfFreedomMapping.getNumTargets();
-        colTarget=Nodes->borrowTargetDegreesOfFreedom();
-        colDistribution=Nodes->degreesOfFreedomDistribution;
-        col_connector=Nodes->degreesOfFreedomConnector;
-    }
-
-    if (reduce_row_order) {
-        myNumRowTargets=Nodes->getNumReducedDegreesOfFreedom();
-        numRowTargets=Nodes->reducedDegreesOfFreedomMapping.getNumTargets();
-        rowTarget=Nodes->borrowTargetReducedDegreesOfFreedom();
-        rowDistribution=Nodes->reducedDegreesOfFreedomDistribution;
-        row_connector=Nodes->reducedDegreesOfFreedomConnector;
-    } else {
-        myNumRowTargets=Nodes->getNumDegreesOfFreedom();
-        numRowTargets=Nodes->degreesOfFreedomMapping.getNumTargets();
-        rowTarget=Nodes->borrowTargetDegreesOfFreedom();
-        rowDistribution=Nodes->degreesOfFreedomDistribution;
-        row_connector=Nodes->degreesOfFreedomConnector;
-    }
-    boost::scoped_array<IndexList> index_list(new IndexList[numRowTargets]);
-  
-#pragma omp parallel
-    {
-        // insert contributions from element matrices into columns in indexlist:
-        IndexList_insertElements(index_list.get(), Elements, reduce_row_order,
-                                 rowTarget, reduce_col_order, colTarget);
-        IndexList_insertElements(index_list.get(), FaceElements,
-                                 reduce_row_order, rowTarget, reduce_col_order,
-                                 colTarget);
-        IndexList_insertElements(index_list.get(), ContactElements,
-                                 reduce_row_order, rowTarget, reduce_col_order,
-                                 colTarget);
-        IndexList_insertElements(index_list.get(), Points, reduce_row_order,
-                                 rowTarget, reduce_col_order, colTarget);
-    }
- 
-    /* create pattern */
-    paso::Pattern_ptr main_pattern, col_couple_pattern, row_couple_pattern;
-    main_pattern=paso::Pattern::fromIndexListArray(
-            0, myNumRowTargets, index_list.get(), 0, myNumColTargets, 0);
-    col_couple_pattern=paso::Pattern::fromIndexListArray(
-            0, myNumRowTargets, index_list.get(), myNumColTargets,
-            numColTargets, -myNumColTargets);
-    row_couple_pattern=paso::Pattern::fromIndexListArray(
-            myNumRowTargets, numRowTargets, index_list.get(), 0, myNumColTargets, 0);
-
-    // if everything is in order we can create the return value
-    if (noError()) {
-        out.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-                rowDistribution, colDistribution, main_pattern,
-                col_couple_pattern, row_couple_pattern,
-                col_connector, row_connector));
-    }
-    Esys_MPIInfo_noError(MPIInfo);
-    return out;
-}
-
-} // namespace finley
-
diff --git a/finley/src/Mesh_getTrilinosGraph.cpp b/finley/src/Mesh_getTrilinosGraph.cpp
new file mode 100644
index 0000000..aade055
--- /dev/null
+++ b/finley/src/Mesh_getTrilinosGraph.cpp
@@ -0,0 +1,103 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2003-2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifdef ESYS_HAVE_TRILINOS
+#include "FinleyDomain.h"
+#include "IndexList.h"
+
+#include <boost/scoped_array.hpp>
+
+using namespace esys_trilinos;
+
+namespace finley {
+
+const_TrilinosGraph_ptr FinleyDomain::getTrilinosGraph(bool reducedOrder) const
+{
+    const_TrilinosGraph_ptr out;
+    // make sure that the requested graph is available
+    if (reducedOrder) {
+        if (m_reducedGraph.is_null())
+            m_reducedGraph.reset(createTrilinosGraph(reducedOrder));
+        out = m_reducedGraph;
+    } else {
+        if (m_fullGraph.is_null())
+            m_fullGraph.reset(createTrilinosGraph(reducedOrder));
+        out = m_fullGraph;
+    }
+    return out;
+}
+
+GraphType* FinleyDomain::createTrilinosGraph(bool reducedOrder) const
+{
+    index_t myNumTargets;
+    index_t numTargets;
+    const index_t* target;
+    const_TrilinosMap_ptr rowMap;
+    const_TrilinosMap_ptr colMap;
+    if (reducedOrder) {
+        myNumTargets = m_nodes->getNumReducedDegreesOfFreedom();
+        numTargets = m_nodes->getNumReducedDegreesOfFreedomTargets();
+        target = m_nodes->borrowTargetReducedDegreesOfFreedom();
+        rowMap = m_nodes->trilinosReducedRowMap;
+        colMap = m_nodes->trilinosReducedColMap;
+    } else {
+        myNumTargets = m_nodes->getNumDegreesOfFreedom();
+        numTargets = m_nodes->getNumDegreesOfFreedomTargets();
+        target = m_nodes->borrowTargetDegreesOfFreedom();
+        rowMap = m_nodes->trilinosRowMap;
+        colMap = m_nodes->trilinosColMap;
+    }
+
+    boost::scoped_array<IndexList> indexList(new IndexList[numTargets]);
+
+#pragma omp parallel
+    {
+        // insert contributions from element matrices into columns in
+        // index list
+        IndexList_insertElements(indexList.get(), m_elements, reducedOrder,
+                                 target, reducedOrder, target);
+        IndexList_insertElements(indexList.get(), m_faceElements,
+                                 reducedOrder, target, reducedOrder, target);
+        IndexList_insertElements(indexList.get(), m_contactElements,
+                                 reducedOrder, target, reducedOrder, target);
+        IndexList_insertElements(indexList.get(), m_points, reducedOrder,
+                                 target, reducedOrder, target);
+    }
+
+    Teuchos::ArrayRCP<size_t> rowPtr(myNumTargets + 1);
+    for (size_t i = 0; i < myNumTargets; i++) {
+        rowPtr[i+1] = rowPtr[i] + indexList[i].count(0, numTargets);
+    }
+
+    Teuchos::ArrayRCP<LO> colInd(rowPtr[myNumTargets]);
+
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumTargets; i++) {
+        indexList[i].toArray(&colInd[rowPtr[i]], 0, numTargets, 0);
+        std::sort(&colInd[rowPtr[i]], &colInd[rowPtr[i+1]]);
+    }
+
+    GraphType* graph = new GraphType(rowMap, colMap, rowPtr, colInd);
+    Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    params->set("Optimize Storage", true);
+    graph->fillComplete(rowMap, rowMap, params);
+    return graph;
+}
+
+} // namespace finley
+
+#endif // ESYS_HAVE_TRILINOS
+
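
createTrilinosGraph() above assembles a compressed-row graph by counting the column entries per row, turning the counts into offsets with a prefix sum, and then writing and sorting each row's column indices. A self-contained sketch of that CRS assembly step (the per-row sets below stand in for the IndexList contents and are made up for illustration):

    #include <algorithm>
    #include <iostream>
    #include <set>
    #include <vector>

    int main()
    {
        // per-row column sets, e.g. collected from element connectivity
        std::vector<std::set<int>> rows = { {0, 1}, {0, 1, 2}, {1, 2} };
        const std::size_t n = rows.size();

        // rowPtr[i+1] - rowPtr[i] is the number of entries in row i
        std::vector<std::size_t> rowPtr(n + 1, 0);
        for (std::size_t i = 0; i < n; ++i)
            rowPtr[i + 1] = rowPtr[i] + rows[i].size();

        // flatten the rows into colInd and keep each row sorted
        std::vector<int> colInd(rowPtr[n]);
        for (std::size_t i = 0; i < n; ++i) {
            std::copy(rows[i].begin(), rows[i].end(), colInd.begin() + rowPtr[i]);
            std::sort(colInd.begin() + rowPtr[i], colInd.begin() + rowPtr[i + 1]);
        }

        for (std::size_t i = 0; i < n; ++i) {
            std::cout << "row " << i << ':';
            for (std::size_t j = rowPtr[i]; j < rowPtr[i + 1]; ++j)
                std::cout << ' ' << colInd[j];
            std::cout << '\n';
        }
        return 0;
    }
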
diff --git a/finley/src/Mesh_glueFaces.cpp b/finley/src/Mesh_glueFaces.cpp
index 5a24c61..6483723 100644
--- a/finley/src/Mesh_glueFaces.cpp
+++ b/finley/src/Mesh_glueFaces.cpp
@@ -23,102 +23,99 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include "FinleyDomain.h"
 
-#include "Mesh.h"
+#include <escript/index.h>
 
 namespace finley {
 
-void Mesh::glueFaces(double safety_factor, double tolerance, bool optimize)
+void FinleyDomain::glueFaces(double safetyFactor, double tolerance, bool optimize)
 {
-    if (MPIInfo->size > 1) {
-        setError(TYPE_ERROR, "Mesh::glueFaces: MPI is not supported yet.");
-        return;
+    if (m_mpiInfo->size > 1) {
+        throw escript::NotImplementedError("glueFaces: MPI is not supported yet.");
     }
-    if (!FaceElements)
+    if (!m_faceElements)
         return;
 
-    char error_msg[LenErrorMsg_MAX];
-    const_ReferenceElement_ptr faceRefElement(FaceElements->
+    const_ReferenceElement_ptr faceRefElement(m_faceElements->
                         referenceElementSet->borrowReferenceElement(false));
-    const int NNFace=faceRefElement->Type->numNodesOnFace;
-    const int NN=FaceElements->numNodes;
-    const int numDim=Nodes->numDim;
-    const int* faceNodes=faceRefElement->Type->faceNodes;
+    const int NNFace = faceRefElement->Type->numNodesOnFace;
+    const int NN = m_faceElements->numNodes;
+    const int numDim = m_nodes->numDim;
+    const int* faceNodes = faceRefElement->Type->faceNodes;
    
     if (NNFace <= 0) {
-        sprintf(error_msg, "Mesh::glueFaces: glueing faces cannot be applied to face elements of type %s",faceRefElement->Type->Name);
-        setError(TYPE_ERROR, error_msg);
-        return;
+        std::stringstream ss;
+        ss << "Mesh::glueFaces: glueing faces cannot be applied to face "
+            "elements of type " << faceRefElement->Type->Name;
+        throw escript::ValueError(ss.str());
     }
 
     // allocate work arrays
-    int* elem1=new int[FaceElements->numElements];
-    int* elem0=new int[FaceElements->numElements];
-    std::vector<index_t> elem_mask(FaceElements->numElements, 0);
-    int* matching_nodes_in_elem1=new int[FaceElements->numElements*NN];
-    std::vector<index_t> new_node_label(Nodes->numNodes);
+    int* elem1 = new int[m_faceElements->numElements];
+    int* elem0 = new int[m_faceElements->numElements];
+    IndexVector elem_mask(m_faceElements->numElements, 0);
+    int* matching_nodes_in_elem1 = new int[m_faceElements->numElements*NN];
+    IndexVector new_node_label(m_nodes->getNumNodes());
     // find the matching face elements
     int numPairs;
-    findMatchingFaces(safety_factor, tolerance, &numPairs, elem0, elem1,
+    findMatchingFaces(safetyFactor, tolerance, &numPairs, elem0, elem1,
                       matching_nodes_in_elem1);
-    if (noError()) {
-        for (index_t n=0; n<Nodes->numNodes; n++)
-            new_node_label[n]=n;
-        // mark matching face elements to be removed
-        for (int e=0; e<numPairs; e++) {
-            elem_mask[elem0[e]]=1;
-            elem_mask[elem1[e]]=1;
-            for (int i=0; i<NNFace; i++) {
-                const int face_node=faceNodes[i];
-                new_node_label[matching_nodes_in_elem1[INDEX2(face_node,e,NN)]]=FaceElements->Nodes[INDEX2(face_node,elem0[e],NN)];
-            }
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++)
+        new_node_label[n] = n;
+    // mark matching face elements to be removed
+    for (int e = 0; e < numPairs; e++) {
+        elem_mask[elem0[e]] = 1;
+        elem_mask[elem1[e]] = 1;
+        for (int i = 0; i < NNFace; i++) {
+            const int face_node = faceNodes[i];
+            new_node_label[matching_nodes_in_elem1[INDEX2(face_node,e,NN)]] =
+                    m_faceElements->Nodes[INDEX2(face_node,elem0[e],NN)];
         }
-        // create an index of face elements
-        dim_t new_numFaceElements=0;
-        for (index_t e=0; e<FaceElements->numElements; e++) {
-            if (elem_mask[e] < 1) {
-                elem_mask[new_numFaceElements]=e;
-                new_numFaceElements++;
-            }
+    }
+    // create an index of face elements
+    dim_t new_numFaceElements = 0;
+    for (index_t e = 0; e < m_faceElements->numElements; e++) {
+        if (elem_mask[e] < 1) {
+            elem_mask[new_numFaceElements] = e;
+            new_numFaceElements++;
         }
-        // get the new number of nodes
-        std::vector<index_t> new_node_mask(Nodes->numNodes, -1);
-        std::vector<index_t> new_node_list;
-        dim_t newNumNodes=0;
-        for (index_t n=0; n<Nodes->numNodes; n++)
-            new_node_mask[new_node_label[n]]=1;
-        for (index_t n=0; n<Nodes->numNodes; n++) {
-            if (new_node_mask[n]>0) {
-                new_node_mask[n]=newNumNodes;
-                new_node_list.push_back(n);
-                newNumNodes++;
-            }
+    }
+    // get the new number of nodes
+    IndexVector new_node_mask(m_nodes->getNumNodes(), -1);
+    IndexVector new_node_list;
+    dim_t newNumNodes = 0;
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++)
+        new_node_mask[new_node_label[n]] = 1;
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++) {
+        if (new_node_mask[n] > 0) {
+            new_node_mask[n] = newNumNodes;
+            new_node_list.push_back(n);
+            newNumNodes++;
         }
-        for (index_t n=0; n<Nodes->numNodes; n++)
-            new_node_label[n]=new_node_mask[new_node_label[n]];
-        // allocate new node and element files
-        NodeFile *newNodeFile=new NodeFile(numDim, MPIInfo); 
-        newNodeFile->allocTable(newNumNodes);
-        ElementFile *newFaceElementsFile=new ElementFile(
-                FaceElements->referenceElementSet, MPIInfo);
-        newFaceElementsFile->allocTable(new_numFaceElements);
-        // get the new nodes
-        newNodeFile->gather(&new_node_list[0], Nodes);
-        // they are the new nodes
-        delete Nodes;
-        Nodes=newNodeFile;
-        // get the face elements which are still in use
-        newFaceElementsFile->gather(&elem_mask[0], FaceElements);
-        // they are the new face elements
-        delete FaceElements;
-        FaceElements=newFaceElementsFile;
-    
-        // assign new node ids to elements
-        relabelElementNodes(new_node_label, 0);
-        prepare(optimize);
     }
+    for (index_t n = 0; n < m_nodes->getNumNodes(); n++)
+        new_node_label[n] = new_node_mask[new_node_label[n]];
+    // allocate new node and element files
+    NodeFile* newNodeFile = new NodeFile(numDim, m_mpiInfo); 
+    newNodeFile->allocTable(newNumNodes);
+    ElementFile* newFaceElementsFile = new ElementFile(
+            m_faceElements->referenceElementSet, m_mpiInfo);
+    newFaceElementsFile->allocTable(new_numFaceElements);
+    // get the new nodes
+    newNodeFile->gather(&new_node_list[0], m_nodes);
+    // they are the new nodes
+    delete m_nodes;
+    m_nodes = newNodeFile;
+    // get the face elements which are still in use
+    newFaceElementsFile->gather(&elem_mask[0], m_faceElements);
+    // they are the new face elements
+    delete m_faceElements;
+    m_faceElements = newFaceElementsFile;
+
+    // assign new node ids to elements
+    relabelElementNodes(new_node_label, 0);
+    prepare(optimize);
     delete[] elem1;
     delete[] elem0;
     delete[] matching_nodes_in_elem1;
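
glueFaces() above removes one element of each matched face pair and merges their nodes by relabelling: matched nodes are mapped onto their partners, the labels still in use are packed into a contiguous range, and the elements are then relabelled. A small standalone sketch of that relabel-and-compact step, with made-up node numbers:

    #include <iostream>
    #include <vector>

    int main()
    {
        const int numNodes = 6;
        std::vector<int> newLabel(numNodes);
        for (int n = 0; n < numNodes; ++n)
            newLabel[n] = n;
        newLabel[4] = 1;    // node 4 is glued onto node 1
        newLabel[5] = 2;    // node 5 is glued onto node 2

        // mark the labels that are still referenced ...
        std::vector<int> mask(numNodes, -1);
        for (int n = 0; n < numNodes; ++n)
            mask[newLabel[n]] = 1;
        // ... and number them consecutively
        int newNumNodes = 0;
        for (int n = 0; n < numNodes; ++n)
            if (mask[n] > 0)
                mask[n] = newNumNodes++;

        // final compacted label for every old node
        for (int n = 0; n < numNodes; ++n)
            newLabel[n] = mask[newLabel[n]];

        for (int n = 0; n < numNodes; ++n)
            std::cout << n << " -> " << newLabel[n] << '\n';
        std::cout << "new node count: " << newNumNodes << '\n';
        return 0;
    }
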
diff --git a/finley/src/Mesh_hex20.cpp b/finley/src/Mesh_hex20.cpp
index 61fa39b..5fe6b93 100644
--- a/finley/src/Mesh_hex20.cpp
+++ b/finley/src/Mesh_hex20.cpp
@@ -19,37 +19,41 @@
 
   Finley: generates rectangular meshes
 
-  Generates a numElements[0] x numElements[1] x numElements[2] mesh with
-  second order elements (Hex20) in the brick
-  [0,Length[0]] x [0,Length[1]] x [0,Length[2]].
+  Generates a NE0 x NE1 x NE2 mesh with second order elements (Hex20) in the
+  brick [0,l0] x [0,l1] x [0,l2].
   order is the desired accuracy of the integration scheme.
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
+#include "FinleyDomain.h"
 
-#include "RectangularMesh.h"
+#include <escript/index.h>
+
+#define MAX3(_arg1_,_arg2_,_arg3_) std::max(_arg1_,std::max(_arg2_,_arg3_))
+
+using escript::DataTypes::real_t;
 
 namespace finley {
 
-Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
-                            const bool* periodic, int order, int reduced_order,
+escript::Domain_ptr FinleyDomain::createHex20(dim_t NE0, dim_t NE1, dim_t NE2,
+                            double l0, double l1, double l2,
+                            bool periodic0, bool periodic1, bool periodic2,
+                            int order, int reduced_order,
                             bool useElementsOnFace, bool useFullElementOrder,
                             bool useMacroElements, bool optimize,
-                            esysUtils::JMPI& mpiInfo)
+                            escript::JMPI mpiInfo)
 {
     const int N_PER_E = 2;
     const int DIM = 3;
     dim_t Nstride0=0, Nstride1=0, Nstride2=0, local_NE0, local_NE1, local_NE2;
     index_t e_offset0, e_offset1, e_offset2;
 
-    const Esys_MPI_rank myRank = mpiInfo->rank;
+    const int myRank = mpiInfo->rank;
 
     // set up the global dimensions of the mesh
-    const dim_t NE0 = std::max(dim_t(1),numElements[0]);
-    const dim_t NE1 = std::max(dim_t(1),numElements[1]);
-    const dim_t NE2 = std::max(dim_t(1),numElements[2]);
+    NE0 = std::max(dim_t(1), NE0);
+    NE1 = std::max(dim_t(1), NE1);
+    NE2 = std::max(dim_t(1), NE2);
     const dim_t N0 = N_PER_E*NE0+1;
     const dim_t N1 = N_PER_E*NE1+1;
     const dim_t N2 = N_PER_E*NE2+1;
@@ -57,10 +61,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
     // allocate mesh
     std::stringstream name;
     name << "Brick " << N0 << " x " << N1 << " x " << N2;
-    Mesh* out = new Mesh(name.str(), DIM, mpiInfo);
+    FinleyDomain* out = new FinleyDomain(name.str(), DIM, mpiInfo);
 
     const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
-    bool generateAllNodes=(useFullElementOrder || useMacroElements);
+    bool generateAllNodes = useFullElementOrder || useMacroElements;
 
     if (generateAllNodes) {
         if (useMacroElements) {
@@ -69,9 +73,7 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
             refElements.reset(new ReferenceElementSet(Hex27, order, reduced_order));
         }
         if (useElementsOnFace) {
-            setError(SYSTEM_ERROR, "rich elements for Hex27 elements are not supported.");
-            delete out;
-            return NULL;
+            throw escript::NotImplementedError("rich elements for Hex27 elements are not supported.");
         } else {
             if (useMacroElements) {
                 refFaceElements.reset(new ReferenceElementSet(Rec9Macro, order, reduced_order));
@@ -92,10 +94,14 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
     }
     refPoints.reset(new ReferenceElementSet(Point1, order, reduced_order));
 
-    out->setPoints(new ElementFile(refPoints, mpiInfo));
-    out->setContactElements(new ElementFile(refContactElements, mpiInfo));
-    out->setFaceElements(new ElementFile(refFaceElements, mpiInfo));
-    out->setElements(new ElementFile(refElements, mpiInfo));
+    ElementFile* points = new ElementFile(refPoints, mpiInfo);
+    out->setPoints(points);
+    ElementFile* contacts = new ElementFile(refContactElements, mpiInfo);
+    out->setContactElements(contacts);
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    out->setFaceElements(faces);
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    out->setElements(elements);
 
     // work out the largest dimension
     if (N2==MAX3(N0,N1,N2)) {
@@ -136,7 +142,7 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
 
     // get the number of surface elements
     dim_t NFaceElements = 0;
-    if (!periodic[2] && local_NE2>0) {
+    if (!periodic2 && local_NE2 > 0) {
         NDOF2=N2;
         if (offset2==0) NFaceElements+=local_NE1*local_NE0;
         if (local_NE2+e_offset2 == NE2) NFaceElements+=local_NE1*local_NE0;
@@ -144,14 +150,14 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
         NDOF2=N2-1;
     }
 
-    if (!periodic[0] && local_NE0>0) {
+    if (!periodic0 && local_NE0 > 0) {
         NDOF0=N0;
         if (e_offset0 == 0) NFaceElements+=local_NE1*local_NE2;
         if (local_NE0+e_offset0 == NE0) NFaceElements+=local_NE1*local_NE2;
     } else {
         NDOF0=N0-1;
     }
-    if (!periodic[1] && local_NE1>0) {
+    if (!periodic1 && local_NE1 > 0) {
         NDOF1=N1;
         if (e_offset1 == 0) NFaceElements+=local_NE0*local_NE2;
         if (local_NE1+e_offset1 == NE1) NFaceElements+=local_NE0*local_NE2;
@@ -160,9 +166,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
     }
 
     // allocate tables
-    out->Nodes->allocTable(local_N0*local_N1*local_N2);
-    out->Elements->allocTable(local_NE0*local_NE1*local_NE2);
-    out->FaceElements->allocTable(NFaceElements);
+    NodeFile* nodes = out->getNodes();
+    nodes->allocTable(local_N0*local_N1*local_N2);
+    elements->allocTable(local_NE0*local_NE1*local_NE2);
+    faces->allocTable(NFaceElements);
 
     // create nodes
 #pragma omp parallel for
@@ -173,21 +180,21 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                 const index_t global_i0 = i0+offset0;
                 const index_t global_i1 = i1+offset1;
                 const index_t global_i2 = i2+offset2;
-                out->Nodes->Coordinates[INDEX2(0,k,DIM)]=DBLE(global_i0)/DBLE(N0-1)*Length[0];
-                out->Nodes->Coordinates[INDEX2(1,k,DIM)]=DBLE(global_i1)/DBLE(N1-1)*Length[1];
-                out->Nodes->Coordinates[INDEX2(2,k,DIM)]=DBLE(global_i2)/DBLE(N2-1)*Length[2];
-                out->Nodes->Id[k]=Nstride0*global_i0+Nstride1*global_i1+Nstride2*global_i2;
-                out->Nodes->Tag[k]=0;
-                out->Nodes->globalDegreesOfFreedom[k]=Nstride0*(global_i0%NDOF0)
-                                                +Nstride1*(global_i1%NDOF1)
-                                                +Nstride2*(global_i2%NDOF2);
+                nodes->Coordinates[INDEX2(0,k,DIM)] = (real_t)global_i0/(real_t)(N0-1)*l0;
+                nodes->Coordinates[INDEX2(1,k,DIM)] = (real_t)global_i1/(real_t)(N1-1)*l1;
+                nodes->Coordinates[INDEX2(2,k,DIM)] = (real_t)global_i2/(real_t)(N2-1)*l2;
+                nodes->Id[k] = Nstride0*global_i0+Nstride1*global_i1+Nstride2*global_i2;
+                nodes->Tag[k] = 0;
+                nodes->globalDegreesOfFreedom[k] = Nstride0*(global_i0%NDOF0)
+                                                 + Nstride1*(global_i1%NDOF1)
+                                                 + Nstride2*(global_i2%NDOF2);
             }
         }
     }
 
     // set the elements
-    dim_t NN = out->Elements->numNodes;
-    index_t* eNodes = out->Elements->Nodes;
+    dim_t NN = elements->numNodes;
+    index_t* eNodes = elements->Nodes;
 #pragma omp parallel for
     for (index_t i2=0; i2<local_NE2; i2++) {
         for (index_t i1=0; i1<local_NE1; i1++) {
@@ -197,11 +204,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                                     + Nstride1*N_PER_E*(i1+e_offset1)
                                     + Nstride2*N_PER_E*(i2+e_offset2);
 
-                out->Elements->Id[k] = (i0+e_offset0)
-                                     + NE0*(i1+e_offset1)
-                                     + NE0*NE1*(i2+e_offset2);
-                out->Elements->Tag[k]=0;
-                out->Elements->Owner[k]=myRank;
+                elements->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                 + NE0*NE1*(i2+e_offset2);
+                elements->Tag[k] = 0;
+                elements->Owner[k] = myRank;
 
                 eNodes[INDEX2(0,k,NN)] =node0;
                 eNodes[INDEX2(1,k,NN)] =node0+                      2*Nstride0;
@@ -237,13 +243,13 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
     }
 
     // face elements
-    NN=out->FaceElements->numNodes;
+    NN = faces->numNodes;
     dim_t totalNECount=NE0*NE1*NE2;
     dim_t faceNECount = 0;
-    eNodes = out->FaceElements->Nodes;
+    eNodes = faces->Nodes;
 
     // these are the quadrilateral elements on boundary 1 (x3=0):
-    if (!periodic[2] && local_NE2>0) {
+    if (!periodic2 && local_NE2 > 0) {
         // **  elements on boundary 100 (x3=0):
         if (offset2==0) {
 #pragma omp parallel for
@@ -253,10 +259,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride1*N_PER_E*(i1+e_offset1);
 
-                    out->FaceElements->Id[k] = (i0+e_offset0)
-                                             + NE0*(i1+e_offset1)+totalNECount;
-                    out->FaceElements->Tag[k]=100;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                  + totalNECount;
+                    faces->Tag[k] = 100;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)] =node0;
@@ -308,10 +314,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                                         + Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(NE2-1);
 
-                    out->FaceElements->Id[k] = (i0+e_offset0)
-                                             + NE0*(i1+e_offset1)+totalNECount;
-                    out->FaceElements->Tag[k]=200;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                  + totalNECount;
+                    faces->Tag[k] = 200;
+                    faces->Owner[k] = myRank;
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)] =node0+2*Nstride2;
                         eNodes[INDEX2(1,k,NN)] =node0+2*Nstride2+           2*Nstride0;
@@ -355,9 +361,9 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
             faceNECount+=local_NE1*local_NE0;
         }
         totalNECount+=NE1*NE0;
-    } // !periodic[2] && local_NE2>0
+    } // !periodic2 && local_NE2 > 0
 
-    if (!periodic[0] && local_NE0>0) {
+    if (!periodic0 && local_NE0 > 0) {
         // **  elements on boundary 001 (x1=0):
         if (e_offset0 == 0) {
 #pragma omp parallel for
@@ -366,10 +372,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                     const dim_t k = i1+local_NE1*i2+faceNECount;
                     const index_t node0 = Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
-                    out->FaceElements->Id[k] = (i1+e_offset1)
-                                             + NE1*(i2+e_offset2)+totalNECount;
-                    out->FaceElements->Tag[k]=1;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i1+e_offset1) + NE1*(i2+e_offset2)
+                                                  + totalNECount;
+                    faces->Tag[k] = 1;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)] =node0;
@@ -424,10 +430,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                     const index_t node0 = Nstride0*N_PER_E*(NE0-1)
                                         + Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
-                    out->FaceElements->Id[k] = (i1+e_offset1)
-                                             + NE1*(i2+e_offset2)+totalNECount;
-                    out->FaceElements->Tag[k]=2;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i1+e_offset1) + NE1*(i2+e_offset2)
+                                                  + totalNECount;
+                    faces->Tag[k] = 2;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)]=node0+                      2*Nstride0;
@@ -472,9 +478,9 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
             faceNECount+=local_NE1*local_NE2;
         }
         totalNECount+=NE1*NE2;
-    } // !periodic[0] && local_NE0>0
+    } // !periodic0 && local_NE0 > 0
 
-    if (!periodic[1] && local_NE1>0) {
+    if (!periodic1 && local_NE1 > 0) {
         // **  elements on boundary 010 (x2=0):
         if (e_offset1 == 0) {
 #pragma omp parallel for
@@ -484,10 +490,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
 
-                    out->FaceElements->Id[k] = (i2+e_offset2)
-                                             + NE2*(e_offset0+i0)+totalNECount;
-                    out->FaceElements->Tag[k]=10;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i2+e_offset2) + NE2*(e_offset0+i0)
+                                                  + totalNECount;
+                    faces->Tag[k] = 10;
+                    faces->Owner[k] = myRank;
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)]=node0;
                         eNodes[INDEX2(1,k,NN)]=node0+                      2*Nstride0;
@@ -542,10 +548,10 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
                                         + Nstride1*N_PER_E*(NE1-1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
 
-                    out->FaceElements->Id[k] = (i2+e_offset2)
-                                             + NE2*(i0+e_offset0)+totalNECount;
-                    out->FaceElements->Tag[k]=20;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i2+e_offset2) + NE2*(i0+e_offset0)
+                                                  + totalNECount;
+                    faces->Tag[k] = 20;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
                         eNodes[INDEX2(0,k,NN)]=node0+           2*Nstride1;
@@ -593,24 +599,17 @@ Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* Length,
     }
 
     // add tag names
-    out->addTagMap("top", 200);
-    out->addTagMap("bottom", 100);
-    out->addTagMap("left", 1);
-    out->addTagMap("right", 2);
-    out->addTagMap("front", 10);
-    out->addTagMap("back", 20);
+    out->setTagMap("top", 200);
+    out->setTagMap("bottom", 100);
+    out->setTagMap("left", 1);
+    out->setTagMap("right", 2);
+    out->setTagMap("front", 10);
+    out->setTagMap("back", 20);
 
     // prepare mesh for further calculations
     out->resolveNodeIds();
-    if (noError()) {
-        out->prepare(optimize);
-    }
-
-    if (!noError()) {
-        delete out;
-        out=NULL;
-    }
-    return out;
+    out->prepare(optimize);
+    return out->getPtr();
 }
 
 } // namespace finley
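
The brick generators (createHex20 above, createHex8 below) number grid nodes with per-direction strides and wrap the degree-of-freedom ids with a modulo on periodic axes, which is how opposite faces end up sharing DOFs. A tiny sketch of that numbering scheme, with made-up sizes (the variable names mirror the patch, the values do not come from it):

    #include <iostream>

    int main()
    {
        const int N0 = 3, N1 = 3, N2 = 3;           // nodes per direction
        const int Nstride0 = 1, Nstride1 = N0, Nstride2 = N0 * N1;
        const bool periodic0 = true;                 // x0 direction wraps around
        const int NDOF0 = periodic0 ? N0 - 1 : N0;   // last plane reuses the first
        const int NDOF1 = N1, NDOF2 = N2;

        for (int i2 = 0; i2 < N2; ++i2)
            for (int i1 = 0; i1 < N1; ++i1)
                for (int i0 = 0; i0 < N0; ++i0) {
                    const int id  = Nstride0*i0 + Nstride1*i1 + Nstride2*i2;
                    const int dof = Nstride0*(i0 % NDOF0)
                                  + Nstride1*(i1 % NDOF1)
                                  + Nstride2*(i2 % NDOF2);
                    if (i1 == 0 && i2 == 0)   // print one grid line as an example
                        std::cout << "i0=" << i0 << " id=" << id
                                  << " dof=" << dof << '\n';
                }
        return 0;
    }
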
diff --git a/finley/src/Mesh_hex8.cpp b/finley/src/Mesh_hex8.cpp
index a5cbc9c..73a758b 100644
--- a/finley/src/Mesh_hex8.cpp
+++ b/finley/src/Mesh_hex8.cpp
@@ -26,29 +26,34 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
+#include "FinleyDomain.h"
 
-#include "RectangularMesh.h"
+#include <escript/index.h>
+
+#define MAX3(_arg1_,_arg2_,_arg3_) std::max(_arg1_,std::max(_arg2_,_arg3_))
+
+using escript::DataTypes::real_t;
 
 namespace finley {
 
-Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
-                           const bool* periodic, int order, int reduced_order,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool optimize, esysUtils::JMPI& mpiInfo)
+escript::Domain_ptr FinleyDomain::createHex8(dim_t NE0, dim_t NE1, dim_t NE2,
+                            double l0, double l1, double l2,
+                            bool periodic0, bool periodic1, bool periodic2,
+                            int order, int reduced_order,
+                            bool useElementsOnFace,
+                            bool optimize, escript::JMPI mpiInfo)
 {
     const int N_PER_E = 1;
     const int DIM = 3;
     dim_t Nstride0=0, Nstride1=0, Nstride2=0, local_NE0, local_NE1, local_NE2;
     index_t e_offset0, e_offset1, e_offset2;
 
-    const Esys_MPI_rank myRank = mpiInfo->rank;
+    const int myRank = mpiInfo->rank;
 
     // set up the global dimensions of the mesh
-    const dim_t NE0 = std::max(dim_t(1),numElements[0]);
-    const dim_t NE1 = std::max(dim_t(1),numElements[1]);
-    const dim_t NE2 = std::max(dim_t(1),numElements[2]);
+    NE0 = std::max(dim_t(1), NE0);
+    NE1 = std::max(dim_t(1), NE1);
+    NE2 = std::max(dim_t(1), NE2);
     const dim_t N0 = N_PER_E*NE0+1;
     const dim_t N1 = N_PER_E*NE1+1;
     const dim_t N2 = N_PER_E*NE2+1;
@@ -56,7 +61,7 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
     // allocate mesh
     std::stringstream name;
     name << "Rectangular " << N0 << " x " << N1 << " x " << N2 << " mesh";
-    Mesh* out = new Mesh(name.str(), DIM, mpiInfo);
+    FinleyDomain* out = new FinleyDomain(name.str(), DIM, mpiInfo);
 
     const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
     if (useElementsOnFace) {
@@ -71,8 +76,10 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
 
     out->setPoints(new ElementFile(refPoints, mpiInfo));
     out->setContactElements(new ElementFile(refContactElements, mpiInfo));
-    out->setFaceElements(new ElementFile(refFaceElements, mpiInfo));
-    out->setElements(new ElementFile(refElements, mpiInfo));
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    out->setFaceElements(faces);
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    out->setElements(elements);
 
     // work out the largest dimension
     if (N2==MAX3(N0,N1,N2)) {
@@ -113,7 +120,7 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
 
     // get the number of surface elements
     dim_t NFaceElements = 0;
-    if (!periodic[2] && local_NE2>0) {
+    if (!periodic2 && local_NE2 > 0) {
         NDOF2=N2;
         if (offset2==0)
             NFaceElements+=local_NE1*local_NE0;
@@ -123,7 +130,7 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
         NDOF2=N2-1;
     }
 
-    if (!periodic[0] && local_NE0>0) {
+    if (!periodic0 && local_NE0 > 0) {
         NDOF0=N0;
         if (e_offset0 == 0)
             NFaceElements+=local_NE1*local_NE2;
@@ -132,7 +139,7 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
     } else {
         NDOF0=N0-1;
     }
-    if (!periodic[1] && local_NE1>0) {
+    if (!periodic1 && local_NE1 > 0) {
         NDOF1=N1;
         if (e_offset1 == 0)
             NFaceElements+=local_NE0*local_NE2;
@@ -143,9 +150,10 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
     }
 
     // allocate tables
-    out->Nodes->allocTable(local_N0*local_N1*local_N2);
-    out->Elements->allocTable(local_NE0*local_NE1*local_NE2);
-    out->FaceElements->allocTable(NFaceElements);
+    NodeFile* nodes = out->getNodes();
+    nodes->allocTable(local_N0*local_N1*local_N2);
+    elements->allocTable(local_NE0*local_NE1*local_NE2);
+    faces->allocTable(NFaceElements);
 
     // create nodes
 #pragma omp parallel for
@@ -156,293 +164,285 @@ Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* Length,
                 const index_t global_i0 = i0+offset0;
                 const index_t global_i1 = i1+offset1;
                 const index_t global_i2 = i2+offset2;
-                out->Nodes->Coordinates[INDEX2(0,k,DIM)]=DBLE(global_i0)/DBLE(N0-1)*Length[0];
-                out->Nodes->Coordinates[INDEX2(1,k,DIM)]=DBLE(global_i1)/DBLE(N1-1)*Length[1];
-                out->Nodes->Coordinates[INDEX2(2,k,DIM)]=DBLE(global_i2)/DBLE(N2-1)*Length[2];
-                out->Nodes->Id[k]=Nstride0*global_i0+Nstride1*global_i1+Nstride2*global_i2;
-                out->Nodes->Tag[k]=0;
-                out->Nodes->globalDegreesOfFreedom[k]=Nstride0*(global_i0%NDOF0)
-                                                +Nstride1*(global_i1%NDOF1)
-                                                +Nstride2*(global_i2%NDOF2);
+                nodes->Coordinates[INDEX2(0,k,DIM)]=(real_t)global_i0/(real_t)(N0-1)*l0;
+                nodes->Coordinates[INDEX2(1,k,DIM)]=(real_t)global_i1/(real_t)(N1-1)*l1;
+                nodes->Coordinates[INDEX2(2,k,DIM)]=(real_t)global_i2/(real_t)(N2-1)*l2;
+                nodes->Id[k] = Nstride0*global_i0+Nstride1*global_i1+Nstride2*global_i2;
+                nodes->Tag[k] = 0;
+                nodes->globalDegreesOfFreedom[k] = Nstride0*(global_i0%NDOF0)
+                                                 + Nstride1*(global_i1%NDOF1)
+                                                 + Nstride2*(global_i2%NDOF2);
             }
         }
     }
 
     // set the elements
-    dim_t NN = out->Elements->numNodes;
-    index_t* eNodes = out->Elements->Nodes;
+    dim_t NN = elements->numNodes;
+    index_t* eNodes = elements->Nodes;
 #pragma omp parallel for
-    for (index_t i2=0; i2<local_NE2; i2++) {
-        for (index_t i1=0; i1<local_NE1; i1++) {
-            for (index_t i0=0; i0<local_NE0; i0++) {
+    for (index_t i2 = 0; i2 < local_NE2; i2++) {
+        for (index_t i1 = 0; i1 < local_NE1; i1++) {
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
                 const dim_t k = i0+local_NE0*i1+local_NE0*local_NE1*i2;
                 const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                     + Nstride1*N_PER_E*(i1+e_offset1)
                                     + Nstride2*N_PER_E*(i2+e_offset2);
 
-                out->Elements->Id[k] = (i0+e_offset0)
-                                     + NE0*(i1+e_offset1)
-                                     + NE0*NE1*(i2+e_offset2);
-                out->Elements->Tag[k]=0;
-                out->Elements->Owner[k]=myRank;
-
-                eNodes[INDEX2(0,k,NN)]=node0;
-                eNodes[INDEX2(1,k,NN)]=node0+Nstride0;
-                eNodes[INDEX2(2,k,NN)]=node0+Nstride1+Nstride0;
-                eNodes[INDEX2(3,k,NN)]=node0+Nstride1;
-                eNodes[INDEX2(4,k,NN)]=node0+Nstride2;
-                eNodes[INDEX2(5,k,NN)]=node0+Nstride2+Nstride0;
-                eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                eNodes[INDEX2(7,k,NN)]=node0+Nstride2+Nstride1;
+                elements->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                 + NE0*NE1*(i2+e_offset2);
+                elements->Tag[k] = 0;
+                elements->Owner[k] = myRank;
+
+                eNodes[INDEX2(0,k,NN)] = node0;
+                eNodes[INDEX2(1,k,NN)] = node0+Nstride0;
+                eNodes[INDEX2(2,k,NN)] = node0+Nstride1+Nstride0;
+                eNodes[INDEX2(3,k,NN)] = node0+Nstride1;
+                eNodes[INDEX2(4,k,NN)] = node0+Nstride2;
+                eNodes[INDEX2(5,k,NN)] = node0+Nstride2+Nstride0;
+                eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                eNodes[INDEX2(7,k,NN)] = node0+Nstride2+Nstride1;
             }
         }
     }
 
     // face elements
-    NN=out->FaceElements->numNodes;
-    dim_t totalNECount=NE0*NE1*NE2;
+    NN = faces->numNodes;
+    dim_t totalNECount = NE0*NE1*NE2;
     dim_t faceNECount = 0;
-    eNodes = out->FaceElements->Nodes;
+    eNodes = faces->Nodes;
 
     // these are the quadrilateral elements on boundary 1 (x3=0):
-    if (!periodic[2] && local_NE2>0) {
+    if (!periodic2 && local_NE2 > 0) {
         // **  elements on boundary 100 (x3=0):
-        if (e_offset2==0) {
+        if (e_offset2 == 0) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                for (index_t i0=0; i0<local_NE0; i0++) {
-                    const dim_t k = i0+local_NE0*i1+faceNECount;
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                for (index_t i0=0; i0 < local_NE0; i0++) {
+                    const dim_t k = i0 + local_NE0*i1 + faceNECount;
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride1*N_PER_E*(i1+e_offset1);
 
-                    out->FaceElements->Id[k] = (i0+e_offset0)
-                                             + NE0*(i1+e_offset1)+totalNECount;
-                    out->FaceElements->Tag[k]=100;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                  + totalNECount;
+                    faces->Tag[k] = 100;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(4,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(7,k,NN)]=node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(4,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(7,k,NN)] = node0+Nstride2+Nstride0;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride0;
                     }
                 }
             }
-            faceNECount+=local_NE1*local_NE0;
+            faceNECount += local_NE1*local_NE0;
         }
-        totalNECount+=NE1*NE0;
+        totalNECount += NE1*NE0;
 
         // **  elements on boundary 200 (x3=1):
         if (local_NE2+e_offset2 == NE2) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                for (index_t i0=0; i0<local_NE0; i0++) {
-                    const dim_t k = i0+local_NE0*i1+faceNECount;
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const dim_t k = i0 + local_NE0*i1 + faceNECount;
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(NE2-1);
 
-                    out->FaceElements->Id[k] = (i0+e_offset0)
-                                             + NE0*(i1+e_offset1)+totalNECount;
-                    out->FaceElements->Tag[k]=200;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1)
+                                                  + totalNECount;
+                    faces->Tag[k] = 200;
+                    faces->Owner[k] = myRank;
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2+         Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2+Nstride1;
-
-                        eNodes[INDEX2(4,k,NN)]=node0;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(6,k,NN)]=node0+         Nstride1+Nstride0;
-                        eNodes[INDEX2(7,k,NN)]=node0+         Nstride1;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2+         Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2+Nstride1;
+
+                        eNodes[INDEX2(4,k,NN)] = node0;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(6,k,NN)] = node0+         Nstride1+Nstride0;
+                        eNodes[INDEX2(7,k,NN)] = node0+         Nstride1;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2         +Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2         +Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2+Nstride1;
                     }
                 }
             }
-            faceNECount+=local_NE1*local_NE0;
+            faceNECount += local_NE1*local_NE0;
         }
-        totalNECount+=NE1*NE0;
-    } // !periodic[2] && local_NE2>0
+        totalNECount += NE1*NE0;
+    } // !periodic2 && local_NE2 > 0
 
-    if (!periodic[0] && local_NE0>0) {
+    if (!periodic0 && local_NE0 > 0) {
         // **  elements on boundary 001 (x1=0):
         if (e_offset0 == 0) {
 #pragma omp parallel for
-            for (index_t i2=0; i2<local_NE2; i2++) {
-                for (index_t i1=0; i1<local_NE1; i1++) {
-                    const dim_t k = i1+local_NE1*i2+faceNECount;
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                    const dim_t k = i1 + local_NE1*i2 + faceNECount;
                     const index_t node0 = Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
-                    out->FaceElements->Id[k] = (i1+e_offset1)
-                                             + NE1*(i2+e_offset2)+totalNECount;
-                    out->FaceElements->Tag[k]=1;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i1+e_offset1) + NE1*(i2+e_offset2)
+                                                  + totalNECount;
+                    faces->Tag[k] = 1;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(4,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride2+Nstride0;
-                        eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(7,k,NN)]=node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(4,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(7,k,NN)] = node0+Nstride1+Nstride0;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride1;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride1;
                     }
                 }
             }
-            faceNECount+=local_NE1*local_NE2;
+            faceNECount += local_NE1*local_NE2;
         }
-        totalNECount+=NE1*NE2;
+        totalNECount += NE1*NE2;
 
         // **  elements on boundary 002 (x1=1):
         if (local_NE0+e_offset0 == NE0) {
 #pragma omp parallel for
-            for (index_t i2=0; i2<local_NE2; i2++) {
-                for (index_t i1=0; i1<local_NE1; i1++) {
-                    const dim_t k = i1+local_NE1*i2+faceNECount;
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                    const dim_t k = i1 + local_NE1*i2 + faceNECount;
                     const index_t node0 = Nstride0*N_PER_E*(NE0-1)
                                         + Nstride1*N_PER_E*(i1+e_offset1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
-                    out->FaceElements->Id[k] = (i1+e_offset1)
-                                             + NE1*(i2+e_offset2)+totalNECount;
-                    out->FaceElements->Tag[k]=2;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i1+e_offset1) + NE1*(i2+e_offset2)
+                                                  + totalNECount;
+                    faces->Tag[k] = 2;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride1+Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2+Nstride0;
-
-                        eNodes[INDEX2(4,k,NN)]=node0;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(7,k,NN)]=node0+Nstride2;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2+Nstride0;
+
+                        eNodes[INDEX2(4,k,NN)] = node0;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(7,k,NN)] = node0+Nstride2;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride1+Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2+Nstride0;
                     }
                 }
             }
-            faceNECount+=local_NE1*local_NE2;
+            faceNECount += local_NE1*local_NE2;
         }
-        totalNECount+=NE1*NE2;
-    } // !periodic[0] && local_NE0>0
+        totalNECount += NE1*NE2;
+    } // !periodic0 && local_NE0 > 0
 
-    if (!periodic[1] && local_NE1>0) {
+    if (!periodic1 && local_NE1 > 0) {
         // **  elements on boundary 010 (x2=0):
         if (e_offset1 == 0) {
 #pragma omp parallel for
-            for (index_t i2=0; i2<local_NE2; i2++) {
-                for (index_t i0=0; i0<local_NE0; i0++) {
-                    const dim_t k = i0+local_NE0*i2+faceNECount;
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const dim_t k = i0 + local_NE0*i2 + faceNECount;
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
 
-                    out->FaceElements->Id[k] = (i2+e_offset2)
-                                             + NE2*(e_offset0+i0)+totalNECount;
-                    out->FaceElements->Tag[k]=10;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i2+e_offset2) + NE2*(e_offset0+i0)
+                                                  + totalNECount;
+                    faces->Tag[k] = 10;
+                    faces->Owner[k] = myRank;
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2;
-
-                        eNodes[INDEX2(4,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride1+Nstride0;
-                        eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(7,k,NN)]=node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2;
+
+                        eNodes[INDEX2(4,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(7,k,NN)] = node0+Nstride2+Nstride1;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride0;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride2;
+                        eNodes[INDEX2(0,k,NN)] = node0;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride0;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride2;
                     }
                 }
             }
-            faceNECount+=local_NE0*local_NE2;
+            faceNECount += local_NE0*local_NE2;
         }
-        totalNECount+=NE0*NE2;
+        totalNECount += NE0*NE2;
 
         // **  elements on boundary 020 (x2=1):
         if (local_NE1+e_offset1 == NE1) {
 #pragma omp parallel for
-            for (index_t i2=0; i2<local_NE2; i2++) {
-                for (index_t i0=0; i0<local_NE0; i0++) {
-                    const dim_t k = i0+local_NE0*i2+faceNECount;
+            for (index_t i2 = 0; i2 < local_NE2; i2++) {
+                for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                    const dim_t k = i0 + local_NE0*i2 + faceNECount;
                     const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
                                         + Nstride1*N_PER_E*(NE1-1)
                                         + Nstride2*N_PER_E*(i2+e_offset2);
 
-                    out->FaceElements->Id[k] = (i2+e_offset2)
-                                             + NE2*(i0+e_offset0)+totalNECount;
-                    out->FaceElements->Tag[k]=20;
-                    out->FaceElements->Owner[k]=myRank;
+                    faces->Id[k] = (i2+e_offset2) + NE2*(i0+e_offset0)
+                                                  + totalNECount;
+                    faces->Tag[k] = 20;
+                    faces->Owner[k] = myRank;
 
                     if (useElementsOnFace) {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride1+Nstride0;
-
-                        eNodes[INDEX2(4,k,NN)]=node0;
-                        eNodes[INDEX2(5,k,NN)]=node0+Nstride2;
-                        eNodes[INDEX2(6,k,NN)]=node0+Nstride2+Nstride0;
-                        eNodes[INDEX2(7,k,NN)]=node0+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride1+Nstride0;
+
+                        eNodes[INDEX2(4,k,NN)] = node0;
+                        eNodes[INDEX2(5,k,NN)] = node0+Nstride2;
+                        eNodes[INDEX2(6,k,NN)] = node0+Nstride2+Nstride0;
+                        eNodes[INDEX2(7,k,NN)] = node0+Nstride0;
                     } else {
-                        eNodes[INDEX2(0,k,NN)]=node0+Nstride1;
-                        eNodes[INDEX2(1,k,NN)]=node0+Nstride2+Nstride1;
-                        eNodes[INDEX2(2,k,NN)]=node0+Nstride2+Nstride1+Nstride0;
-                        eNodes[INDEX2(3,k,NN)]=node0+Nstride1+Nstride0;
+                        eNodes[INDEX2(0,k,NN)] = node0+Nstride1;
+                        eNodes[INDEX2(1,k,NN)] = node0+Nstride2+Nstride1;
+                        eNodes[INDEX2(2,k,NN)] = node0+Nstride2+Nstride1+Nstride0;
+                        eNodes[INDEX2(3,k,NN)] = node0+Nstride1+Nstride0;
                     }
                 }
             }
-            faceNECount+=local_NE0*local_NE2;
+            faceNECount += local_NE0*local_NE2;
         }
-        totalNECount+=NE0*NE2;
+        totalNECount += NE0*NE2;
     }
 
     // add tag names
-    out->addTagMap("top", 200);
-    out->addTagMap("bottom", 100);
-    out->addTagMap("left", 1);
-    out->addTagMap("right", 2);
-    out->addTagMap("front", 10);
-    out->addTagMap("back", 20);
+    out->setTagMap("top", 200);
+    out->setTagMap("bottom", 100);
+    out->setTagMap("left", 1);
+    out->setTagMap("right", 2);
+    out->setTagMap("front", 10);
+    out->setTagMap("back", 20);
 
     // prepare mesh for further calculations
     out->resolveNodeIds();
-    if (noError()) {
-        out->prepare(optimize);
-    }
-
-    if (!noError()) {
-        delete out;
-        out=NULL;
-    }
-    return out;
+    out->prepare(optimize);
+    return out->getPtr();
 }
 
 } // namespace finley
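
The face-element loops above fill the connectivity table through the INDEX2 macro from escript/index.h. Assuming the usual expansion INDEX2(i,k,NN) == i + NN*k, the NN nodes of face element k occupy one contiguous block of eNodes. A small standalone sketch of that layout (index2, numFaces and the node values below are illustrative only, not part of the finley API):

    // Sketch of the INDEX2 addressing used for eNodes above.
    // Assumption: INDEX2(i, k, NN) expands to (i) + (NN)*(k), so element k
    // owns the contiguous slice eNodes[NN*k .. NN*k + NN - 1].
    #include <cstddef>
    #include <vector>

    inline std::size_t index2(std::size_t i, std::size_t k, std::size_t NN)
    {
        return i + NN * k; // node i of element k
    }

    int main()
    {
        const std::size_t NN = 4;       // nodes per face element
        const std::size_t numFaces = 2; // illustrative count
        std::vector<int> eNodes(NN * numFaces, -1);

        const int node0 = 100;          // illustrative first node id
        eNodes[index2(0, 1, NN)] = node0;
        eNodes[index2(1, 1, NN)] = node0 + 1;
        eNodes[index2(2, 1, NN)] = node0 + 2;
        eNodes[index2(3, 1, NN)] = node0 + 3;
        return 0;
    }

With that layout each element's node list stays together in memory, one block of NN entries per face element.
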
diff --git a/finley/src/Mesh_joinFaces.cpp b/finley/src/Mesh_joinFaces.cpp
index 3d0f53b..effb5f3 100644
--- a/finley/src/Mesh_joinFaces.cpp
+++ b/finley/src/Mesh_joinFaces.cpp
@@ -23,107 +23,108 @@
 
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include "FinleyDomain.h"
 
-
-#include "Mesh.h"
+#include <escript/index.h>
 
 namespace finley {
 
-void Mesh::joinFaces(double safety_factor, double tolerance, bool optimize)
+void FinleyDomain::joinFaces(double safety_factor, double tolerance, bool optimize)
 {
-    char error_msg[LenErrorMsg_MAX];
-
-    if (MPIInfo->size>1) {
-        setError(TYPE_ERROR, "Mesh::joinFaces: MPI is not supported yet.");
-        return;
+    if (m_mpiInfo->size > 1) {
+        throw escript::NotImplementedError("Mesh::joinFaces: MPI is not supported yet.");
     }
-    if (ContactElements==NULL) {
-        setError(TYPE_ERROR, "Mesh::joinFaces: no contact element file present.");
-        return;
+    if (!m_contactElements) {
+        throw escript::ValueError("Mesh::joinFaces: No contact elements present.");
     }
-    if (!FaceElements)
+    if (!m_faceElements)
         return;
 
-    const_ReferenceElement_ptr faceRefElement(FaceElements->referenceElementSet->borrowReferenceElement(false));
-    const_ReferenceElement_ptr contactRefElement(ContactElements->referenceElementSet->borrowReferenceElement(false));
+    const_ReferenceElement_ptr faceRefElement(m_faceElements->referenceElementSet->borrowReferenceElement(false));
+    const_ReferenceElement_ptr contactRefElement(m_contactElements->referenceElementSet->borrowReferenceElement(false));
 
     if (faceRefElement->Type->numNodesOnFace <= 0) {
-        sprintf(error_msg,"Mesh_joinFaces: joining faces cannot be applied to face elements of type %s",faceRefElement->Type->Name);
-        setError(TYPE_ERROR,error_msg);
-        return;
+        std::stringstream ss;
+        ss << "Mesh::joinFaces: joining faces cannot be applied to face "
+            "elements of type " << faceRefElement->Type->Name;
+        throw escript::ValueError(ss.str());
     }
 
     if (contactRefElement->Type->numNodes != 2*faceRefElement->Type->numNodes) {
-        sprintf(error_msg,"Mesh_joinFaces: contact element file for %s needs to hold elements created from face elements %s", contactRefElement->Type->Name,faceRefElement->Type->Name);
-        setError(TYPE_ERROR,error_msg);
-        return;
+        std::stringstream ss;
+        ss << "Mesh::joinFaces: contact element file for "
+            << contactRefElement->Type->Name << " needs to hold elements "
+            "created from face elements " << faceRefElement->Type->Name;
+        throw escript::ValueError(ss.str());
     }
 
-    const int NN=FaceElements->numNodes;
-    const int NN_Contact=ContactElements->numNodes;
+    const int NN = m_faceElements->numNodes;
+    const int NN_Contact = m_contactElements->numNodes;
 
     // allocate work arrays
-    int* elem1=new int[FaceElements->numElements];
-    int* elem0=new int[FaceElements->numElements];
-    index_t* elem_mask=new index_t[FaceElements->numElements];
-    int* matching_nodes_in_elem1=new int[FaceElements->numElements*NN];
+    int* elem1 = new int[m_faceElements->numElements];
+    int* elem0 = new int[m_faceElements->numElements];
+    index_t* elem_mask = new index_t[m_faceElements->numElements];
+    int* matching_nodes_in_elem1 = new int[m_faceElements->numElements*NN];
 
     // find the matching face elements
     int numPairs;
     findMatchingFaces(safety_factor, tolerance, &numPairs, elem0, elem1, matching_nodes_in_elem1);
-    if (noError()) {
-        // get a list of the face elements to be kept
+    // get a list of the face elements to be kept
 #pragma omp parallel for
-        for (index_t e=0; e<FaceElements->numElements; e++)
-            elem_mask[e]=1;
-        for (int e=0; e<numPairs; e++) {
-            elem_mask[elem0[e]]=0;
-            elem_mask[elem1[e]]=0;
-        }
-        dim_t new_numFaceElements=0;
-        // OMP
-        for (index_t e=0; e<FaceElements->numElements; e++) {
-            if (elem_mask[e]>0) {
-                elem_mask[new_numFaceElements]=e;
-                new_numFaceElements++;
-            }
-        }
-        // allocate new face element and Contact element files
-        ElementFile *newFaceElementsFile, *newContactElementsFile;
-        newContactElementsFile=new ElementFile(ContactElements->referenceElementSet, MPIInfo);
-        newFaceElementsFile=new ElementFile(FaceElements->referenceElementSet, MPIInfo);
-        newContactElementsFile->allocTable(numPairs+ContactElements->numElements);
-        newFaceElementsFile->allocTable(new_numFaceElements);
-        // copy the old elements over
-        // get the face elements which are still in use
-        newFaceElementsFile->gather(elem_mask, FaceElements);
-        // get the contact elements which are still in use
-        newContactElementsFile->copyTable(0, 0, 0, ContactElements);
-        dim_t c=ContactElements->numElements;
-        // OMP
-        for (int e=0; e<numPairs; e++) {
-            const int e0=elem0[e];
-            const int e1=elem1[e];
-            newContactElementsFile->Id[c]=std::min(FaceElements->Id[e0],FaceElements->Id[e1]);
-            newContactElementsFile->Tag[c]=std::min(FaceElements->Tag[e0],FaceElements->Tag[e1]);
-            newContactElementsFile->Color[c]=e;
-            for (int i=0; i<NN; i++)
-                newContactElementsFile->Nodes[INDEX2(i,c,NN_Contact)]=FaceElements->Nodes[INDEX2(i,e0,NN)];
-            for (int i=0; i<NN; i++)
-                newContactElementsFile->Nodes[INDEX2(i+NN,c,NN_Contact)]=matching_nodes_in_elem1[INDEX2(i,e,NN)];
-            c++;
+    for (index_t e = 0; e < m_faceElements->numElements; e++)
+        elem_mask[e] = 1;
+
+    for (int e = 0; e < numPairs; e++) {
+        elem_mask[elem0[e]] = 0;
+        elem_mask[elem1[e]] = 0;
+    }
+    dim_t new_numFaceElements = 0;
+    // OMP
+    for (index_t e = 0; e < m_faceElements->numElements; e++) {
+        if (elem_mask[e] > 0) {
+            elem_mask[new_numFaceElements] = e;
+            new_numFaceElements++;
         }
-        newContactElementsFile->minColor=0;
-        newContactElementsFile->maxColor=numPairs-1;
-        // set new face and Contact elements
-        delete FaceElements;
-        FaceElements=newFaceElementsFile;
-        delete ContactElements;
-        ContactElements=newContactElementsFile;
-        prepare(optimize);
     }
+    // allocate new face element and Contact element files
+    ElementFile* newContactElementsFile = new ElementFile(m_contactElements->referenceElementSet, m_mpiInfo);
+    ElementFile* newFaceElementsFile = new ElementFile(m_faceElements->referenceElementSet, m_mpiInfo);
+    newContactElementsFile->allocTable(numPairs+m_contactElements->numElements);
+    newFaceElementsFile->allocTable(new_numFaceElements);
+    // copy the old elements over
+    // get the face elements which are still in use
+    newFaceElementsFile->gather(elem_mask, m_faceElements);
+    // get the contact elements which are still in use
+    newContactElementsFile->copyTable(0, 0, 0, m_contactElements);
+    dim_t c = m_contactElements->numElements;
+    // OMP
+    for (int e = 0; e < numPairs; e++) {
+        const int e0 = elem0[e];
+        const int e1 = elem1[e];
+        newContactElementsFile->Id[c] = std::min(m_faceElements->Id[e0], m_faceElements->Id[e1]);
+        newContactElementsFile->Tag[c] = std::min(m_faceElements->Tag[e0], m_faceElements->Tag[e1]);
+        newContactElementsFile->Color[c] = e;
+        for (int i = 0; i < NN; i++)
+            newContactElementsFile->Nodes[INDEX2(i,c,NN_Contact)] =
+                                        m_faceElements->Nodes[INDEX2(i,e0,NN)];
+        for (int i = 0; i < NN; i++)
+            newContactElementsFile->Nodes[INDEX2(i+NN,c,NN_Contact)] =
+                                       matching_nodes_in_elem1[INDEX2(i,e,NN)];
+        c++;
+    }
+    newContactElementsFile->minColor = 0;
+    newContactElementsFile->maxColor = numPairs-1;
+    // set new face and Contact elements
+
+    delete m_faceElements;
+    m_faceElements = newFaceElementsFile;
+
+    delete m_contactElements;
+    m_contactElements = newContactElementsFile;
+
+    prepare(optimize);
+
     delete[] elem1;
     delete[] elem0;
     delete[] matching_nodes_in_elem1;
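
joinFaces above first marks every face element with a keep flag, clears the flags of the matched pairs, and then compacts the flag array in place into the list of surviving element indices that is handed to ElementFile::gather(). The mask-then-compact idiom in isolation (compactMask is a hypothetical helper, not part of finley):

    #include <cstddef>
    #include <vector>

    // Turn a keep/drop flag array into a packed list of kept indices, in
    // place, and return how many were kept -- mirroring how elem_mask is
    // reused above before gather() is called.
    std::size_t compactMask(std::vector<int>& mask)
    {
        std::size_t kept = 0;
        for (std::size_t e = 0; e < mask.size(); ++e) {
            if (mask[e] > 0) {
                mask[kept] = static_cast<int>(e);
                ++kept;
            }
        }
        return kept;
    }
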
diff --git a/finley/src/Mesh_merge.cpp b/finley/src/Mesh_merge.cpp
index 51e570d..45aa489 100644
--- a/finley/src/Mesh_merge.cpp
+++ b/finley/src/Mesh_merge.cpp
@@ -14,194 +14,162 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Finley: Mesh
-
-  takes nodes, elements, etc. of all in put meshes and copies them into
-  a new mesh. Ids of output are shifted by the maximum Id of input.
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "Mesh.h"
+#include "FinleyDomain.h"
 #include "Util.h"
 
+using escript::ValueError;
+
 namespace finley {
 
-Mesh* Mesh_merge(const std::vector<Mesh*>& msh)
+FinleyDomain* FinleyDomain::merge(const std::vector<const FinleyDomain*>& msh)
 {
-    if (msh.size()==0) {
-        setError(VALUE_ERROR, "Mesh_merge: Empty mesh list");
-        return NULL;
+    if (msh.empty()) {
+        throw ValueError("merge: Empty mesh list");
     }
-    for (int i=0; i<msh.size(); i++) {
-        if (msh[i]->MPIInfo->size > 1) {
-            setError(TYPE_ERROR, "Mesh_merge: more than 1 processor is not supported yet.");
-            return NULL;
+    for (int i = 0; i < msh.size(); i++) {
+        if (msh[i]->getMPISize() > 1) {
+            throw escript::NotImplementedError("merge: more than 1 processor is not supported yet.");
         }
     }
 
-    const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
-    int numNodes=0;
-    int numElements=0;
-    int numFaceElements=0;
-    int numContactElements=0;
-    int numPoints=0;
-    int maxNodeID=0;
-    int maxDOF=0;
-    int maxElementID=0;
-    int maxElementID2=0;
-    ElementTypeId elementTypeId=NoRef;
-    ElementTypeId faceElementTypeId=NoRef;
-    ElementTypeId pointTypeId=NoRef;
-    ElementTypeId contactTypeId=NoRef;
-
-    int order=msh[0]->integrationOrder;
-    int reduced_order=msh[0]->reducedIntegrationOrder;
-    const int numDim=msh[0]->Nodes->numDim;
-    esysUtils::JMPI& mpiInfo=msh[0]->MPIInfo;
+    dim_t numNodes = 0;
+    dim_t numElements = 0;
+    dim_t numFaceElements = 0;
+    dim_t numContactElements = 0;
+    dim_t numPoints = 0;
+    index_t maxNodeID = 0;
+    index_t maxDOF = 0;
+    index_t maxElementID = 0;
+    index_t maxElementID2 = 0;
+    ElementTypeId elementTypeId = NoRef;
+    ElementTypeId faceElementTypeId = NoRef;
+    ElementTypeId pointTypeId = NoRef;
+    ElementTypeId contactTypeId = NoRef;
+
+    int order = msh[0]->integrationOrder;
+    int reducedOrder = msh[0]->reducedIntegrationOrder;
+    const int numDim = msh[0]->getDim();
+    escript::JMPI mpiInfo = msh[0]->getMPI();
     std::stringstream newName;
 
-    for (int i=0; i<msh.size(); i++) {
+    for (int i=0; i < msh.size(); i++) {
         // check if all meshes have the same type and dimensions
-        order=std::max(order, msh[i]->integrationOrder);
-        reduced_order=std::min(reduced_order, msh[i]->reducedIntegrationOrder);
-        numNodes+=msh[i]->Nodes->numNodes;
-        if (mpiInfo->comm != msh[i]->MPIInfo->comm) {
-            setError(TYPE_ERROR, "Mesh_merge: MPI communicators of meshes don't match.");
-            break;
+        order = std::max(order, msh[i]->integrationOrder);
+        reducedOrder = std::min(reducedOrder, msh[i]->reducedIntegrationOrder);
+        numNodes += msh[i]->getNodes()->getNumNodes();
+        if (mpiInfo->comm != msh[i]->getMPIComm()) {
+            throw ValueError("merge: MPI communicators of meshes don't match.");
         }
-        if (numDim != msh[i]->Nodes->numDim) {
-            setError(TYPE_ERROR, "Mesh_merge: Spatial dimensions of meshes don't match.");
-            break;
+        if (numDim != msh[i]->getDim()) {
+            throw ValueError("merge: Spatial dimensions of meshes don't match.");
         }
 
-        if (msh[i]->Elements) {
-            numElements+=msh[i]->Elements->numElements;
-            if (elementTypeId==NoRef) {
-                elementTypeId=msh[i]->Elements->referenceElementSet->referenceElement->Type->TypeId;
+        if (msh[i]->getElements()) {
+            numElements += msh[i]->getElements()->numElements;
+            if (elementTypeId == NoRef) {
+                elementTypeId = msh[i]->getElements()->referenceElementSet->referenceElement->Type->TypeId;
             } else {
-                if (elementTypeId != msh[i]->Elements->referenceElementSet->referenceElement->Type->TypeId) {
-                    setError(TYPE_ERROR, "Mesh_merge: element types of meshes don't match.");
-                    break;
+                if (elementTypeId != msh[i]->getElements()->referenceElementSet->referenceElement->Type->TypeId) {
+                    throw ValueError("merge: element types of meshes don't match.");
                 }
             }
         }
 
-        if (msh[i]->FaceElements) {
-            numFaceElements+=msh[i]->FaceElements->numElements;
-            if (faceElementTypeId==NoRef) {
-                faceElementTypeId=msh[i]->FaceElements->referenceElementSet->referenceElement->Type->TypeId;
+        if (msh[i]->getFaceElements()) {
+            numFaceElements += msh[i]->getFaceElements()->numElements;
+            if (faceElementTypeId == NoRef) {
+                faceElementTypeId = msh[i]->getFaceElements()->referenceElementSet->referenceElement->Type->TypeId;
             } else {
-                if (faceElementTypeId != msh[i]->FaceElements->referenceElementSet->referenceElement->Type->TypeId) {
-                    setError(TYPE_ERROR, "Mesh_merge: face element types of meshes don't match.");
-                    break;
+                if (faceElementTypeId != msh[i]->getFaceElements()->referenceElementSet->referenceElement->Type->TypeId) {
+                    throw ValueError("merge: face element types of meshes don't match.");
                 }
             }
         }
 
-        if (msh[i]->ContactElements) {
-            numContactElements+=msh[i]->ContactElements->numElements;
-            if (contactTypeId==NoRef) {
-                contactTypeId=msh[i]->ContactElements->referenceElementSet->referenceElement->Type->TypeId;
+        if (msh[i]->getContactElements()) {
+            numContactElements += msh[i]->getContactElements()->numElements;
+            if (contactTypeId == NoRef) {
+                contactTypeId = msh[i]->getContactElements()->referenceElementSet->referenceElement->Type->TypeId;
             } else {
-                if (contactTypeId != msh[i]->ContactElements->referenceElementSet->referenceElement->Type->TypeId) {
-                    setError(TYPE_ERROR, "Mesh_merge: contact element types of meshes don't match.");
-                    break;
+                if (contactTypeId != msh[i]->getContactElements()->referenceElementSet->referenceElement->Type->TypeId) {
+                    throw ValueError("merge: contact element types of meshes don't match.");
                 }
             }
         }
 
-        if (msh[i]->Points) {
-            numPoints+=msh[i]->Points->numElements;
-            if (pointTypeId==NoRef) {
-                pointTypeId=msh[i]->Points->referenceElementSet->referenceElement->Type->TypeId;
+        if (msh[i]->getPoints()) {
+            numPoints += msh[i]->getPoints()->numElements;
+            if (pointTypeId == NoRef) {
+                pointTypeId = msh[i]->getPoints()->referenceElementSet->referenceElement->Type->TypeId;
             } else {
-                if (pointTypeId != msh[i]->Points->referenceElementSet->referenceElement->Type->TypeId ) {
-                    setError(TYPE_ERROR, "Mesh_merge: point element types of meshes don't match.");
-                    break;
+                if (pointTypeId != msh[i]->getPoints()->referenceElementSet->referenceElement->Type->TypeId ) {
+                    throw ValueError("merge: point element types of meshes don't match.");
                 }
             }
         }
 
-        if (i>0)
+        if (i > 0)
             newName << "+";
         newName << msh[i]->m_name;
     }
 
     // allocate
-    Mesh* out=NULL;
-    if (noError()) {
-        out=new Mesh(newName.str(), numDim, mpiInfo);
-        refElements.reset(new ReferenceElementSet(elementTypeId, order, reduced_order));
-        refFaceElements.reset(new ReferenceElementSet(faceElementTypeId, order, reduced_order));
-        refContactElements.reset(new ReferenceElementSet(contactTypeId, order, reduced_order));
-        refPoints.reset(new ReferenceElementSet(pointTypeId, order, reduced_order));
-    }
-    if (noError()) {
-        out->Elements=new ElementFile(refElements, mpiInfo);
-        out->FaceElements=new ElementFile(refFaceElements, mpiInfo);
-        out->Points=new ElementFile(refPoints, mpiInfo);
-        out->ContactElements=new ElementFile(refContactElements, mpiInfo);
+    FinleyDomain* out = new FinleyDomain(newName.str(), numDim, mpiInfo);
+    const_ReferenceElementSet_ptr refElements(new ReferenceElementSet(elementTypeId, order, reducedOrder));
+    const_ReferenceElementSet_ptr refFaceElements(new ReferenceElementSet(faceElementTypeId, order, reducedOrder));
+    const_ReferenceElementSet_ptr refContactElements(new ReferenceElementSet(contactTypeId, order, reducedOrder));
+    const_ReferenceElementSet_ptr refPoints(new ReferenceElementSet(pointTypeId, order, reducedOrder));
+
+    NodeFile* nodes = out->getNodes();
+    out->setElements(new ElementFile(refElements, mpiInfo));
+    out->setFaceElements(new ElementFile(refFaceElements, mpiInfo));
+    out->setPoints(new ElementFile(refPoints, mpiInfo));
+    out->setContactElements(new ElementFile(refContactElements, mpiInfo));
 
-    }
     // allocate new tables
-    if (noError()) {
-        out->Nodes->allocTable(numNodes);
-        out->Elements->allocTable(numElements);
-        out->FaceElements->allocTable(numFaceElements);
-        out->ContactElements->allocTable(numContactElements);
-        out->Points->allocTable(numPoints);
-    }
+    nodes->allocTable(numNodes);
+    out->getElements()->allocTable(numElements);
+    out->getFaceElements()->allocTable(numFaceElements);
+    out->getContactElements()->allocTable(numContactElements);
+    out->getPoints()->allocTable(numPoints);
 
     // copy tables
-    if (noError()) {
-        numNodes=0;
-        numElements=0;
-        numFaceElements=0;
-        numContactElements=0;
-        numPoints=0;
-
-        for (int i=0; i<msh.size(); i++) {
-            out->Nodes->copyTable(numNodes, maxNodeID, maxDOF, msh[i]->Nodes);
-            out->Elements->copyTable(numElements,numNodes,maxElementID,msh[i]->Elements);
-            out->FaceElements->copyTable(numFaceElements,numNodes,maxElementID,msh[i]->FaceElements);
-            out->ContactElements->copyTable(numContactElements,numNodes,maxElementID,msh[i]->ContactElements);
-            out->Points->copyTable(numPoints,numNodes,maxElementID,msh[i]->Points);
-
-            numNodes=+msh[i]->Nodes->numNodes;
-            numElements=+msh[i]->Elements->numElements;
-            numFaceElements=+msh[i]->FaceElements->numElements;
-            numContactElements=+msh[i]->ContactElements->numElements;
-            numPoints=+msh[i]->Points->numElements;
-
-            if (msh[i]->Nodes->numNodes>0)
-                maxNodeID+=util::getMaxInt(1,msh[i]->Nodes->numNodes,msh[i]->Nodes->Id)+1;
-            maxDOF+=util::getMaxInt(1,msh[i]->Nodes->numNodes,msh[i]->Nodes->globalDegreesOfFreedom)+1;
-            maxElementID2=0;
-            if (msh[i]->Elements->numElements>0)
-                maxElementID2=MAX(maxElementID2, util::getMaxInt(1,msh[i]->Elements->numElements,msh[i]->Elements->Id));
-            if (msh[i]->FaceElements->numElements>0)
-                maxElementID2=MAX(maxElementID2, util::getMaxInt(1,msh[i]->FaceElements->numElements,msh[i]->FaceElements->Id));
-            if (msh[i]->ContactElements->numElements>0)
-                maxElementID2=MAX(maxElementID2, util::getMaxInt(1,msh[i]->ContactElements->numElements,msh[i]->ContactElements->Id));
-            if (msh[i]->Points->numElements)
-                maxElementID2=MAX(maxElementID2, util::getMaxInt(1,msh[i]->Points->numElements,msh[i]->Points->Id));
-                maxElementID+=maxElementID2+1;
-        }
+    numNodes = 0;
+    numElements = 0;
+    numFaceElements = 0;
+    numContactElements = 0;
+    numPoints = 0;
+
+    for (int i = 0; i < msh.size(); i++) {
+        nodes->copyTable(numNodes, maxNodeID, maxDOF, msh[i]->getNodes());
+        out->getElements()->copyTable(numElements, numNodes, maxElementID, msh[i]->getElements());
+        out->getFaceElements()->copyTable(numFaceElements, numNodes, maxElementID, msh[i]->getFaceElements());
+        out->getContactElements()->copyTable(numContactElements, numNodes, maxElementID, msh[i]->getContactElements());
+        out->getPoints()->copyTable(numPoints, numNodes, maxElementID, msh[i]->getPoints());
+
+        numNodes += msh[i]->getNodes()->getNumNodes();
+        numElements += msh[i]->getElements()->numElements;
+        numFaceElements += msh[i]->getFaceElements()->numElements;
+        numContactElements += msh[i]->getContactElements()->numElements;
+        numPoints += msh[i]->getPoints()->numElements;
+
+        if (msh[i]->getNodes()->getNumNodes() > 0)
+            maxNodeID += util::getMaxInt(1, msh[i]->getNodes()->getNumNodes(), msh[i]->getNodes()->Id) + 1;
+        maxDOF += util::getMaxInt(1, msh[i]->getNodes()->getNumNodes(), msh[i]->getNodes()->globalDegreesOfFreedom) + 1;
+        maxElementID2 = 0;
+        if (msh[i]->getElements()->numElements > 0)
+            maxElementID2 = std::max(maxElementID2, util::getMaxInt(1, msh[i]->getElements()->numElements, msh[i]->getElements()->Id));
+        if (msh[i]->getFaceElements()->numElements > 0)
+            maxElementID2 = std::max(maxElementID2, util::getMaxInt(1, msh[i]->getFaceElements()->numElements, msh[i]->getFaceElements()->Id));
+        if (msh[i]->getContactElements()->numElements > 0)
+            maxElementID2 = std::max(maxElementID2, util::getMaxInt(1, msh[i]->getContactElements()->numElements, msh[i]->getContactElements()->Id));
+        if (msh[i]->getPoints()->numElements > 0)
+            maxElementID2 = std::max(maxElementID2, util::getMaxInt(1, msh[i]->getPoints()->numElements, msh[i]->getPoints()->Id));
+        maxElementID += maxElementID2 + 1;
     }
+
     // all done
-    if (!noError()) {
-        delete out;
-        out=NULL;
-    } else {
-        out->prepare(false);
-    }
+    out->prepare(false);
     return out;
 }
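
merge() above keeps the copied nodes and elements unique by shifting the Ids of each input mesh by a running offset that is advanced past the largest Id copied so far (maxNodeID, maxDOF, maxElementID). The bookkeeping in isolation, detached from the copyTable interface (appendShifted is a hypothetical helper):

    #include <algorithm>
    #include <vector>

    // Append ids from one mesh shifted by the running offset, then advance
    // the offset past the largest id just appended, so the next mesh cannot
    // collide with any id emitted so far.
    void appendShifted(std::vector<long>& out, const std::vector<long>& in,
                       long& offset)
    {
        long maxId = -1;
        for (long id : in) {
            out.push_back(id + offset);
            maxId = std::max(maxId, id);
        }
        offset += maxId + 1;
    }
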
 
diff --git a/finley/src/Mesh_optimizeDOFDistribution.cpp b/finley/src/Mesh_optimizeDOFDistribution.cpp
index e109880..cfd23d9 100644
--- a/finley/src/Mesh_optimizeDOFDistribution.cpp
+++ b/finley/src/Mesh_optimizeDOFDistribution.cpp
@@ -14,115 +14,127 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
+#include "IndexList.h"
 
-/****************************************************************************
-
-  Finley: Mesh: optimizes the distribution of DOFs across processors
-  using ParMETIS. On return a new distribution is given and the globalDOF
-  are relabeled accordingly but the mesh is not redistributed yet.
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
+#include <escript/index.h>
 
-#include "Mesh.h"
-#include "IndexList.h"
-#ifdef USE_PARMETIS
+#ifdef ESYS_HAVE_PARMETIS
 #include <parmetis.h>
 #ifndef REALTYPEWIDTH
 typedef float real_t;
 #endif
 #endif
+
+#include <iostream>
 #include <boost/scoped_array.hpp>
 
 namespace finley {
 
-#ifdef USE_PARMETIS
+#ifdef ESYS_HAVE_PARMETIS
 // Checks whether there is any rank which has no vertex. In case 
 // such a rank exists, we don't use parmetis since parmetis requires
 // that every rank has at least 1 vertex (at line 129 of file
 // "xyzpart.c" in parmetis 3.1.1, variable "nvtxs" would be 0 if 
 // any rank has no vertex).
-bool allRanksHaveNodes(esysUtils::JMPI& mpiInfo, const std::vector<index_t>& distribution)
+static bool allRanksHaveNodes(escript::JMPI mpiInfo,
+                              const IndexVector& distribution)
 {
     int ret = 1;
 
     if (mpiInfo->rank == 0) {
-        for (int i=0; i<mpiInfo->size; i++) {
-            if (distribution[i+1] == distribution[i]) {
+        for (int i = 0; i < mpiInfo->size; i++) {
+            if (distribution[i + 1] == distribution[i]) {
                 ret = 0;
                 break;
             }
         }
         if (ret == 0) {
-            std::cout << "Mesh::optimizeDOFDistribution: "
-                << "Parmetis is not used since at least one rank has no vertex!"
-                << std::endl;
+            std::cerr << "INFO: ParMetis is not used since at least one rank "
+                         "has no vertex." << std::endl;
         }
     }
     MPI_Bcast(&ret, 1, MPI_INTEGER, 0, mpiInfo->comm);
-    return (ret==1);
+    return ret==1;
 }
 #endif
 
-
-/****************************************************************************/
-
-void Mesh::optimizeDOFDistribution(std::vector<index_t>& distribution)
+/// optimizes the distribution of DOFs across processors using ParMETIS.
+/// On return a new distribution is given and the globalDOF are relabeled
+/// accordingly but the mesh has not been redistributed yet
+void FinleyDomain::optimizeDOFDistribution(IndexVector& distribution)
 {
-    // these two are not const because of parmetis call
-    int mpiSize=MPIInfo->size;
-    const int myRank=MPIInfo->rank;
-    const index_t myFirstVertex=distribution[myRank];
-    const index_t myLastVertex=distribution[myRank+1];
-    const dim_t myNumVertices=myLastVertex-myFirstVertex;
+    int mpiSize = m_mpiInfo->size;
+    const int myRank = m_mpiInfo->rank;
+    const index_t myFirstVertex = distribution[myRank];
+    const index_t myLastVertex = distribution[myRank + 1];
+    const dim_t myNumVertices = myLastVertex - myFirstVertex;
+    const dim_t numNodes = m_nodes->getNumNodes();
 
     // first step is to distribute the elements according to a global X of DOF
-    // len is used for the sending around of partition later on
-    index_t len=0;
-    for (int p=0; p<mpiSize; ++p)
-        len=std::max(len, distribution[p+1]-distribution[p]);
-    std::vector<index_t> partition(len);
-
-#ifdef USE_PARMETIS
-    if (mpiSize>1 && allRanksHaveNodes(MPIInfo, distribution)) {
+    dim_t len = 0;
+    for (int p = 0; p < mpiSize; ++p)
+        len = std::max(len, distribution[p + 1] - distribution[p]);
+
+    index_t* partition = new index_t[len];
+
+#ifdef ESYS_HAVE_PARMETIS
+    if (mpiSize > 1 && allRanksHaveNodes(m_mpiInfo, distribution)) {
         boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
-        int dim=Nodes->numDim;
+        int dim = m_nodes->numDim;
+
         // create the adjacency structure xadj and adjncy
 #pragma omp parallel
         {
             // insert contributions from element matrices into columns index
             IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                    myFirstVertex, myLastVertex, Elements,
-                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
+                    myFirstVertex, myLastVertex, m_elements,
+                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
             IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                    myFirstVertex, myLastVertex, FaceElements,
-                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
+                    myFirstVertex, myLastVertex, m_faceElements,
+                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
             IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                    myFirstVertex, myLastVertex, ContactElements,
-                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
+                    myFirstVertex, myLastVertex, m_contactElements,
+                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
             IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
-                    myFirstVertex, myLastVertex, Points,
-                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
+                    myFirstVertex, myLastVertex, m_points,
+                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
         }
-       
-        // create the local matrix pattern
-        const dim_t globalNumVertices=distribution[mpiSize];
-        paso::Pattern_ptr pattern(paso::Pattern::fromIndexListArray(0,
-                myNumVertices, index_list.get(), 0, globalNumVertices, 0));
+
         // set the coordinates
-        std::vector<real_t> xyz(myNumVertices*dim);
+        real_t* xyz = new real_t[myNumVertices * dim];
 #pragma omp parallel for
-        for (index_t i=0; i<Nodes->numNodes; ++i) {
-            const index_t k=Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
-            if (k>=0 && k<myNumVertices) {
-                for (int j=0; j<dim; ++j)
-                    xyz[k*dim+j]=static_cast<real_t>(Nodes->Coordinates[INDEX2(j,i,dim)]); 
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i] - myFirstVertex;
+            if (k >= 0 && k < myNumVertices) {
+                for (int j = 0; j < dim; ++j)
+                    xyz[k * dim + j] = static_cast<real_t>(m_nodes->Coordinates[INDEX2(j, i, dim)]);
             }
         }
 
+        // create the local CSR matrix pattern
+        const dim_t globalNumVertices = distribution[mpiSize];
+        index_t* ptr = new index_t[myNumVertices + 1];
+#pragma omp parallel for
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            ptr[i] = index_list[i].count(0, globalNumVertices);
+        }
+        // accumulate ptr
+        dim_t s = 0;
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            const index_t itmp = ptr[i];
+            ptr[i] = s;
+            s += itmp;
+        }
+        ptr[myNumVertices] = s;
+
+        // create index
+        index_t* index = new index_t[s];
+#pragma omp parallel for
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            index_list[i].toArray(&index[ptr[i]], 0, globalNumVertices, 0);
+        }
+
         index_t wgtflag = 0;
         index_t numflag = 0;
         index_t ncon = 1;
@@ -130,100 +142,106 @@ void Mesh::optimizeDOFDistribution(std::vector<index_t>& distribution)
         index_t impiSize = mpiSize;
         index_t idim = dim;
         // options[0]=1 -> non-default values, evaluate rest of options
-        // options[1]=15 -> DBG_TIME | DBG_INFO | DBG_PROGRESS | DBG_REFINEINFO
+        // options[1]=0 -> debug level (no output)
         // options[2] -> random seed
-        index_t options[3] = { 1, 15, 0 };
-        std::vector<real_t> tpwgts(ncon*mpiSize, 1.f/mpiSize);
+        index_t options[3] = { 1, 0, 0 };
+        std::vector<real_t> tpwgts(ncon * mpiSize, 1.f / mpiSize);
         std::vector<real_t> ubvec(ncon, 1.05f);
-        ParMETIS_V3_PartGeomKway(&distribution[0], pattern->ptr, pattern->index,
-                              NULL, NULL, &wgtflag, &numflag, &idim, &xyz[0],
-                              &ncon, &impiSize, &tpwgts[0], &ubvec[0], options,
-                              &edgecut, &partition[0], &MPIInfo->comm);
+        ParMETIS_V3_PartGeomKway(&distribution[0], ptr, index, NULL, NULL,
+                                 &wgtflag, &numflag, &idim, xyz, &ncon,
+                                 &impiSize, &tpwgts[0], &ubvec[0], options,
+                                 &edgecut, partition, &m_mpiInfo->comm);
+        delete[] xyz;
+        delete[] index;
+        delete[] ptr;
     } else {
-        for (index_t i=0; i<myNumVertices; ++i)
-            partition[i]=0; // CPU 0 owns all
+        for (index_t i = 0; i < myNumVertices; ++i)
+            partition[i] = 0; // CPU 0 owns all
     }
 #else
-    for (index_t i=0; i<myNumVertices; ++i)
-        partition[i]=myRank; // CPU myRank owns all
-#endif
+#pragma omp parallel for
+    for (index_t i = 0; i < myNumVertices; ++i)
+        partition[i] = myRank; // CPU myRank owns all
+#endif // ESYS_HAVE_PARMETIS
 
     // create a new distribution and labeling of the DOF
-    std::vector<index_t> new_distribution(mpiSize+1, 0);
+    IndexVector new_distribution(mpiSize + 1);
 #pragma omp parallel
     {
-        std::vector<int> loc_partition_count(mpiSize, 0);
+        IndexVector loc_partition_count(mpiSize);
 #pragma omp for
-        for (index_t i=0; i<myNumVertices; ++i)
+        for (index_t i = 0; i < myNumVertices; ++i)
             loc_partition_count[partition[i]]++;
 #pragma omp critical
         {
-            for (int i=0; i<mpiSize; ++i)
-                new_distribution[i]+=loc_partition_count[i];
+            for (int i = 0; i < mpiSize; ++i)
+                new_distribution[i] += loc_partition_count[i];
         }
     }
-    index_t *recvbuf=new index_t[mpiSize*mpiSize];
+
+    IndexVector recvbuf(mpiSize * mpiSize);
 #ifdef ESYS_MPI
     // recvbuf will be the concatenation of each CPU's contribution to
     // new_distribution
-    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, recvbuf, mpiSize,
-                  MPI_INT, MPIInfo->comm);
+    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, &recvbuf[0],
+                  mpiSize, MPI_DIM_T, m_mpiInfo->comm);
 #else
-    for (int i=0; i<mpiSize; ++i)
-        recvbuf[i]=new_distribution[i];
+    for (int i = 0; i < mpiSize; ++i)
+        recvbuf[i] = new_distribution[i];
 #endif
-    new_distribution[0]=0;
+    new_distribution[0] = 0;
     std::vector<index_t> newGlobalDOFID(len);
-    for (int rank=0; rank<mpiSize; rank++) {
-        index_t c=0;
-        for (int i=0; i<myRank; ++i)
-            c+=recvbuf[rank+mpiSize*i];
-        for (index_t i=0; i<myNumVertices; ++i) {
-            if (rank==partition[i]) {
-                newGlobalDOFID[i]=new_distribution[rank]+c;
+    for (int rank = 0; rank < mpiSize; rank++) {
+        index_t c = 0;
+        for (int i = 0; i < myRank; ++i)
+            c += recvbuf[rank + mpiSize * i];
+        for (index_t i = 0; i < myNumVertices; ++i) {
+            if (rank == partition[i]) {
+                newGlobalDOFID[i] = new_distribution[rank] + c;
                 c++;
             }
         }
-        for (int i=myRank+1; i<mpiSize; ++i)
-            c+=recvbuf[rank+mpiSize*i];
-        new_distribution[rank+1]=new_distribution[rank]+c;
+        for (int i = myRank + 1; i < mpiSize; ++i)
+            c += recvbuf[rank + mpiSize * i];
+        new_distribution[rank + 1] = new_distribution[rank] + c;
     }
-    delete[] recvbuf;
 
     // now the overlap needs to be created by sending the partition around
 #ifdef ESYS_MPI
-    int dest=esysUtils::mod_rank(mpiSize, myRank + 1);
-    int source=esysUtils::mod_rank(mpiSize, myRank - 1);
+    int dest = m_mpiInfo->mod_rank(myRank + 1);
+    int source = m_mpiInfo->mod_rank(myRank - 1);
 #endif
-    int current_rank=myRank;
-    std::vector<short> setNewDOFId(Nodes->numNodes, 1);
+    int current_rank = myRank;
+    std::vector<short> setNewDOFId(numNodes, 1);
 
-    for (int p=0; p<mpiSize; ++p) {
-        const index_t firstVertex=distribution[current_rank];
-        const index_t lastVertex=distribution[current_rank+1];
+    for (int p = 0; p < mpiSize; ++p) {
+        const index_t firstVertex = distribution[current_rank];
+        const index_t lastVertex = distribution[current_rank + 1];
 #pragma omp parallel for
-        for (index_t i=0; i<Nodes->numNodes; ++i) {
-            const index_t k=Nodes->globalDegreesOfFreedom[i];
-            if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
-                Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
-                setNewDOFId[i]=0;
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = m_nodes->globalDegreesOfFreedom[i];
+            if (setNewDOFId[i] && firstVertex <= k && k < lastVertex) {
+                m_nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
+                setNewDOFId[i] = 0;
             }
         }
 
-        if (p<mpiSize-1) { // the final send can be skipped
+        if (p < mpiSize - 1) { // the final send can be skipped
 #ifdef ESYS_MPI
             MPI_Status status;
             MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
-                               dest, MPIInfo->msg_tag_counter,
-                               source, MPIInfo->msg_tag_counter,
-                               MPIInfo->comm, &status);
+                                 dest, m_mpiInfo->counter(),
+                                 source, m_mpiInfo->counter(),
+                                 m_mpiInfo->comm, &status);
+            m_mpiInfo->incCounter();
 #endif
-            MPIInfo->msg_tag_counter++;
-            current_rank=esysUtils::mod_rank(mpiSize, current_rank-1);
+            current_rank = m_mpiInfo->mod_rank(current_rank - 1);
         }
     }
-    for (int i=0; i<mpiSize+1; ++i)
-        distribution[i]=new_distribution[i];
+    for (int i = 0; i < mpiSize + 1; ++i)
+        distribution[i] = new_distribution[i];
+
+    delete[] partition;
 }
 
 } // namespace finley
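
The ParMETIS branch above assembles the local adjacency graph in CSR form: it first counts the entries of every row, prefix-sums those counts into the ptr offsets, and then writes the index array row by row before handing ptr/index to ParMETIS_V3_PartGeomKway. The counting-to-offsets step on its own (buildCsrOffsets is an illustrative helper, not the IndexList API):

    #include <cstddef>
    #include <vector>

    // Turn per-row entry counts into CSR offsets: row i ends up owning the
    // range index[ptr[i] .. ptr[i+1]) -- the same accumulation performed
    // above before the index array is filled.
    std::vector<long> buildCsrOffsets(const std::vector<long>& counts)
    {
        std::vector<long> ptr(counts.size() + 1);
        long s = 0;
        for (std::size_t i = 0; i < counts.size(); ++i) {
            ptr[i] = s;
            s += counts[i];
        }
        ptr[counts.size()] = s;
        return ptr;
    }
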
diff --git a/finley/src/Mesh_read.cpp b/finley/src/Mesh_read.cpp
index 110f703..1054aa0 100644
--- a/finley/src/Mesh_read.cpp
+++ b/finley/src/Mesh_read.cpp
@@ -14,690 +14,360 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
 
-/****************************************************************************
+#include <escript/index.h>
 
-  Finley: read mesh from file
+using escript::IOError;
 
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+namespace {
 
-#include <ctype.h>
-#include "Mesh.h"
+using namespace finley;
 
-namespace finley {
+ElementFile* readElementFile(std::ifstream& fileHandle, int order,
+                             int reducedOrder, escript::JMPI mpiInfo)
+{
+    dim_t numEle = 0;
+    ElementTypeId typeID = NoRef;
+    std::string elementType, line;
+
+    // Read the element typeID and number of elements
+    if (mpiInfo->rank == 0) {
+        std::getline(fileHandle, line);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file - expected <ElementType> <numEle>");
+        size_t pos = line.find(' ');
+        if (pos == std::string::npos)
+            throw IOError("Mesh::read: Scan error reading file - expected <ElementType> <numEle>");
+        elementType = line.substr(0, pos);
+        numEle = std::stol(line.substr(pos+1));
+        typeID = ReferenceElement::getTypeId(elementType.c_str());
+    }
+#ifdef ESYS_MPI
+    if (mpiInfo->size > 1) {
+        dim_t temp1[2];
+        temp1[0] = (dim_t)typeID;
+        temp1[1] = numEle;
+        int mpiError = MPI_Bcast(temp1, 2, MPI_DIM_T, 0, mpiInfo->comm);
+        if (mpiError != MPI_SUCCESS) {
+            throw FinleyException("Mesh::read: broadcast of element typeID failed");
+        }
+        typeID = static_cast<ElementTypeId>(temp1[0]);
+        numEle = temp1[1];
+    }
+#endif
+    if (typeID == NoRef) {
+        std::stringstream ss;
+        ss << "Mesh::read: Unidentified element type " << elementType;
+        throw IOError(ss.str());
+    }
 
-#define FSCANF_CHECK(scan_ret, reason) {\
-    if (scan_ret == EOF) {\
-        perror(reason);\
-        setError(IO_ERROR,"scan error while reading finley file");\
-        return NULL;\
-    }\
+    // Allocate the ElementFile
+    const_ReferenceElementSet_ptr refElements(new ReferenceElementSet(
+                                                typeID, order, reducedOrder));
+    ElementFile* out = new ElementFile(refElements, mpiInfo);
+    const int numNodes = out->numNodes;
+
+    /********************** Read the element data **************************/
+    dim_t chunkSize = numEle / mpiInfo->size + 1;
+    dim_t totalEle = 0;
+    dim_t chunkEle = 0;
+    int nextCPU = 1;
+    /// Store Id + Tag + node list (+ one int at end for chunkEle)
+    index_t* tempInts = new index_t[chunkSize * (2 + numNodes) + 1];
+    // Elements are specified as a list of integers...only need one message
+    // instead of two as with the nodes
+    if (mpiInfo->rank == 0) { // Master
+        for (;;) {            // Infinite loop
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * (2 + numNodes) + 1; i0++)
+                tempInts[i0] = -1;
+            chunkEle = 0;
+            for (index_t i0 = 0; i0 < chunkSize; i0++) {
+                if (totalEle >= numEle)
+                    break; // End inner loop
+                std::getline(fileHandle, line);
+                if (!fileHandle.good())
+                    throw IOError("Mesh::read: Scan error while reading element data");
+                std::stringstream ss;
+                ss << line;
+                ss >> tempInts[i0 * (2 + numNodes) + 0]
+                   >> tempInts[i0 * (2 + numNodes) + 1];
+                for (int i1 = 0; i1 < numNodes; i1++) {
+                    ss >> tempInts[i0 * (2 + numNodes) + 2 + i1];
+                }
+                totalEle++;
+                chunkEle++;
+            }
+#ifdef ESYS_MPI
+            // Eventually we'll send chunk of elements to each CPU except 0
+            // itself, here goes one of them
+            if (nextCPU < mpiInfo->size) {
+                tempInts[chunkSize * (2 + numNodes)] = chunkEle;
+                MPI_Send(tempInts, chunkSize * (2 + numNodes) + 1, MPI_DIM_T,
+                         nextCPU, 81722, mpiInfo->comm);
+            }
+#endif
+            nextCPU++;
+            // Infinite loop ends when I've read a chunk for each of the worker
+            // nodes plus one more chunk for the master
+            if (nextCPU > mpiInfo->size)
+                break; // End infinite loop
+        } // Infinite loop
+    } // end master
+    else { // Worker
+#ifdef ESYS_MPI
+        // Each worker receives one message
+        MPI_Status status;
+        MPI_Recv(tempInts, chunkSize * (2 + numNodes) + 1, MPI_DIM_T, 0,
+                 81722, mpiInfo->comm, &status);
+        chunkEle = tempInts[chunkSize * (2 + numNodes)];
+#endif
+    } // Worker
+
+    out->allocTable(chunkEle);
+
+    // Copy Element data from tempInts to element file
+    out->minColor = 0;
+    out->maxColor = chunkEle - 1;
+#pragma omp parallel for
+    for (index_t i0 = 0; i0 < chunkEle; i0++) {
+        out->Id[i0] = tempInts[i0 * (2 + numNodes) + 0];
+        out->Tag[i0] = tempInts[i0 * (2 + numNodes) + 1];
+        out->Owner[i0] = mpiInfo->rank;
+        out->Color[i0] = i0;
+        for (int i1 = 0; i1 < numNodes; i1++) {
+            out->Nodes[INDEX2(i1, i0, numNodes)] =
+                tempInts[i0 * (2 + numNodes) + 2 + i1];
+        }
+    }
+    delete[] tempInts;
+    return out;
 }
 
+} // anonymous
 
-Mesh* Mesh::read(esysUtils::JMPI& mpi_info, const std::string fname,
-                 int order, int reduced_order, bool optimize)
+namespace finley {
+
+escript::Domain_ptr FinleyDomain::read(escript::JMPI mpiInfo,
+                                       const std::string& filename,
+                                       int order, int reducedOrder,
+                                       bool optimize)
 {
-    int numNodes, numDim=0, numEle, i0, i1;
-    const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
-    char name[LenString_MAX],element_type[LenString_MAX],frm[20];
-    char error_msg[LenErrorMsg_MAX];
-    FILE *fileHandle_p = NULL;
-    ElementTypeId typeID=NoRef;
-    int scan_ret;
-
-    resetError();
-
-    if (mpi_info->rank == 0) {
-        // get file handle
-        fileHandle_p = fopen(fname.c_str(), "r");
-        if (fileHandle_p==NULL) {
-            sprintf(error_msg,"Mesh_read: Opening file %s for reading failed.",fname.c_str());
-            setError(IO_ERROR,error_msg);
-            return NULL;
+    dim_t numNodes = 0;
+    int numDim = 0;
+    std::string name, line, token;
+    std::ifstream fileHandle;
+
+    if (mpiInfo->rank == 0) {
+        // open file
+        fileHandle.open(filename.c_str());
+        if (!fileHandle.good()) {
+            std::stringstream ss;
+            ss << "Mesh::read: Opening file " << filename
+               << " for reading failed.";
+            throw IOError(ss.str());
         }
 
         // read header
-        sprintf(frm,"%%%d[^\n]",LenString_MAX-1);
-        scan_ret = fscanf(fileHandle_p, frm, name);
-        FSCANF_CHECK(scan_ret, "Mesh::read")
-
-        /* get the number of nodes */
-        scan_ret = fscanf(fileHandle_p, "%1d%*s %d\n", &numDim,&numNodes);
-        FSCANF_CHECK(scan_ret, "Mesh::read")
+        std::getline(fileHandle, name);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file header");
+
+        // get the number of dimensions and nodes
+        std::getline(fileHandle, line);
+        if (!fileHandle.good())
+            throw IOError("Mesh::read: Scan error while reading file - expected <?D-Nodes> <numNodes>");
+        numDim = std::stoi(line.substr(0, 1));
+        token = line.substr(line.find(' ')+1);
+        numNodes = std::stoi(token);
     }
 
 #ifdef ESYS_MPI
-    // Broadcast numDim, numNodes, name if there are multiple MPI procs
-    if (mpi_info->size > 1) {
-        int temp1[3];
-        if (mpi_info->rank == 0) {
+    // MPI Broadcast numDim, numNodes, name if there are multiple MPI procs
+    if (mpiInfo->size > 1) {
+        dim_t temp1[3];
+        if (mpiInfo->rank == 0) {
             temp1[0] = numDim;
             temp1[1] = numNodes;
-            temp1[2] = strlen(name) + 1;
+            temp1[2] = name.length() + 1;
         } else {
             temp1[0] = 0;
             temp1[1] = 0;
             temp1[2] = 1;
         }
-        MPI_Bcast(temp1, 3, MPI_INT,  0, mpi_info->comm);
+        MPI_Bcast(temp1, 3, MPI_DIM_T, 0, mpiInfo->comm);
         numDim = temp1[0];
         numNodes = temp1[1];
-        MPI_Bcast(name, temp1[2], MPI_CHAR, 0, mpi_info->comm);
+        name.resize(temp1[2]);
+        MPI_Bcast(&name[0], temp1[2], MPI_CHAR, 0, mpiInfo->comm);
     }
 #endif
 
-    /* allocate mesh */
-    Mesh* mesh_p = new Mesh(name, numDim, mpi_info);
-
-    if (noError()) {
-        /* Each CPU will get at most chunkSize nodes so the message has to be sufficiently large */
-        int chunkSize = numNodes / mpi_info->size + 1, totalNodes=0, chunkNodes=0,  nextCPU=1;
-        int *tempInts = new int[chunkSize*3+1];        /* Stores the integer message data */
-        double *tempCoords = new double[chunkSize*numDim]; /* Stores the double message data */
-
-        /*
-        Read chunkSize nodes, send it in a chunk to worker CPU which copies chunk into its local mesh_p
-        It doesn't matter that a CPU has the wrong nodes for its elements, this is sorted out later
-        First chunk sent to CPU 1, second to CPU 2, ...
-        Last chunk stays on CPU 0 (the master)
-        The three columns of integers (Id, gDOF, Tag) are gathered into a single array tempInts and sent together in a single MPI message
-        */
-
-        if (mpi_info->rank == 0) {  /* Master */
-            for (;;) {            /* Infinite loop */
-#pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*3+1; i0++) tempInts[i0] = -1;
-
-#pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*numDim; i0++) tempCoords[i0] = -1.0;
-
-                chunkNodes = 0;
-                for (i1=0; i1<chunkSize; i1++) {
-                    if (totalNodes >= numNodes) break;    /* End of inner loop */
-                    if (1 == numDim) {
-                        scan_ret = fscanf(fileHandle_p, "%d %d %d %le\n",
-                                            &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
-                                            &tempCoords[i1*numDim+0]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    if (2 == numDim) {
-                        scan_ret = fscanf(fileHandle_p, "%d %d %d %le %le\n",
-                                            &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
-                                            &tempCoords[i1*numDim+0], &tempCoords[i1*numDim+1]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    if (3 == numDim) {
-                        scan_ret = fscanf(fileHandle_p, "%d %d %d %le %le %le\n",
-                                            &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
-                                            &tempCoords[i1*numDim+0], &tempCoords[i1*numDim+1], &tempCoords[i1*numDim+2]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    totalNodes++; /* When do we quit the infinite loop? */
-                    chunkNodes++; /* How many nodes do we actually have in this chunk? It may be smaller than chunkSize. */
-                }
-                if (chunkNodes > chunkSize) {
-                    setError(ESYS_MPI_ERROR, "Mesh_read: error reading chunks of mesh, data too large for message size");
-                    return NULL;
-                }
-                #ifdef ESYS_MPI
-                    /* Eventually we'll send chunkSize nodes to each CPU numbered 1 ... mpi_info->size-1, here goes one of them */
-                    if (nextCPU < mpi_info->size) {
-                        tempInts[chunkSize*3] = chunkNodes;   /* The message has one more int to send chunkNodes */
-                        MPI_Send(tempInts, chunkSize*3+1, MPI_INT, nextCPU, 81720, mpi_info->comm);
-                        MPI_Send(tempCoords, chunkSize*numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm);
-                    }
-                #endif
-                nextCPU++;
-                /* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-                if (nextCPU > mpi_info->size) break; /* End infinite loop */
-            } /* Infinite loop */
-        }   /* End master */
-        else {  /* Worker */
-            #ifdef ESYS_MPI
-                /* Each worker receives two messages */
-                MPI_Status status;
-                MPI_Recv(tempInts, chunkSize*3+1, MPI_INT, 0, 81720, mpi_info->comm, &status);
-                MPI_Recv(tempCoords, chunkSize*numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status);
-                chunkNodes = tempInts[chunkSize*3];   /* How many nodes are in this workers chunk? */
-            #endif
-        }   /* Worker */
-
-        /* Copy node data from tempMem to mesh_p */
-        mesh_p->Nodes->allocTable(chunkNodes);
-
-        if (noError()) {
-#pragma omp parallel for private (i0, i1) schedule(static)
-            for (i0=0; i0<chunkNodes; i0++) {
-                mesh_p->Nodes->Id[i0]               = tempInts[0+i0];
-                mesh_p->Nodes->globalDegreesOfFreedom[i0]       = tempInts[chunkSize+i0];
-                mesh_p->Nodes->Tag[i0]              = tempInts[chunkSize*2+i0];
-                for (i1=0; i1<numDim; i1++) {
-                    mesh_p->Nodes->Coordinates[INDEX2(i1,i0,numDim)]  = tempCoords[i0*numDim+i1];
-                }
+    // allocate domain
+    FinleyDomain* domain = new FinleyDomain(name, numDim, mpiInfo);
+
+    // Each CPU will get at most chunkSize nodes so the message has to be
+    // sufficiently large
+    dim_t chunkSize = numNodes / mpiInfo->size + 1;
+    dim_t totalNodes = 0;
+    dim_t chunkNodes = 0;
+    int nextCPU = 1;
+    // Stores the integer message data
+    index_t* tempInts = new index_t[chunkSize * 3 + 1];
+    // Stores the double message data
+    double* tempCoords = new double[chunkSize * numDim];
+
+    // Read chunkSize nodes, send it in a chunk to worker CPU which copies
+    // chunk into its local domain.  It doesn't matter that a CPU has the wrong
+    // nodes for its elements, this is sorted out later. First chunk sent to
+    // CPU 1, second to CPU 2, ..., last chunk stays on CPU 0 (the master).
+    // The three columns of integers (Id, gDOF, Tag) are gathered into a single
+    // array tempInts and sent together in a single MPI message.
+    if (mpiInfo->rank == 0) { // Master
+        for (;;) {            // Infinite loop
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * 3 + 1; i0++)
+                tempInts[i0] = -1;
+
+#pragma omp parallel for
+            for (index_t i0 = 0; i0 < chunkSize * numDim; i0++)
+                tempCoords[i0] = -1.0;
+
+            chunkNodes = 0;
+            for (index_t i1 = 0; i1 < chunkSize; i1++) {
+                if (totalNodes >= numNodes)
+                    break;  // End of inner loop
+                std::getline(fileHandle, line);
+                if (!fileHandle.good())
+                    throw IOError("Mesh::read: Scan error while reading node data");
+                std::stringstream ss;
+                ss << line;
+                ss >> tempInts[0 + i1] >> tempInts[chunkSize + i1]
+                   >> tempInts[chunkSize * 2 + i1];
+                ss >> tempCoords[i1 * numDim];
+                if (numDim > 1)
+                    ss >> tempCoords[i1 * numDim + 1];
+                if (numDim > 2)
+                    ss >> tempCoords[i1 * numDim + 2];
+                totalNodes++; // When do we quit the infinite loop?
+                chunkNodes++; // How many nodes do we actually have in this chunk? It may be smaller than chunkSize.
             }
-        }
-        delete[] tempInts;
-        delete[] tempCoords;
-    }
-
-    /* ***********************************  read elements ****************************************************************************************/
-    if (noError()) {
-
-        /* Read the element typeID */
-        if (mpi_info->rank == 0) {
-            scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-            FSCANF_CHECK(scan_ret, "Mesh_read")
-            typeID=ReferenceElement::getTypeId(element_type);
-        }
-        #ifdef ESYS_MPI
-            if (mpi_info->size > 1) {
-                int temp1[2], mpi_error;
-                temp1[0] = (int) typeID;
-                temp1[1] = numEle;
-                mpi_error = MPI_Bcast (temp1, 2, MPI_INT,  0, mpi_info->comm);
-                if (mpi_error != MPI_SUCCESS) {
-                    setError(ESYS_MPI_ERROR, "Mesh_read: broadcast of Element typeID failed");
-                    return NULL;
-                }
-                typeID = (ElementTypeId) temp1[0];
-                numEle = temp1[1];
+            if (chunkNodes > chunkSize) {
+                throw FinleyException("Mesh::read: error reading chunks of domain, data too large for message size");
             }
-        #endif
-        if (typeID==NoRef) {
-            sprintf(error_msg, "Mesh_read: Unidentified element type %s", element_type);
-            setError(VALUE_ERROR, error_msg);
-          }
-    }
-
-    /* Allocate the ElementFile */
-    if (noError()) {
-        refElements.reset(new ReferenceElementSet(typeID, order, reduced_order));
-        mesh_p->Elements=new ElementFile(refElements, mpi_info);
-        // new meaning for numNodes: num nodes per element
-        numNodes = mesh_p->Elements->numNodes;
-    }
-
-    /********************** Read the element data ***************************/
-    if (noError()) {
-        int chunkSize = numEle / mpi_info->size + 1, totalEle=0, nextCPU=1, chunkEle=0;
-        int *tempInts = new int[chunkSize*(2+numNodes)+1]; /* Store Id + Tag + node list (+ one int at end for chunkEle) */
-        /* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-        if (mpi_info->rank == 0) {  /* Master */
-            for (;;) {            /* Infinite loop */
-#pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*(2+numNodes)+1; i0++) tempInts[i0] = -1;
-
-                chunkEle = 0;
-                for (i0=0; i0<chunkSize; i0++) {
-                    if (totalEle >= numEle) break; /* End inner loop */
-                    scan_ret = fscanf(fileHandle_p, "%d %d", &tempInts[i0*(2+numNodes)+0], &tempInts[i0*(2+numNodes)+1]);
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    for (i1 = 0; i1 < numNodes; i1++) {
-                        scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0*(2+numNodes)+2+i1]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    scan_ret = fscanf(fileHandle_p, "\n");
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    totalEle++;
-                    chunkEle++;
-                }
 #ifdef ESYS_MPI
-                    /* Eventually we'll send chunk of elements to each CPU except 0 itself, here goes one of them */
-                    if (nextCPU < mpi_info->size) {
-                        tempInts[chunkSize*(2+numNodes)] = chunkEle;
-                        MPI_Send(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, nextCPU, 81722, mpi_info->comm);
-                    }
-#endif
-                nextCPU++;
-                /* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-                if (nextCPU > mpi_info->size) break; /* End infinite loop */
-            } /* Infinite loop */
-        }   /* End master */
-        else {  /* Worker */
-            #ifdef ESYS_MPI
-                /* Each worker receives one message */
-                MPI_Status status;
-                MPI_Recv(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, 0, 81722, mpi_info->comm, &status);
-                chunkEle = tempInts[chunkSize*(2+numNodes)];
-            #endif
-        }   /* Worker */
-
-        mesh_p->Elements->allocTable(chunkEle);
-
-        /* Copy Element data from tempInts to mesh_p */
-        if (noError()) {
-
-            mesh_p->Elements->minColor=0;
-            mesh_p->Elements->maxColor=chunkEle-1;
-            #pragma omp parallel for private (i0, i1) schedule(static)
-            for (i0=0; i0<chunkEle; i0++) {
-                mesh_p->Elements->Id[i0]    = tempInts[i0*(2+numNodes)+0];
-                mesh_p->Elements->Tag[i0]   = tempInts[i0*(2+numNodes)+1];
-                mesh_p->Elements->Owner[i0]  =mpi_info->rank;
-                mesh_p->Elements->Color[i0] = i0;
-                for (i1 = 0; i1 < numNodes; i1++) {
-                    mesh_p->Elements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0*(2+numNodes)+2+i1];
-                }
-            }
-        }
-
-        delete[] tempInts;
-    }
-    /******************** end of Read the element data **********************/
-
-    /********************** read face elements ******************************/
-    if (noError()) {
-        /* Read the element typeID */
-
-        if (mpi_info->rank == 0) {
-             scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-             FSCANF_CHECK(scan_ret, "Mesh_read")
-             typeID=ReferenceElement::getTypeId(element_type);
-        }
-        #ifdef ESYS_MPI
-            if (mpi_info->size > 1) {
-                int temp1[2];
-                temp1[0] = (int) typeID;
-                temp1[1] = numEle;
-                MPI_Bcast (temp1, 2, MPI_INT,  0, mpi_info->comm);
-                typeID = (ElementTypeId) temp1[0];
-                numEle = temp1[1];
-            }
-        #endif
-        if (typeID==NoRef) {
-            sprintf(error_msg, "Mesh_read: Unidentified element type %s", element_type);
-            setError(VALUE_ERROR, error_msg);
-        }
-        if (noError()) {
-            /* Allocate the ElementFile */
-            refFaceElements.reset(new ReferenceElementSet(typeID, order, reduced_order));
-            mesh_p->FaceElements=new ElementFile(refFaceElements, mpi_info);
-            numNodes = mesh_p->FaceElements->numNodes; // new meaning for numNodes: num nodes per element
-        }
-
-    }
-
-    /*********************** Read the face element data *********************/
-
-    if (noError()) {
-        int chunkSize = numEle / mpi_info->size + 1, totalEle=0, nextCPU=1, chunkEle=0;
-        int *tempInts = new int[chunkSize*(2+numNodes)+1]; /* Store Id + Tag + node list (+ one int at end for chunkEle) */
-        /* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-        if (mpi_info->rank == 0) {  /* Master */
-            for (;;) {            /* Infinite loop */
-                #pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*(2+numNodes)+1; i0++) tempInts[i0] = -1;
-
-                chunkEle = 0;
-                for (i0=0; i0<chunkSize; i0++) {
-                    if (totalEle >= numEle) break; /* End inner loop */
-                    scan_ret = fscanf(fileHandle_p, "%d %d", &tempInts[i0*(2+numNodes)+0], &tempInts[i0*(2+numNodes)+1]);
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    for (i1 = 0; i1 < numNodes; i1++) {
-                        scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0*(2+numNodes)+2+i1]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    scan_ret = fscanf(fileHandle_p, "\n");
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    totalEle++;
-                    chunkEle++;
-                }
-                #ifdef ESYS_MPI
-                    /* Eventually we'll send chunk of elements to each CPU except 0 itself, here goes one of them */
-                    if (nextCPU < mpi_info->size) {
-                        tempInts[chunkSize*(2+numNodes)] = chunkEle;
-                        MPI_Send(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, nextCPU, 81723, mpi_info->comm);
-                    }
-                #endif
-                nextCPU++;
-                /* Infinite loop ends when I've read a chunk for each of the worker nodes plus one more chunk for the master */
-                if (nextCPU > mpi_info->size) break; /* End infinite loop */
-            } /* Infinite loop */
-        }   /* End master */
-        else {  /* Worker */
-            #ifdef ESYS_MPI
-                /* Each worker receives one message */
-                MPI_Status status;
-                MPI_Recv(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, 0, 81723, mpi_info->comm, &status);
-                chunkEle = tempInts[chunkSize*(2+numNodes)];
-                #endif
-        }   /* Worker */
-
-        mesh_p->FaceElements->allocTable(chunkEle);
-
-        if (noError()) {
-            /* Copy Element data from tempInts to mesh_p */
-
-            mesh_p->FaceElements->minColor=0;
-            mesh_p->FaceElements->maxColor=chunkEle-1;
-            #pragma omp parallel for private (i0, i1)
-            for (i0=0; i0<chunkEle; i0++) {
-                mesh_p->FaceElements->Id[i0]    = tempInts[i0*(2+numNodes)+0];
-                mesh_p->FaceElements->Tag[i0]   = tempInts[i0*(2+numNodes)+1];
-                mesh_p->FaceElements->Owner[i0]  =mpi_info->rank;
-                mesh_p->FaceElements->Color[i0] = i0;
-                for (i1 = 0; i1 < numNodes; i1++) {
-                    mesh_p->FaceElements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0*(2+numNodes)+2+i1];
-                }
+            // Eventually we'll send chunkSize nodes to each CPU numbered
+            // 1 ... mpiInfo->size-1, here goes one of them
+            if (nextCPU < mpiInfo->size) {
+                // The message has one more int to send chunkNodes
+                tempInts[chunkSize * 3] = chunkNodes;
+                MPI_Send(tempInts, chunkSize * 3 + 1, MPI_DIM_T, nextCPU, 81720, mpiInfo->comm);
+                MPI_Send(tempCoords, chunkSize * numDim, MPI_DOUBLE, nextCPU, 81721, mpiInfo->comm);
             }
-        }
-
-        delete[] tempInts;
-    }
-    /******************* end of Read the face element data ******************/
-
-
-    /************************* read contact elements ************************/
-
-    /* Read the element typeID */
-    if (noError()) {
-        if (mpi_info->rank == 0) {
-            scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-            FSCANF_CHECK(scan_ret, "Mesh_read")
-            typeID=ReferenceElement::getTypeId(element_type);
-        }
-        #ifdef ESYS_MPI
-            if (mpi_info->size > 1) {
-                int temp1[2];
-                temp1[0] = (int) typeID;
-                temp1[1] = numEle;
-                MPI_Bcast (temp1, 2, MPI_INT,  0, mpi_info->comm);
-                typeID = (ElementTypeId) temp1[0];
-                numEle = temp1[1];
-            }
-        #endif
-        if (typeID==NoRef) {
-            sprintf(error_msg, "Mesh_read: Unidentified element type %s", element_type);
-            setError(VALUE_ERROR, error_msg);
-         }
-    }
-
-    if (noError()) {
-        /* Allocate the ElementFile */
-        refContactElements.reset(new ReferenceElementSet(typeID, order, reduced_order));
-        mesh_p->ContactElements=new ElementFile(refContactElements, mpi_info);
-        numNodes = mesh_p->ContactElements->numNodes; // new meaning for numNodes: num nodes per element
-    }
-    /******************* Read the contact element data **********************/
-    if (noError()) {
-        int chunkSize = numEle / mpi_info->size + 1, totalEle=0, nextCPU=1, chunkEle=0;
-        int *tempInts = new int[chunkSize*(2+numNodes)+1]; /* Store Id + Tag + node list (+ one int at end for chunkEle) */
-        /* Elements are specified as a list of integers...only need one message instead of two as with the nodes */
-        if (mpi_info->rank == 0) {  /* Master */
-            for (;;) {            /* Infinite loop */
-#pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*(2+numNodes)+1; i0++) tempInts[i0] = -1;
-
-                chunkEle = 0;
-                for (i0=0; i0<chunkSize; i0++) {
-                    if (totalEle >= numEle) break; /* End inner loop */
-                    scan_ret = fscanf(fileHandle_p, "%d %d", &tempInts[i0*(2+numNodes)+0], &tempInts[i0*(2+numNodes)+1]);
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    for (i1 = 0; i1 < numNodes; i1++) {
-                        scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0*(2+numNodes)+2+i1]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    scan_ret = fscanf(fileHandle_p, "\n");
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    totalEle++;
-                    chunkEle++;
-                }
-#ifdef ESYS_MPI
-                // Eventually we'll send chunk of elements to each CPU except
-                // 0 itself, here goes one of them
-                if (nextCPU < mpi_info->size) {
-                    tempInts[chunkSize*(2+numNodes)] = chunkEle;
-                    MPI_Send(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, nextCPU, 81724, mpi_info->comm);
-                }
 #endif
-                nextCPU++;
-                // Infinite loop ends when I've read a chunk for each of the
-                // worker nodes plus one more chunk for the master
-                if (nextCPU > mpi_info->size)
-                    break; // End infinite loop
-            } // Infinite loop
-        } // End master
-        else { // Worker
+            nextCPU++;
+            // Infinite loop ends when I've read a chunk for each of the worker
+            // nodes plus one more chunk for the master
+            if (nextCPU > mpiInfo->size)
+                break; // End infinite loop
+        } // Infinite loop
+    } // End master
+    else { // Worker
 #ifdef ESYS_MPI
-            // Each worker receives one message
-            MPI_Status status;
-            MPI_Recv(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, 0, 81724, mpi_info->comm, &status);
-            chunkEle = tempInts[chunkSize*(2+numNodes)] ;
+        // Each worker receives two messages
+        MPI_Status status;
+        MPI_Recv(tempInts, chunkSize * 3 + 1, MPI_DIM_T, 0, 81720, mpiInfo->comm, &status);
+        MPI_Recv(tempCoords, chunkSize * numDim, MPI_DOUBLE, 0, 81721, mpiInfo->comm, &status);
+        // How many nodes are in this worker's chunk?
+        chunkNodes = tempInts[chunkSize * 3];
 #endif
-        } // Worker
-
-        // Copy Element data from tempInts to mesh_p
-        mesh_p->ContactElements->allocTable(chunkEle);
-
-        if (noError()) {
-            mesh_p->ContactElements->minColor=0;
-            mesh_p->ContactElements->maxColor=chunkEle-1;
-#pragma omp parallel for private (i0, i1)
-            for (i0=0; i0<chunkEle; i0++) {
-                mesh_p->ContactElements->Id[i0] = tempInts[i0*(2+numNodes)+0];
-                mesh_p->ContactElements->Tag[i0]    = tempInts[i0*(2+numNodes)+1];
-                mesh_p->ContactElements->Owner[i0]  =mpi_info->rank;
-                mesh_p->ContactElements->Color[i0] = i0;
-                for (i1 = 0; i1 < numNodes; i1++) {
-                    mesh_p->ContactElements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0*(2+numNodes)+2+i1];
-                }
-            }
-        }
-        delete[] tempInts;
-    } // end of Read the contact element data
-
-    // ****************** read nodal elements ******************
-
-    // ***************  Read the element typeID ***********
-
-    if (noError()) {
-        if (mpi_info->rank == 0) {
-            scan_ret = fscanf(fileHandle_p, "%s %d\n", element_type, &numEle);
-            FSCANF_CHECK(scan_ret, "Mesh_read")
-            typeID=ReferenceElement::getTypeId(element_type);
-        }
-#ifdef ESYS_MPI
-        if (mpi_info->size > 1) {
-            int temp1[2];
-            temp1[0] = (int) typeID;
-            temp1[1] = numEle;
-            MPI_Bcast (temp1, 2, MPI_INT,  0, mpi_info->comm);
-            typeID = (ElementTypeId) temp1[0];
-            numEle = temp1[1];
+    } // Worker
+
+    // Copy node data from tempMem to domain
+    NodeFile* nodes = domain->getNodes();
+    nodes->allocTable(chunkNodes);
+
+#pragma omp parallel for
+    for (index_t i0 = 0; i0 < chunkNodes; i0++) {
+        nodes->Id[i0] = tempInts[0 + i0];
+        nodes->globalDegreesOfFreedom[i0] = tempInts[chunkSize + i0];
+        nodes->Tag[i0] = tempInts[chunkSize * 2 + i0];
+        for (int i1 = 0; i1 < numDim; i1++) {
+            nodes->Coordinates[INDEX2(i1, i0, numDim)] = tempCoords[i0 * numDim + i1];
         }
-#endif
-        if (typeID==NoRef) {
-            sprintf(error_msg, "Mesh::read: Unidentified element type %s", element_type);
-            setError(VALUE_ERROR, error_msg);
-         }
-    }
-
-    if (noError()) {
-        // Allocate the ElementFile
-        refPoints.reset(new ReferenceElementSet(typeID, order, reduced_order));
-        mesh_p->Points=new ElementFile(refPoints, mpi_info);
-        // New meaning for numNodes: num nodes per element
-        numNodes = mesh_p->Points->numNodes;
     }
-
-    // ******************* Read the nodal element data ****************
-    if (noError()) {
-        int chunkSize = numEle / mpi_info->size + 1, totalEle=0, nextCPU=1, chunkEle=0;
-        // Store Id + Tag + node list (+ one int at end for chunkEle)
-        int *tempInts = new int[chunkSize*(2+numNodes)+1];
-        // Elements are specified as a list of integers...only need one
-        // message instead of two as with the nodes
-        if (mpi_info->rank == 0) {  // Master
-            for (;;) { // Infinite loop
-#pragma omp parallel for private (i0) schedule(static)
-                for (i0=0; i0<chunkSize*(2+numNodes)+1; i0++) tempInts[i0] = -1;
-
-                chunkEle = 0;
-                for (i0=0; i0<chunkSize; i0++) {
-                    if (totalEle >= numEle) break; /* End inner loop */
-                    scan_ret = fscanf(fileHandle_p, "%d %d", &tempInts[i0*(2+numNodes)+0], &tempInts[i0*(2+numNodes)+1]);
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    for (i1 = 0; i1 < numNodes; i1++) {
-                        scan_ret = fscanf(fileHandle_p, " %d", &tempInts[i0*(2+numNodes)+2+i1]);
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                    }
-                    scan_ret = fscanf(fileHandle_p, "\n");
-                    FSCANF_CHECK(scan_ret, "Mesh_read")
-                    totalEle++;
-                    chunkEle++;
-                }
-#ifdef ESYS_MPI
-                // Eventually we'll send chunk of elements to each CPU
-                // except 0 itself, here goes one of them
-                if (nextCPU < mpi_info->size) {
-                    tempInts[chunkSize*(2+numNodes)] = chunkEle;
-                    MPI_Send(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, nextCPU, 81725, mpi_info->comm);
-                }
-#endif
-                nextCPU++;
-                // Infinite loop ends when I've read a chunk for each of
-                // the worker nodes plus one more chunk for the master
-                if (nextCPU > mpi_info->size)
-                    break; // End infinite loop
-            } // Infinite loop
-        }   // End master
-        else {  // Worker
-#ifdef ESYS_MPI
-                // Each worker receives one message
-                MPI_Status status;
-                MPI_Recv(tempInts, chunkSize*(2+numNodes)+1, MPI_INT, 0, 81725, mpi_info->comm, &status);
-                chunkEle = tempInts[chunkSize*(2+numNodes)];
-#endif
-        } // Worker
-
-        // Copy Element data from tempInts to mesh_p
-        mesh_p->Points->allocTable(chunkEle);
-
-        if (noError()) {
-            mesh_p->Points->minColor=0;
-            mesh_p->Points->maxColor=chunkEle-1;
-#pragma omp parallel for private (i0, i1) schedule(static)
-            for (i0=0; i0<chunkEle; i0++) {
-                mesh_p->Points->Id[i0]  = tempInts[i0*(2+numNodes)+0];
-                mesh_p->Points->Tag[i0] = tempInts[i0*(2+numNodes)+1];
-                mesh_p->Points->Owner[i0]  =mpi_info->rank;
-                mesh_p->Points->Color[i0] = i0;
-                for (i1 = 0; i1 < numNodes; i1++) {
-                    mesh_p->Points->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0*(2+numNodes)+2+i1];
-                }
-            }
+    delete[] tempInts;
+    delete[] tempCoords;
+
+    /*************************** read elements ******************************/
+    domain->setElements(readElementFile(fileHandle, order, reducedOrder, mpiInfo));
+
+    /************************ read face elements ****************************/
+    domain->setFaceElements(readElementFile(fileHandle, order, reducedOrder, mpiInfo));
+
+    /************************ read contact elements ****************************/
+    domain->setContactElements(readElementFile(fileHandle, order, reducedOrder, mpiInfo));
+
+    /************************ read nodal elements ***************************/
+    domain->setPoints(readElementFile(fileHandle, order, reducedOrder, mpiInfo));
+
+    /************************  get the name tags ****************************/
+    std::string remainder;
+    size_t len = 0;
+    int tag_key;
+    if (mpiInfo->rank == 0) { // Master
+        // Read the word 'Tags'
+        if (!fileHandle.eof()) {
+            std::getline(fileHandle, name);
+            if (!fileHandle.good())
+                throw IOError("Mesh::read: Scan error while reading tag header");
         }
+        // Read rest of file in one chunk, after using seek to find length
+        std::ios::pos_type cur_pos = fileHandle.tellg();
+        fileHandle.seekg(0, std::ios::end);
+        std::ios::pos_type end_pos = fileHandle.tellg();
+        fileHandle.seekg(cur_pos);
+        remainder.resize(end_pos - cur_pos + 1);
+        if (!fileHandle.eof()) {
+            fileHandle.read(&remainder[0], end_pos-cur_pos);
+            if (fileHandle.bad())
+                throw IOError("Mesh::read: Error reading remainder");
+            remainder[end_pos - cur_pos] = 0;
+        }
+        len = remainder.find_last_not_of(' ');
+        remainder = remainder.substr(0, len+1);
+    } // Master
 
-        delete[] tempInts;
-    } // ************** end of Read the nodal element data ***************
-
-    // *****************  get the name tags ********************************
-    if (noError()) {
-        char *remainder=0, *ptr;
-        size_t len=0;
-#ifdef ESYS_MPI
-        int len_i;
-#endif
-        int tag_key;
-        if (mpi_info->rank == 0) {  // Master
-            // Read the word 'Tag'
-            if (!feof(fileHandle_p)) {
-                scan_ret = fscanf(fileHandle_p, "%s\n", name);
-                FSCANF_CHECK(scan_ret, "Mesh_read")
-            }
-
-#ifdef _WIN32
-            // windows ftell lies on unix formatted text files
-                remainder = NULL;
-                len=0;
-                while (1) {
-                    size_t malloc_chunk = 1024;
-                    size_t buff_size = 0;
-                    int ch;
-
-                    ch = fgetc(fileHandle_p);
-                    if( ch == '\r' )
-                        continue;
-
-                    if (len+1 > buff_size) {
-                        TMPMEMREALLOC(remainder,remainder,buff_size+malloc_chunk,char);
-                    }
-                    if (ch == EOF) {
-                        // hit EOF
-                        remainder[len] = (char)0;
-                        break;
-                    }
-                    remainder[len] = (char)ch;
-                    len++;
-                }
-#else
-                // Read rest of file in one chunk, after using seek to find length
-                {
-                    long cur_pos, end_pos;
-
-                    cur_pos = ftell(fileHandle_p);
-                    fseek(fileHandle_p, 0L, SEEK_END);
-                    end_pos = ftell(fileHandle_p);
-                    fseek(fileHandle_p, (long)cur_pos, SEEK_SET);
-                    remainder = new char[end_pos-cur_pos+1];
-                    if (!feof(fileHandle_p)) {
-                        scan_ret = fread(remainder, (size_t) end_pos-cur_pos,
-                                             sizeof(char), fileHandle_p);
-
-                        FSCANF_CHECK(scan_ret, "Mesh_read")
-                        remainder[end_pos-cur_pos] = 0;
-                    }
-                }
-#endif
-            len = strlen(remainder);
-            // trim the string
-            while ((len>1) && isspace(remainder[--len])) {remainder[len]=0;}
-            len = strlen(remainder);
-        } // Master
 #ifdef ESYS_MPI
-            len_i=(int) len;
-            MPI_Bcast(&len_i, 1, MPI_INT,  0, mpi_info->comm);
-            len=(size_t) len_i;
-            if (mpi_info->rank != 0) {
-                remainder = new char[len+1];
-                remainder[0] = 0;
-            }
-            if (MPI_Bcast (remainder, len+1, MPI_CHAR,  0, mpi_info->comm) !=
-                    MPI_SUCCESS)
-                setError(ESYS_MPI_ERROR, "Mesh_read: broadcast of remainder failed");
+    int len_i = static_cast<int>(len);
+    MPI_Bcast(&len_i, 1, MPI_INT, 0, mpiInfo->comm);
+    len = static_cast<size_t>(len_i);
+    if (mpiInfo->rank != 0) {
+        remainder.resize(len + 1);
+    }
+    if (MPI_Bcast(&remainder[0], len+1, MPI_CHAR, 0, mpiInfo->comm) != MPI_SUCCESS)
+        throw FinleyException("Mesh::read: broadcast of remainder failed");
 #endif
 
-        if (remainder[0]) {
-            ptr = remainder;
-            do {
-                sscanf(ptr, "%s %d\n", name, &tag_key);
-                if (*name)
-                    mesh_p->addTagMap(name, tag_key);
-                ptr++;
-            } while(NULL != (ptr = strchr(ptr, '\n')) && *ptr);
+    std::stringstream rem;
+    rem << remainder;
+    while (std::getline(rem, line)) {
+        size_t pos = line.find(' ');
+        if (pos != std::string::npos) {
+            name = line.substr(0, pos);
+            tag_key = std::stoi(line.substr(pos+1));
+            domain->setTagMap(name, tag_key);
         }
-        if (remainder)
-            delete[] remainder;
     }
 
     // close file
-    if (mpi_info->rank == 0)
-        fclose(fileHandle_p);
-
-    // resolve id's and rearrange elements
-    if (noError()) mesh_p->resolveNodeIds();
-    if (noError()) mesh_p->prepare(optimize);
+    if (mpiInfo->rank == 0)
+        fileHandle.close();
 
-    // that's it
-    if (!noError()) {
-        delete mesh_p;
-        mesh_p=NULL;
-    }
-    return mesh_p;
+    domain->resolveNodeIds();
+    domain->prepare(optimize);
+    return domain->getPtr();
 }
 
 } // namespace finley
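A minimal MPI sketch of the round-robin chunk distribution FinleyDomain::read uses for nodes and elements (assumes an MPI launcher such as mpirun; the payload is a dummy int array rather than the real node data): rank 0 produces one chunk per rank, ships chunks to ranks 1..size-1 with a plain MPI_Send, keeps the final chunk for itself, and every worker posts exactly one matching MPI_Recv.

    #include <mpi.h>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int chunkSize = 4;
        std::vector<int> chunk(chunkSize + 1, -1);   // payload plus one count slot

        if (rank == 0) {                             // master: produce and distribute
            for (int nextCPU = 1; nextCPU <= size; nextCPU++) {
                for (int i = 0; i < chunkSize; i++)
                    chunk[i] = nextCPU * 100 + i;    // stand-in for data read from file
                chunk[chunkSize] = chunkSize;        // how many entries are valid
                if (nextCPU < size)                  // the last chunk stays on the master
                    MPI_Send(chunk.data(), chunkSize + 1, MPI_INT,
                             nextCPU, 81720, MPI_COMM_WORLD);
            }
        } else {                                     // worker: receive exactly one chunk
            MPI_Status status;
            MPI_Recv(chunk.data(), chunkSize + 1, MPI_INT,
                     0, 81720, MPI_COMM_WORLD, &status);
        }
        MPI_Finalize();
        return 0;
    }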
diff --git a/finley/src/Mesh_readGmsh.cpp b/finley/src/Mesh_readGmsh.cpp
index 8e49896..5b66d99 100644
--- a/finley/src/Mesh_readGmsh.cpp
+++ b/finley/src/Mesh_readGmsh.cpp
@@ -14,19 +14,12 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
+#include "FinleyException.h"
 
-/****************************************************************************
+#include <escript/index.h>
 
-  Finley: read mesh from gmsh file
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Mesh.h"
 #include <cstdio>
-#include "CPPAdapter/FinleyAdapterException.h"
 
 //can't return because the flag needs to be shared across all nodes
 #define SSCANF_CHECK(scan_ret) { if (scan_ret == EOF) { errorFlag = 1;} }
@@ -42,50 +35,58 @@
     error flags include:
 
         0 - all ok
-        1 - early eof
+        1 - early EOF
         2 - EOF before nodes section found
         3 - EOF before elements section found
-        4 - throw error_msg
-        5 - eof at apropriate time.
+        4 - throw errorMsg
+        5 - EOF at appropriate time.
         6 - !noError
 */
 
+namespace {
+
+using namespace finley;
+using escript::IOError;
+
 struct ElementInfo {
-    finley::ElementTypeId type;
+    ElementTypeId type;
     int id;
     int dim;
-    int *vertex;
+    int* vertex;
     int tag;
 };
 
-bool is_node_string(char *line) {
-    if (line == NULL)
+inline bool is_node_string(const char* line)
+{
+    if (!line)
         return false;
     return !strncmp(line, "$NOD", 4) || !strncmp(line, "$NOE", 4)
             || !strncmp(line, "$Nodes", 6);
 }
 
-bool is_endnode_string(char *line) {
-    if (line == NULL)
+inline bool is_endnode_string(const char* line)
+{
+    if (!line)
         return false;
     return !strncmp(line, "$ENDNOD", 7) || !strncmp(line, "$ENDNOE", 7)
             || !strncmp(line, "$EndNodes", 9);
 }
 
-bool get_line(std::vector<char>& line, FILE *file) {
+inline bool get_line(std::vector<char>& line, FILE* file)
+{
     int capacity = 1024;
     line.clear();
     line.resize(capacity);
     char *tmp = &line[0];
     char *res = NULL;
-    //not terribly efficient, but any line longer than 1024 
+    //not terribly efficient, but any line longer than 1024
     //is probably already bad
     while ((res = fgets(tmp, 1023, file)) == tmp
             && strchr(tmp, '\n') == NULL) {
         capacity += 1024;
         line.resize(capacity);
         tmp = strchr(&line[0], '\0'); //this bit is awful, O(n) instead of O(1)
-        if (capacity > LenString_MAX) {//madness
+        if (capacity > 1024) { //madness
             res = NULL;
             break;
         }
@@ -93,19 +94,19 @@ bool get_line(std::vector<char>& line, FILE *file) {
     return res == tmp; //true if line read, false if EOF with or without \n
 }
 
-namespace finley {
-
-char *next_space(char **position, int count) {
+inline char* next_space(char** position, int count)
+{
     for (int i = 0; i < count; i++) {
         *position = strchr(*position, ' ');
-        if ((*position)++ == NULL)//move off the space
+        if ((*position)++ == NULL) //move off the space
             return NULL;
     }
     return *position;
 }
 
-int getSingleElement(FILE *f, int dim, double version, struct ElementInfo& e,
-        char *error_msg, const char *fname, bool useMacroElements)
+int getSingleElement(FILE* f, int dim, double version, struct ElementInfo& e,
+        std::string& errorMsg, const std::string& filename,
+        bool useMacroElements)
 {
     int gmsh_type = -1;
 
@@ -114,7 +115,7 @@ int getSingleElement(FILE *f, int dim, double version, struct ElementInfo& e,
         return EARLY_EOF;
     char *position = &line[0];
     if (sscanf(position, "%d %d", &e.id, &gmsh_type) != 2) {
-        sprintf(error_msg, "malformed mesh file");
+        errorMsg = "malformed mesh file";
         return THROW_ERROR;
     }
     if (next_space(&position, 2) == NULL)
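The gmsh reader reports problems through an integer flag plus an error string rather than throwing on the spot, so that all MPI ranks can agree on the flag before any of them raises an exception. A minimal sketch of that calling convention (hypothetical names parseHeader and THROW_ERR; the real codes are the ones listed in the error-flag table above):

    #include <string>

    const int OK = 0;
    const int THROW_ERR = 4;   // stand-in for the file's THROW_ERROR code

    // Fill errorMsg and return a flag instead of throwing inside the parser.
    int parseHeader(const std::string& line, std::string& errorMsg)
    {
        if (line.empty()) {
            errorMsg = "malformed mesh file";
            return THROW_ERR;
        }
        return OK;
    }

    int main()
    {
        std::string msg;
        // The caller (on every rank, in the MPI case) inspects the flag and only
        // then decides whether to throw or abort with msg.
        return parseHeader("", msg) == THROW_ERR ? 1 : 0;
    }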
@@ -199,457 +200,292 @@ int getSingleElement(FILE *f, int dim, double version, struct ElementInfo& e,
             e.dim=0;
             break;
         default:
-            e.type=NoRef;
-            e.dim=-1;
-            sprintf(error_msg,"Unexpected gmsh element type %d in mesh file %s.", gmsh_type, fname);
-            return THROW_ERROR;
+            {
+                e.type=NoRef;
+                e.dim=-1;
+                std::stringstream ss;
+                ss << "readGmsh: Unexpected gmsh element type "
+                    << gmsh_type << " in mesh file " << filename;
+                errorMsg = ss.str();
+                return THROW_ERROR;
+            }
     }
-    if (version <= 1.0){
+    if (version <= 1.0) {
         int tmp = 0;
         if (sscanf(position, "%d %*d %d", &e.tag, &tmp) == 0
-                || next_space(&position, 3) == NULL )
+                || next_space(&position, 3) == NULL)
             return EARLY_EOF;
         if (tmp != numNodesPerElement) {
-            sprintf(error_msg,"Illegal number of nodes for element %d in mesh file %s.", e.id, fname);
+            std::stringstream ss;
+            ss << "readGmsh: Illegal number of nodes for element " << e.id
+                << " in mesh file " << filename;
+            errorMsg = ss.str();
             return THROW_ERROR;
         }
     } else {
         e.tag = 1;
-        int numTags=0; //this is garbage and never used
-        if (sscanf(position, "%d", &numTags) == 0 
+        int numTags = 0; //this is garbage and never used
+        if (sscanf(position, "%d", &numTags) == 0
                 || next_space(&position, 1) == NULL)
             return EARLY_EOF;
-        if (sscanf(position, "%d", &e.tag) == 0 
+        if (sscanf(position, "%d", &e.tag) == 0
                 || next_space(&position, numTags) == NULL)
             return EARLY_EOF;
-        /* ignore any other tags, second tag would be elementary id,
-         third tag would be partition id */
+        // ignore any other tags, second tag would be elementary id,
+        // third tag would be partition id
     }
 
-    if (!noError()) {
-        return ERROR;
-    }
-    for(int j = 0; j < numNodesPerElement; j++) {
-        if (sscanf(position, "%d", e.vertex+j) == 0 
+    for (int j = 0; j < numNodesPerElement; j++) {
+        if (sscanf(position, "%d", e.vertex+j) == 0
                 || next_space(&position, 1) == NULL)
             return EARLY_EOF;
     }
     return 0;
 }
 
-int getElementsMaster(esysUtils::JMPI& mpi_info, Mesh * mesh_p, FILE * fileHandle_p,
-        char * error_msg, bool useMacroElements, const std::string fname,
-        int numDim, double version, int order, int reduced_order) {
+int getElementsMaster(escript::JMPI& mpiInfo, FinleyDomain* dom,
+                      FILE* fileHandle, std::string& errorMsg,
+                      bool useMacroElements, const std::string& filename,
+                      int numDim, double version, int order, int reducedOrder)
+{
     /*
      *  This function should read in the elements and distribute
      *  them to the appropriate process.
      */
-    int errorFlag=0;
-    ElementTypeId final_element_type = NoRef;
-    ElementTypeId final_face_element_type = NoRef;
-    ElementTypeId contact_element_type = NoRef;
-    int numElements=0, numFaceElements=0, totalNumElements=0;
+    int errorFlag = 0;
+    ElementTypeId finalElementType = NoRef;
+    ElementTypeId finalFaceElementType = NoRef;
+    ElementTypeId contactElementType = NoRef;
+    int numElements = 0, numFaceElements = 0, totalNumElements = 0;
     const_ReferenceElementSet_ptr refPoints, refContactElements;
     const_ReferenceElementSet_ptr refFaceElements, refElements;
-    int *id, *tag;
-    ElementTypeId * element_type;
     std::vector<char> line;
-    if (!get_line(line, fileHandle_p))
+    if (!get_line(line, fileHandle))
         errorFlag = EARLY_EOF;
     int scan_ret = sscanf(&line[0], "%d", &totalNumElements);
     SSCANF_CHECK(scan_ret);
 
 #ifdef ESYS_MPI
-    // Broadcast numNodes if there are multiple mpi procs
-    if (mpi_info->size > 1) {
+    // Broadcast numNodes if there are multiple MPI procs
+    if (mpiInfo->size > 1) {
         int msg = totalNumElements;
-        MPI_Bcast(&msg, 1, MPI_INT, 0, mpi_info->comm);
+        MPI_Bcast(&msg, 1, MPI_INT, 0, mpiInfo->comm);
     }
 #endif
 
-    int chunkSize = totalNumElements / mpi_info->size + 1, chunkElements=0;
-    int chunkFaceElements=0, chunkOtherElements=0;
-    id = new int[chunkSize+1];
-    tag = new int[chunkSize+1];
-    std::vector<int>vertices(chunkSize*MAX_numNodes_gmsh, -1);
-    element_type = new ElementTypeId[chunkSize+1];
-    std::vector<int> elementIndices (chunkSize, -1);
-    std::vector<int> faceElementIndices (chunkSize, -1);
-
-
-
-#ifdef ESYS_MPI
-    int chunkInfo[2];//chunkInfo stores the number of element and number of face elements
-    int cpuId = 0;
-#endif
-
+    int chunkSize = totalNumElements / mpiInfo->size;
+    int rest = totalNumElements - (mpiInfo->size-1)*chunkSize;
+    const size_t storage = std::max(chunkSize, rest);
+    std::vector<int> id(storage+1);
+    std::vector<int> tag(storage+1);
+    std::vector<int> vertices(storage*MAX_numNodes_gmsh, -1);
+    std::vector<ElementTypeId> elementType(storage+1);
+    std::vector<int> elementIndices(storage, -1);
+    std::vector<int> faceElementIndices(storage, -1);
 
 #pragma omp parallel for schedule(static)
-    for (int i=0; i<chunkSize; i++) {
+    for (int i=0; i<storage; i++) {
         id[i] = -1;
         tag[i] = -1;
-        element_type[i] = NoRef;
+        elementType[i] = NoRef;
     }
 
-    /* read all in */
-    for(int e = 0, count = 0; e < totalNumElements; e++, count++) {
+    int cpuId = 0;
+    int chunkElements = 0;
+    int chunkFaceElements = 0;
+    int chunkOtherElements = 0;
+
+    // read all in
+    for (index_t e = 0, count = 0; e < totalNumElements; e++, count++) {
+        if (cpuId >= mpiInfo->size-1) {
+            chunkSize = rest;
+        }
+
         struct ElementInfo element = {NoRef, 0, 0, &vertices[count*MAX_numNodes_gmsh], 0};
-        getSingleElement(fileHandle_p, numDim, version,
-                element, error_msg, fname.c_str(), useMacroElements);
-        element_type[count] = element.type;
+        getSingleElement(fileHandle, numDim, version, element, errorMsg,
+                         filename, useMacroElements);
+        elementType[count] = element.type;
         id[count] = element.id;
         tag[count] = element.tag;
 
-        /* for tet10 the last two nodes need to be swapped */
-        if ((element.type==Tet10) || (element.type == Tet10Macro)) {
-            int vertex = vertices[INDEX2(9,count,MAX_numNodes_gmsh)];
-            vertices[INDEX2(9,count,MAX_numNodes_gmsh)] = vertices[INDEX2(8,count,MAX_numNodes_gmsh)];
-            vertices[INDEX2(8,count,MAX_numNodes_gmsh)] = vertex;
+        // for tet10 the last two nodes need to be swapped
+        if (element.type == Tet10 || element.type == Tet10Macro) {
+            int vertex = vertices[INDEX2(9, count, MAX_numNodes_gmsh)];
+            vertices[INDEX2(9, count, MAX_numNodes_gmsh)] = vertices[INDEX2(8, count, MAX_numNodes_gmsh)];
+            vertices[INDEX2(8, count, MAX_numNodes_gmsh)] = vertex;
         }
 
         if (element.dim == numDim) {
-            if (final_element_type == NoRef) {
-               final_element_type = element.type;
-            } else if (final_element_type != element.type) {
-                sprintf(error_msg,"Finley can only handle a single type of internal elements.");
+            if (finalElementType == NoRef) {
+               finalElementType = element.type;
+            } else if (finalElementType != element.type) {
+                errorMsg = "Finley can only handle a single type of internal elements.";
                 errorFlag = THROW_ERROR;
             }
-            elementIndices[chunkElements]=count;
+            elementIndices[chunkElements] = count;
             numElements++;
             chunkElements++;
         } else if (element.dim == numDim-1) {
-            if (final_face_element_type == NoRef) {
-               final_face_element_type = element.type;
-            } else if (final_face_element_type != element.type) {
-               sprintf(error_msg,"Finley can only handle a single type of face elements.");
+            if (finalFaceElementType == NoRef) {
+               finalFaceElementType = element.type;
+            } else if (finalFaceElementType != element.type) {
+               errorMsg = "Finley can only handle a single type of face elements.";
                errorFlag = THROW_ERROR;
             }
-            faceElementIndices[chunkFaceElements]=count;
+            faceElementIndices[chunkFaceElements] = count;
             numFaceElements++;
             chunkFaceElements++;
         } else {
             chunkOtherElements++;
         }
 #ifdef ESYS_MPI
-        if(count < chunkSize - 1)
+        if (count < chunkSize - 1)
             continue;
-        chunkInfo[0]=chunkElements;
-        chunkInfo[1]=chunkFaceElements;
 
-        if(cpuId++ > mpi_info->size) {
+        // the last chunk is left for the master process
+        if (++cpuId >= mpiInfo->size) {
             continue;
         }
-        if(errorFlag){
-            for(; cpuId<mpi_info->size; cpuId++) {
-                MPI_Send(&errorFlag, 1, MPI_INT, cpuId, 81719, mpi_info->comm);
+
+        if (errorFlag) {
+            for(; cpuId < mpiInfo->size; cpuId++) {
+                MPI_Send(&errorFlag, 1, MPI_INT, cpuId, 81719, mpiInfo->comm);
             }
             break;
         }
-        MPI_Send(&errorFlag, 1, MPI_INT, cpuId, 81719, mpi_info->comm);
-        MPI_Send(&vertices[0], chunkSize*MAX_numNodes_gmsh, MPI_INT, cpuId, 81720, mpi_info->comm);
-        MPI_Send(id, chunkSize, MPI_INT, cpuId, 81721, mpi_info->comm);
-        MPI_Send(tag, chunkSize, MPI_INT, cpuId, 81722, mpi_info->comm);
-        MPI_Send(element_type, chunkSize, MPI_INT, cpuId, 81723, mpi_info->comm);
-        MPI_Send(chunkInfo, 2, MPI_INT, cpuId, 81724, mpi_info->comm);
-        MPI_Send(&(elementIndices[0]), chunkElements, MPI_INT, cpuId, 81725, mpi_info->comm);
-        MPI_Send(&(faceElementIndices[0]), chunkFaceElements, MPI_INT, cpuId, 81726, mpi_info->comm);
+        int chunkInfo[2];
+        chunkInfo[0] = chunkElements;
+        chunkInfo[1] = chunkFaceElements;
+
+        MPI_Send(&errorFlag, 1, MPI_INT, cpuId, 81719, mpiInfo->comm);
+        MPI_Send(&vertices[0], chunkSize*MAX_numNodes_gmsh, MPI_INT, cpuId, 81720, mpiInfo->comm);
+        MPI_Send(&id[0], chunkSize, MPI_INT, cpuId, 81721, mpiInfo->comm);
+        MPI_Send(&tag[0], chunkSize, MPI_INT, cpuId, 81722, mpiInfo->comm);
+        MPI_Send(&elementType[0], chunkSize, MPI_INT, cpuId, 81723, mpiInfo->comm);
+        MPI_Send(chunkInfo, 2, MPI_INT, cpuId, 81724, mpiInfo->comm);
+        MPI_Send(&elementIndices[0], chunkElements, MPI_INT, cpuId, 81725, mpiInfo->comm);
+        MPI_Send(&faceElementIndices[0], chunkFaceElements, MPI_INT, cpuId, 81726, mpiInfo->comm);
 
         // reset arrays for next cpu
 #pragma omp parallel for schedule(static)
-        for (int i=0; i<chunkSize*MAX_numNodes_gmsh; i++)
+        for (index_t i = 0; i < chunkSize*MAX_numNodes_gmsh; i++)
             vertices[i] = -1;
 #pragma omp parallel for schedule(static)
-        for (int i=0; i<chunkSize; i++) {
+        for (index_t i = 0; i < chunkSize; i++) {
             id[i] = -1;
             tag[i] = -1;
-            element_type[i] = NoRef;
+            elementType[i] = NoRef;
         }
-        chunkElements=0;
-        chunkFaceElements=0;
-        chunkOtherElements=0;
+        chunkElements = 0;
+        chunkFaceElements = 0;
+        chunkOtherElements = 0;
         count = -1;
 #endif
     }
 
 #ifdef ESYS_MPI
-    if(mpi_info->size>1){
-        MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpi_info->comm);
-    }
+    if (mpiInfo->size > 1)
+        MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpiInfo->comm);
 #endif
-    if(errorFlag){
+    if (errorFlag)
         return errorFlag;
-    }
 
     // all elements have been read and shared, now we have to identify the
     // elements for finley
-    if (!noError()) {
-        return ERROR;
-    }
-    /* first we have to identify the elements to define Elements and FaceElements */
-    if (final_element_type == NoRef) {
-        if (numDim==1) {
-           final_element_type=Line2;
-        } else if (numDim==2) {
-           final_element_type=Tri3;
-        } else if (numDim==3) {
-           final_element_type=Tet4;
+    if (finalElementType == NoRef) {
+        if (numDim == 1) {
+           finalElementType = Line2;
+        } else if (numDim == 2) {
+           finalElementType = Tri3;
+        } else if (numDim == 3) {
+           finalElementType = Tet4;
         }
     }
-    if (final_face_element_type == NoRef) {
-        if (numDim==1) {
-           final_face_element_type=Point1;
-        } else if (numDim==2) {
-           final_face_element_type=Line2;
-        } else if (numDim==3) {
-           final_face_element_type=Tri3;
+    if (finalFaceElementType == NoRef) {
+        if (numDim == 1) {
+           finalFaceElementType = Point1;
+        } else if (numDim == 2) {
+           finalFaceElementType = Line2;
+        } else if (numDim == 3) {
+           finalFaceElementType = Tri3;
         }
     }
-    if (final_face_element_type == Line2) {
-        contact_element_type=Line2_Contact;
-    } else  if ( (final_face_element_type == Line3) || (final_face_element_type == Line3Macro) ) {
-        contact_element_type=Line3_Contact;
-    } else  if (final_face_element_type == Tri3) {
-        contact_element_type=Tri3_Contact;
-    } else  if ( (final_face_element_type == Tri6) || (final_face_element_type == Tri6Macro)) {
-        contact_element_type=Tri6_Contact;
+    if (finalFaceElementType == Line2) {
+        contactElementType = Line2_Contact;
+    } else if (finalFaceElementType == Line3 || finalFaceElementType == Line3Macro) {
+        contactElementType = Line3_Contact;
+    } else if (finalFaceElementType == Tri3) {
+        contactElementType = Tri3_Contact;
+    } else if (finalFaceElementType == Tri6 || finalFaceElementType == Tri6Macro) {
+        contactElementType = Tri6_Contact;
     } else {
-        contact_element_type=Point1_Contact;
+        contactElementType = Point1_Contact;
     }
 
 #ifdef ESYS_MPI
     // Broadcast numNodes if there are multiple mpi procs
-    if (mpi_info->size > 1) {
-        int msg[3] = {final_element_type, final_face_element_type, 
-                contact_element_type};
-        MPI_Bcast(msg, 3, MPI_INT,  0, mpi_info->comm);
+    if (mpiInfo->size > 1) {
+        int msg[3] = {finalElementType, finalFaceElementType,
+                contactElementType};
+        MPI_Bcast(msg, 3, MPI_INT,  0, mpiInfo->comm);
     }
 #endif
 
+    refElements.reset(new ReferenceElementSet(finalElementType, order, reducedOrder));
+    refFaceElements.reset(new ReferenceElementSet(finalFaceElementType, order, reducedOrder));
+    refContactElements.reset(new ReferenceElementSet(contactElementType, order, reducedOrder));
+    refPoints.reset(new ReferenceElementSet(Point1, order, reducedOrder));
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    dom->setElements(elements);
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    dom->setFaceElements(faces);
+    ElementFile* contacts = new ElementFile(refContactElements, mpiInfo);
+    dom->setContactElements(contacts);
+    ElementFile* points = new ElementFile(refPoints, mpiInfo);
+    dom->setPoints(points);
+
+    elements->allocTable(chunkElements);
+    faces->allocTable(chunkFaceElements);
+    contacts->allocTable(0);
+    points->allocTable(0);
+    elements->minColor = 0;
+    elements->maxColor = chunkElements - 1;
+    faces->minColor = 0;
+    faces->maxColor = chunkFaceElements - 1;
+    contacts->minColor = 0;
+    contacts->maxColor = 0;
+    points->minColor = 0;
+    points->maxColor = 0;
 
-
-    refElements.reset(new ReferenceElementSet(final_element_type, order, reduced_order));
-    refFaceElements.reset(new ReferenceElementSet(final_face_element_type, order, reduced_order));
-    refContactElements.reset(new ReferenceElementSet(contact_element_type, order, reduced_order));
-    refPoints.reset(new ReferenceElementSet(Point1, order, reduced_order));
-    mesh_p->Elements=new ElementFile(refElements, mpi_info);
-    mesh_p->FaceElements=new ElementFile(refFaceElements, mpi_info);
-    mesh_p->ContactElements=new ElementFile(refContactElements, mpi_info);
-    mesh_p->Points=new ElementFile(refPoints, mpi_info);
-
-
-    if (!noError())
-        return ERROR;
-
-    mesh_p->Elements->allocTable(chunkElements);
-    mesh_p->FaceElements->allocTable(chunkFaceElements);
-    mesh_p->ContactElements->allocTable(0);
-    mesh_p->Points->allocTable(0);
-
-    if (!noError())
-        return ERROR;
-
-    mesh_p->Elements->minColor=0;
-    mesh_p->Elements->maxColor=chunkElements-1;
-    mesh_p->FaceElements->minColor=0;
-    mesh_p->FaceElements->maxColor=chunkFaceElements-1;
-    mesh_p->ContactElements->minColor=0;
-    mesh_p->ContactElements->maxColor=0;
-    mesh_p->Points->minColor=0;
-    mesh_p->Points->maxColor=0;
-#pragma omp parallel for schedule(static)
-    for(int e = 0; e < chunkElements; e++) {
-        mesh_p->Elements->Id[e]=id[elementIndices[e]];
-        mesh_p->Elements->Tag[e]=tag[elementIndices[e]];
-        mesh_p->Elements->Color[e]=elementIndices[e];
-        mesh_p->Elements->Owner[e]=mpi_info->rank;
-        for (int j = 0; j < mesh_p->Elements->numNodes; ++j)  {
-            int vertex = vertices[INDEX2(j,elementIndices[e],MAX_numNodes_gmsh)];
-            mesh_p->Elements->Nodes[INDEX2(j, e, mesh_p->Elements->numNodes)]=vertex;
-        }
-    }
-
-#pragma omp parallel for schedule(static)
-    for (int e = 0; e < chunkFaceElements; e++) {
-        mesh_p->FaceElements->Id[e]=id[faceElementIndices[e]];
-        mesh_p->FaceElements->Tag[e]=tag[faceElementIndices[e]];
-        mesh_p->FaceElements->Color[e]=e;
-        mesh_p->FaceElements->Owner[e]=mpi_info->rank;
-        for (int j=0; j<mesh_p->FaceElements->numNodes; ++j) {
-            int faceVertex = vertices[INDEX2(j,faceElementIndices[e],MAX_numNodes_gmsh)];
-            mesh_p->FaceElements->Nodes[INDEX2(j, e, mesh_p->FaceElements->numNodes)]= faceVertex;
-        }
-    }
-
-    /* and clean up */
-    delete[] id;
-    delete[] tag;
-    delete[] element_type;
-    return errorFlag;
-}
-
-int getElementsSlave(esysUtils::JMPI& mpi_info, Mesh *mesh_p, FILE *fileHandle_p,
-        char *error_msg, bool useMacroElements, const std::string fname,
-        int numDim, double version, int order, int reduced_order) {
-    /*
-     *  This function should read in the elements and distribute
-     *  them to the apropriate process.
-     */
-#ifndef ESYS_MPI
-    sprintf(error_msg, "Slave function called in non-MPI build");
-    return THROW_ERROR; // calling the slave from a non-mpi process is an awful idea
-#else
-
-    if (mpi_info->size == 1) {
-        sprintf(error_msg, "Slave function called with no master");
-        return THROW_ERROR; //again, sillyness
-    }
-
-    int errorFlag=0;
-
-    ElementTypeId final_element_type = NoRef;
-    ElementTypeId final_face_element_type = NoRef;
-    ElementTypeId contact_element_type = NoRef;
-    int totalNumElements=0;
-    const_ReferenceElementSet_ptr refPoints, refContactElements;
-    const_ReferenceElementSet_ptr refFaceElements, refElements;
-    ElementTypeId * element_type;
-
-    int msg = 0;
-    MPI_Bcast(&msg, 1, MPI_INT, 0, mpi_info->comm);
-    totalNumElements = msg;
-
-    int chunkSize = totalNumElements / mpi_info->size + 1, chunkElements=0;
-    int chunkFaceElements=0;
-    int *id = new int[chunkSize+1];
-    int *tag = new int[chunkSize+1];
-    std::vector<int>vertices(chunkSize*MAX_numNodes_gmsh, -1);
-    element_type = new ElementTypeId[chunkSize+1];
-    std::vector<int> elementIndices (chunkSize, -1);
-    std::vector<int> faceElementIndices (chunkSize, -1);
-
-    int chunkInfo[2];//chunkInfo stores the number of element and number of face elements
-
-#pragma omp parallel for schedule(static)
-    for (int i=0; i<chunkSize; i++) {
-        id[i] = -1;
-        tag[i] = -1;
-        element_type[i] = NoRef;
-    }
-
-
-    /* Each worker receives messages */
-    MPI_Status status;
-
-    MPI_Recv(&errorFlag, 1, MPI_INT,0, 81719, mpi_info->comm, &status);
-    if (errorFlag) {
-        return errorFlag;
-    }
-    MPI_Recv(&vertices[0], chunkSize*MAX_numNodes_gmsh, MPI_INT, 0, 81720, mpi_info->comm, &status);
-    MPI_Recv(id, chunkSize, MPI_INT, 0, 81721, mpi_info->comm, &status);
-    MPI_Recv(tag, chunkSize, MPI_INT, 0, 81722, mpi_info->comm, &status);
-    MPI_Recv(element_type, chunkSize, MPI_INT, 0, 81723, mpi_info->comm, &status);
-    MPI_Recv(chunkInfo, 2, MPI_INT, 0, 81724, mpi_info->comm, &status);
-    chunkElements = chunkInfo[0];
-    chunkFaceElements = chunkInfo[1];
-    MPI_Recv(&(elementIndices[0]), chunkElements, MPI_INT, 0, 81725, mpi_info->comm,&status);
-    MPI_Recv(&(faceElementIndices[0]), chunkFaceElements, MPI_INT, 0, 81726, mpi_info->comm,&status);
-
-
-    MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpi_info->comm);
-    if(errorFlag){
-        return errorFlag;
-    }
-
-    // all elements have been read and shared, now we have to identify the
-    // elements for finley
-    if (!noError()) {
-        return ERROR;
-    }
-
-    int numNodes[3] = {0,0,0};
-    MPI_Bcast(numNodes, 3, MPI_INT,  0, mpi_info->comm);
-    final_element_type = static_cast<ElementTypeId>(numNodes[0]);
-    final_face_element_type = static_cast<ElementTypeId>(numNodes[1]);
-    contact_element_type = static_cast<ElementTypeId>(numNodes[2]);
-
-
-
-    refElements.reset(new ReferenceElementSet(final_element_type, order, reduced_order));
-    refFaceElements.reset(new ReferenceElementSet(final_face_element_type, order, reduced_order));
-    refContactElements.reset(new ReferenceElementSet(contact_element_type, order, reduced_order));
-    refPoints.reset(new ReferenceElementSet(Point1, order, reduced_order));
-    mesh_p->Elements=new ElementFile(refElements, mpi_info);
-    mesh_p->FaceElements=new ElementFile(refFaceElements, mpi_info);
-    mesh_p->ContactElements=new ElementFile(refContactElements, mpi_info);
-    mesh_p->Points=new ElementFile(refPoints, mpi_info);
-
-
-    if (!noError())
-        return ERROR;
-
-    mesh_p->Elements->allocTable(chunkElements);
-    mesh_p->FaceElements->allocTable(chunkFaceElements);
-    mesh_p->ContactElements->allocTable(0);
-    mesh_p->Points->allocTable(0);
-
-    if (!noError())
-        return ERROR;
-
-    mesh_p->Elements->minColor=0;
-    mesh_p->Elements->maxColor=chunkElements-1;
-    mesh_p->FaceElements->minColor=0;
-    mesh_p->FaceElements->maxColor=chunkFaceElements-1;
-    mesh_p->ContactElements->minColor=0;
-    mesh_p->ContactElements->maxColor=0;
-    mesh_p->Points->minColor=0;
-    mesh_p->Points->maxColor=0;
 #pragma omp parallel for schedule(static)
-    for(int e = 0; e < chunkElements; e++) {
-        mesh_p->Elements->Id[e]=id[elementIndices[e]];
-        mesh_p->Elements->Tag[e]=tag[elementIndices[e]];
-        mesh_p->Elements->Color[e]=elementIndices[e];
-        mesh_p->Elements->Owner[e]=mpi_info->rank;
-        for (int j = 0; j < mesh_p->Elements->numNodes; ++j)  {
-            int vertex = vertices[INDEX2(j,elementIndices[e],MAX_numNodes_gmsh)];
-            mesh_p->Elements->Nodes[INDEX2(j, e, mesh_p->Elements->numNodes)]=vertex;
+    for (index_t e = 0; e < chunkElements; e++) {
+        elements->Id[e] = id[elementIndices[e]];
+        elements->Tag[e] = tag[elementIndices[e]];
+        elements->Color[e] = elementIndices[e];
+        elements->Owner[e] = mpiInfo->rank;
+        for (int j = 0; j < elements->numNodes; ++j)  {
+            int vertex = vertices[INDEX2(j, elementIndices[e], MAX_numNodes_gmsh)];
+            elements->Nodes[INDEX2(j, e, elements->numNodes)] = vertex;
         }
     }
 
 #pragma omp parallel for schedule(static)
-    for (int e = 0; e < chunkFaceElements; e++) {
-        mesh_p->FaceElements->Id[e]=id[faceElementIndices[e]];
-        mesh_p->FaceElements->Tag[e]=tag[faceElementIndices[e]];
-        mesh_p->FaceElements->Color[e]=e;
-        mesh_p->FaceElements->Owner[e]=mpi_info->rank;
-        for (int j=0; j<mesh_p->FaceElements->numNodes; ++j) {
-            int faceVertex = vertices[INDEX2(j,faceElementIndices[e],MAX_numNodes_gmsh)];
-            mesh_p->FaceElements->Nodes[INDEX2(j, e, mesh_p->FaceElements->numNodes)]= faceVertex;
+    for (index_t e = 0; e < chunkFaceElements; e++) {
+        faces->Id[e] = id[faceElementIndices[e]];
+        faces->Tag[e] = tag[faceElementIndices[e]];
+        faces->Color[e] = e;
+        faces->Owner[e] = mpiInfo->rank;
+        for (int j = 0; j < faces->numNodes; ++j) {
+            int faceVertex = vertices[INDEX2(j, faceElementIndices[e], MAX_numNodes_gmsh)];
+            faces->Nodes[INDEX2(j, e, faces->numNodes)] = faceVertex;
         }
     }
 
-    /* and clean up */
-    delete[] id;
-    delete[] tag;
-    delete[] element_type;
     return errorFlag;
-#endif //#ifndef ESYS_MPI -> #else
-}
-
-int getElements(esysUtils::JMPI& mpi_info, Mesh * mesh_p, FILE * fileHandle_p,
-        char * error_msg, bool useMacroElements, const std::string fname,
-        int numDim, double version, int order, int reduced_order) {
-    if (mpi_info->rank == 0) {
-        return getElementsMaster(mpi_info, mesh_p, fileHandle_p,
-                error_msg, useMacroElements, fname,
-                numDim, version, order, reduced_order);
-    }
-    return getElementsSlave(mpi_info, mesh_p, fileHandle_p,
-                error_msg, useMacroElements, fname,
-                numDim, version, order, reduced_order);
 }
 
-int gather_nodes(FILE *f, std::map<int,int>& tags, char *error_msg,
-        int dim, double version, const char *fname)
+int gather_nodes(FILE* f, std::map<int,int>& tags, std::string& errorMsg,
+                 int dim, double version, const std::string& filename)
 {
     int numNodes=0;
     std::vector<char> line;
@@ -660,29 +496,33 @@ int gather_nodes(FILE *f, std::map<int,int>& tags, char *error_msg,
         return EARLY_EOF;
     for (int node = 0; node < numNodes; node++) {
         int tmp = 0;
-        std::vector<char> line;
         if (!get_line(line, f))
             return EARLY_EOF;
         int scan_ret = sscanf(&line[0], "%d", &tmp);
         if (scan_ret == EOF) {
             return EARLY_EOF;
         } else if (scan_ret != 1) {
-            sprintf(error_msg, "malformed meshfile");
+            errorMsg = "malformed meshfile";
             return THROW_ERROR;
         }
         tags[tmp] = -1;
-
     }
     if (!get_line(line, f))
         return EARLY_EOF;
     if (!is_endnode_string(&line[0])) {
-        sprintf(error_msg, "malformed meshfile, expected '$EndNodes', got '%s'", &line[0]);
+        std::stringstream ss;
+        ss << "readGmsh: malformed mesh file. Expected '$EndNodes', got '"
+            << &line[0] << "'";
+        errorMsg = ss.str();
         return THROW_ERROR;
     }
     if (!get_line(line, f))
         return EARLY_EOF;
     if (strncmp(&line[0], "$ELM", 4) && strncmp(&line[0], "$Elements", 9)) {
-        sprintf(error_msg, "malformed meshfile, expected '$Elements', got '%s'", &line[0]);
+        std::stringstream ss;
+        ss << "readGmsh: malformed mesh file. Expected '$Elements', got '"
+            << &line[0] << "'";
+        errorMsg = ss.str();
         return THROW_ERROR;
     }
     int numElements = -1;
@@ -692,7 +532,7 @@ int gather_nodes(FILE *f, std::map<int,int>& tags, char *error_msg,
     if (scan_ret == EOF) {
         return EARLY_EOF;
     } else if (scan_ret != 1) {
-        sprintf(error_msg, "malformed meshfile");
+        errorMsg = "readGmsh: malformed mesh file";
         return THROW_ERROR;
     }
     struct ElementInfo e;
@@ -700,11 +540,14 @@ int gather_nodes(FILE *f, std::map<int,int>& tags, char *error_msg,
     e.vertex = &v[0];
 
     for (int element = 0; element < numElements; element++) {
-        getSingleElement(f, dim, version, e, error_msg, fname, false);
+        getSingleElement(f, dim, version, e, errorMsg, filename, false);
         for (int i = 0; i < MAX_numNodes_gmsh && v[i] >= 0; i++) {
             std::map<int,int>::iterator it = tags.find(v[i]);
             if (it == tags.end()) {
-                sprintf(error_msg, "element contains unknown node (node %d)", v[i]);
+                std::stringstream ss;
+                ss << "readGmsh: element contains unknown node (node " << v[i]
+                    << ")";
+                errorMsg = ss.str();
                 return THROW_ERROR;
             }
             // the first tagged element using a node tags that node too
@@ -715,58 +558,53 @@ int gather_nodes(FILE *f, std::map<int,int>& tags, char *error_msg,
     return 0;
 }
 
-int getNodesMaster(esysUtils::JMPI& mpi_info, Mesh *mesh_p, FILE *fileHandle_p,
-        int numDim, char *error_msg, std::map< int, int>& tags, int errorFlag)
+int getNodesMaster(escript::JMPI& mpiInfo, FinleyDomain* dom, FILE* fileHandle,
+                   int numDim, std::string& errorMsg, std::map<int, int>& tags)
 {
-    int numNodes=0;
+    int errorFlag = 0;
     std::vector<char> line;
-    if (!get_line(line, fileHandle_p))
+    if (!get_line(line, fileHandle))
         errorFlag = EARLY_EOF;
+
+    int numNodes = 0;
     int scan_ret = sscanf(&line[0], "%d", &numNodes);
     SSCANF_CHECK(scan_ret);
 #ifdef ESYS_MPI
     // Broadcast numNodes if there are multiple mpi procs
-    if (mpi_info->size > 1) {
-        MPI_Bcast(&numNodes, 1, MPI_INT,  0, mpi_info->comm);
-    }
+    if (mpiInfo->size > 1)
+        MPI_Bcast(&numNodes, 1, MPI_INT,  0, mpiInfo->comm);
 #endif
-    int chunkSize;
-    if(mpi_info->size > 1) {
-        chunkSize = (numNodes / mpi_info->size) + 1;
-    } else {
-        chunkSize = (numNodes / mpi_info->size);
-    }
-    int totalNodes=0, chunkNodes = 0;
-    int *tempInts = new int[chunkSize+1];        /* Stores the integer message data */
-    double *tempCoords = new double[chunkSize*numDim]; /* Stores the double message data */
+    int chunkSize = numNodes / mpiInfo->size;
+    const int rest = numNodes - (mpiInfo->size-1)*chunkSize;
+    const size_t storage = std::max(chunkSize, rest);
+    std::vector<int> tempInts(storage+1, -1);
+    std::vector<double> tempCoords(storage*numDim, -1.);
+
+    int totalNodes = 0;
+
+    for (int nextCPU = mpiInfo->size-1; nextCPU >= 0; nextCPU--) {
+        if (nextCPU == 0)
+            chunkSize = rest;
 
-    for (int nextCPU = mpi_info->size-1; nextCPU >= 0; nextCPU--) {
-//intialise arrays
-#pragma omp parallel for schedule(static)
-        for (int i=0; i<chunkSize+1; i++)
-            tempInts[i] = -1;
-#pragma omp parallel for schedule(static)
-        for (int i=0; i<chunkSize*numDim; i++)
-            tempCoords[i] = -1.0;
         if (!errorFlag) {
-            if (nextCPU ==0) {
-                chunkSize = numNodes-totalNodes;
-            }
             //read in chunksize nodes
-            for (chunkNodes=0; chunkNodes<chunkSize; chunkNodes++) {
-                if(totalNodes > numNodes) {
-                    sprintf(error_msg, "too many nodes %d > %d", totalNodes, numNodes);
+            for (int chunkNodes = 0; chunkNodes < chunkSize; chunkNodes++) {
+                if (totalNodes > numNodes) {
+                    std::stringstream ss;
+                    ss << "readGmsh: too many nodes (" << totalNodes << " < "
+                        << numNodes << ")";
+                    errorMsg = ss.str();
                     errorFlag = THROW_ERROR;
                     break;
                 }
                 std::vector<char> line;
-                if (!get_line(line, fileHandle_p))
+                if (!get_line(line, fileHandle))
                     errorFlag = EARLY_EOF;
-                
+
                 if (is_endnode_string(&line[0])) {
-                    sprintf(error_msg, "found end node string while still reading nodes");
+                    errorMsg = "readGmsh: found end node string while still reading nodes!";
                     errorFlag = THROW_ERROR;
-                    break;   
+                    break;
                 } else {
                     if (1 == numDim) {
                         scan_ret = sscanf(&line[0], "%d %le\n", &tempInts[chunkNodes], &tempCoords[0+chunkNodes*numDim]);
@@ -784,127 +622,53 @@ int getNodesMaster(esysUtils::JMPI& mpi_info, Mesh *mesh_p, FILE *fileHandle_p,
         }
 #ifdef ESYS_MPI
         if (nextCPU != 0) {
-            /* if there was an error, we have to stop them waiting for more */
-            MPI_Send(&errorFlag, 1, MPI_INT, nextCPU, 81719, mpi_info->comm);
-            /* send out this chunk of mesh to the next waiting node */
+            // if there was an error, we have to stop them waiting for more
+            MPI_Send(&errorFlag, 1, MPI_INT, nextCPU, 81719, mpiInfo->comm);
+            // send out this chunk of mesh to the next waiting node
             if (!errorFlag) {
-                tempInts[chunkSize] = chunkNodes;   /* The message has one more int to send chunkNodes */
-                MPI_Send(tempInts, chunkSize+1, MPI_INT, nextCPU, 81720, mpi_info->comm);
-                MPI_Send(tempCoords, chunkSize*numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm);
+                // The message carries one extra int holding chunkSize
+                tempInts[chunkSize] = chunkSize;
+                MPI_Send(&tempInts[0], chunkSize+1, MPI_INT, nextCPU, 81720, mpiInfo->comm);
+                if (chunkSize > 0)
+                    MPI_Send(&tempCoords[0], chunkSize*numDim, MPI_DOUBLE, nextCPU, 81721, mpiInfo->comm);
             }
         }
 #endif
     }
 
-
 #ifdef ESYS_MPI
-    if(mpi_info->size>1){
-        MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpi_info->comm);
-    }
+    if (mpiInfo->size > 1)
+        MPI_Bcast(&errorFlag, 1, MPI_INT, 0, mpiInfo->comm);
 #endif
-    if(errorFlag){
-        return errorFlag;
-    }
-
-    if (!noError()) return ERROR;
-    mesh_p->Nodes->allocTable(chunkNodes);
-    if (!noError()) return ERROR;
-
-#pragma omp parallel for schedule(static)
-    for (int i=0; i<chunkNodes; i++) {
-        mesh_p->Nodes->Id[i] = tempInts[i];
-        mesh_p->Nodes->globalDegreesOfFreedom[i] = tempInts[i];
-        int tag = tags[tempInts[i]];
-        if (tag == -1) {
-            mesh_p->Nodes->Tag[i] = tempInts[i]; //set tag to node label
-        } else {
-            mesh_p->Nodes->Tag[i] = tag; //set tag of element
-        }
-        for (int j=0; j<numDim; j++) {
-            mesh_p->Nodes->Coordinates[INDEX2(j,i,numDim)] = tempCoords[i*numDim+j];
-        }
-
-    }
-
-    delete[] tempInts;
-    delete[] tempCoords;
-    return errorFlag;
-}
-
-int getNodesSlave(esysUtils::JMPI& mpi_info, Mesh *mesh_p, FILE *fileHandle_p,
-        int numDim, char *error_msg, std::map< int, int>& tags, int errorFlag)
-{
-#ifndef ESYS_MPI
-    throw FinleyAdapterException("slave function called in non-MPI build");
-#else
-
-    if (mpi_info->size == 1)
-        throw FinleyAdapterException("slave function called without master");
-
-    int numNodes=0;
-
-    // Broadcast numNodes if there are multiple mpi procs
-    MPI_Bcast(&numNodes, 1, MPI_INT,  0, mpi_info->comm);
-    int chunkSize = (numNodes / mpi_info->size) + 1, chunkNodes=0;
-    int *tempInts = new int[chunkSize+1];        /* Stores the integer message data */
-    double *tempCoords = new double[chunkSize*numDim]; /* Stores the double message data */
-    /* Each worker receives two messages */
-    MPI_Status status;
-    MPI_Recv(&errorFlag, 1, MPI_INT,0, 81719, mpi_info->comm, &status);
-    if(!errorFlag){
-        MPI_Recv(tempInts, chunkSize+1, MPI_INT, 0, 81720, mpi_info->comm, &status);
-        MPI_Recv(tempCoords, chunkSize*numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status);
-        chunkNodes = tempInts[chunkSize];   /* How many nodes are in this workers chunk? */
-    }
-
-
-    MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpi_info->comm);
-    if(errorFlag){
+    if (errorFlag)
         return errorFlag;
-    }
 
-    if (!noError()) return ERROR;
-    mesh_p->Nodes->allocTable(chunkNodes);
-    if (!noError()) return ERROR;
+    NodeFile* nodes = dom->getNodes();
+    nodes->allocTable(chunkSize);
 
 #pragma omp parallel for schedule(static)
-    for (int i=0; i<chunkNodes; i++) {
-        mesh_p->Nodes->Id[i] = tempInts[i];
-        mesh_p->Nodes->globalDegreesOfFreedom[i] = tempInts[i];
+    for (index_t i = 0; i < chunkSize; i++) {
+        nodes->Id[i] = tempInts[i];
+        nodes->globalDegreesOfFreedom[i] = tempInts[i];
         int tag = tags[tempInts[i]];
         if (tag == -1) {
-            mesh_p->Nodes->Tag[i] = tempInts[i]; //set tag to node label
+            nodes->Tag[i] = tempInts[i]; //set tag to node label
         } else {
-            mesh_p->Nodes->Tag[i] = tag; //set tag of element
+            nodes->Tag[i] = tag; //set tag of element
         }
         for (int j=0; j<numDim; j++) {
-            mesh_p->Nodes->Coordinates[INDEX2(j,i,numDim)] = tempCoords[i*numDim+j];
+            nodes->Coordinates[INDEX2(j,i,numDim)] = tempCoords[i*numDim+j];
         }
-
     }
 
-    delete[] tempInts;
-    delete[] tempCoords;
     return errorFlag;
-#endif //#ifndef ESYS_MPI -> #else
-}
-
-int getNodes(esysUtils::JMPI& mpi_info, Mesh *mesh_p, FILE *fileHandle_p,
-        int numDim, char *error_msg, std::map< int, int>& tags, int errorFlag)
-{
-    if (mpi_info->rank == 0)
-        return getNodesMaster(mpi_info, mesh_p, fileHandle_p, numDim, error_msg,
-                tags, errorFlag);
-
-    return getNodesSlave(mpi_info, mesh_p, fileHandle_p, numDim, error_msg,
-                tags, errorFlag);
 }
 
 int get_next_state(FILE *f, bool nodesRead, bool elementsRead, int *logicFlag) {
     std::vector<char> line;
     do {
         if (!get_line(line, f)) { //got no line
-            //check to see we atleast have some nodes and elements
+            //check that we at least have some nodes and elements
             if (!nodesRead) {
                 //EOF before nodes section found
                 return MISSING_NODES;
@@ -930,332 +694,477 @@ int get_next_state(FILE *f, bool nodesRead, bool elementsRead, int *logicFlag) {
     return 0;
 }
 
-void recv_state(esysUtils::JMPI& mpi_info, int *error, int *logic) {
-#ifdef ESYS_MPI
-    int flags[2] = {0};
-    // Broadcast line
-    MPI_Bcast(&flags, 2, MPI_INT, 0, mpi_info->comm);
-    *error = flags[0];
-    if (logic)
-        *logic = flags[1];
-#endif
-}
-
-void send_state(esysUtils::JMPI& mpi_info, int error, int logic) {
+void send_state(escript::JMPI& mpiInfo, int error, int logic)
+{
 #ifdef ESYS_MPI
     int flags[2] = {error, logic};
     // Broadcast line
-    if (mpi_info->size > 1) {
-        MPI_Bcast(&flags, 2, MPI_INT,  0, mpi_info->comm);
+    if (mpiInfo->size > 1) {
+        MPI_Bcast(&flags, 2, MPI_INT,  0, mpiInfo->comm);
     }
 #endif
 }
 
-int check_error(int error, FILE *f, char *error_msg) {
-        //handle errors
+int check_error(int error, FILE* f, const std::string& errorMsg)
+{
+    //handle errors
     switch(error) {
         case 0:
             break;
         case ERROR:
-            throw FinleyAdapterException("ERROR set for unknown reason");
-        case EARLY_EOF: //early eof while scanning
-            throw FinleyAdapterException("early eof while scanning");
+            throw FinleyException("ERROR set for unknown reason");
+        case EARLY_EOF: //early EOF while scanning
+            throw IOError("early EOF while scanning");
         case MISSING_NODES:  //EOF before nodes section found
-            throw FinleyAdapterException("EOF before nodes section found");
+            throw IOError("EOF before nodes section found");
         case MISSING_ELEMENTS:
-            throw FinleyAdapterException("EOF before elements section found");
-        case THROW_ERROR: // throw error_msg
-            throw FinleyAdapterException(error_msg);
-        case SUCCESS: // eof at apropriate time.
+            throw IOError("EOF before elements section found");
+        case THROW_ERROR: // throw errorMsg
+            throw IOError(errorMsg);
+        case SUCCESS: // EOF at appropriate time.
             if (f)
                 fclose(f);
             break;
         default:
-            throw FinleyAdapterException("an unknown error has occured in readGmsh");
+            throw FinleyException("an unknown error has occured in readGmsh");
 
     }
     return error;
 }
 
-Mesh* Mesh::readGmshMaster(esysUtils::JMPI& mpi_info, const std::string fname, int numDim, int order,
-                     int reduced_order, bool optimize, bool useMacroElements)
+FinleyDomain* readGmshMaster(escript::JMPI& mpiInfo,
+                             const std::string& filename, int numDim,
+                             int order, int reducedOrder, bool optimize,
+                             bool useMacroElements)
 {
     double version = 1.0;
-    bool nodesRead=false, elementsRead=false;
-    int format = 0, size = sizeof(double), scan_ret,  errorFlag=0, logicFlag=0;
+    bool nodesRead = false, elementsRead = false;
+    int format = 0, size = sizeof(double), scan_ret, errorFlag = 0, logicFlag = 0;
     std::vector<char> line;
-    char error_msg[LenErrorMsg_MAX];
     std::map<int,int> nodeTags;
-#ifdef Finley_TRACE
-    double time0=timer();
-#endif
-    FILE * fileHandle_p = NULL;
-
-    resetError();
-    std::size_t found = fname.find("\n");
-    if (found!=std::string::npos){
-        sprintf(error_msg, "file %s contains newline characters.", fname.c_str());
-        errorFlag=THROW_ERROR;
-        send_state(mpi_info, errorFlag, logicFlag);
-        throw FinleyAdapterException(error_msg);
+    std::string errorMsg;
+
+    size_t found = filename.find("\n");
+    if (found != std::string::npos) {
+        errorFlag = THROW_ERROR;
+        send_state(mpiInfo, errorFlag, logicFlag);
+        throw escript::ValueError("readGmsh: filename contains newline characters!");
     }
 
     // allocate mesh
-    Mesh* mesh_p = new Mesh(fname, numDim, mpi_info);
+    FinleyDomain* dom = new FinleyDomain(filename, numDim, mpiInfo);
 
     // get file handle
-    fileHandle_p = fopen(fname.c_str(), "r");
-    if (fileHandle_p==NULL) {
-        sprintf(error_msg, "Opening Gmsh file %s for reading failed.", fname.c_str());
-        errorFlag=THROW_ERROR;
-        send_state(mpi_info, errorFlag, logicFlag);
-        throw FinleyAdapterException(error_msg);
+    FILE* fileHandle = fopen(filename.c_str(), "r");
+    if (!fileHandle) {
+        std::stringstream ss;
+        ss << "readGmsh: opening file " << filename << " for reading failed.";
+        errorMsg = ss.str();
+        errorFlag = THROW_ERROR;
+        send_state(mpiInfo, errorFlag, logicFlag);
+        throw IOError(errorMsg);
     }
-    /* start reading */
-    while(noError() && errorFlag==0) {
-        /* find line starting with $ */
-        logicFlag=0;
-        errorFlag = get_next_state(fileHandle_p, nodesRead, elementsRead, &logicFlag);
-        if (!errorFlag && !noError())
-            errorFlag = ERROR;
-        send_state(mpi_info, errorFlag, logicFlag);
+    // start reading
+    while (!errorFlag) {
+        // find line starting with $
+        logicFlag = 0;
+        errorFlag = get_next_state(fileHandle, nodesRead, elementsRead, &logicFlag);
+        send_state(mpiInfo, errorFlag, logicFlag);
         //pre-logic error check
-        if (check_error(errorFlag, fileHandle_p, error_msg) == SUCCESS)
+        if (check_error(errorFlag, fileHandle, errorMsg) == SUCCESS)
             break;
-        /* format */
-        if (logicFlag == 1 && errorFlag ==0) {
+        // format
+        if (logicFlag == 1 && !errorFlag) {
             std::vector<char> fmt;
-            if (!get_line(fmt, fileHandle_p))
+            if (!get_line(fmt, fileHandle))
                 errorFlag = EARLY_EOF;
             scan_ret = sscanf(&fmt[0], "%lf %d %d\n", &version, &format, &size);
             SSCANF_CHECK(scan_ret);
         }
-        /* nodes are read */
-        else if (logicFlag == 2 && errorFlag ==0) {
-            nodesRead=true;
+        // nodes are read
+        else if (logicFlag == 2 && !errorFlag) {
+            nodesRead = true;
             std::vector<int> sendable_map;
-            long current = ftell(fileHandle_p);
-            errorFlag = gather_nodes(fileHandle_p, nodeTags, error_msg,
-                    numDim, version, fname.c_str());
-            if (!errorFlag && fseek(fileHandle_p, current, SEEK_SET) < 0) {
-                sprintf(error_msg, "Error in file operation");
+            long current = ftell(fileHandle);
+            errorFlag = gather_nodes(fileHandle, nodeTags, errorMsg,
+                    numDim, version, filename.c_str());
+            if (!errorFlag && fseek(fileHandle, current, SEEK_SET) < 0) {
+                errorMsg = "Error in file operation";
                 errorFlag = THROW_ERROR;
             }
-            send_state(mpi_info, errorFlag, logicFlag);
-            check_error(errorFlag, fileHandle_p, error_msg);
+            send_state(mpiInfo, errorFlag, logicFlag);
+            check_error(errorFlag, fileHandle, errorMsg);
 #ifdef ESYS_MPI
-            int mapsize = 2*nodeTags.size();
+            int mapsize = 2 * nodeTags.size();
             sendable_map.resize(mapsize);
             std::map<int,int>::iterator i = nodeTags.begin();
             for (int j = 0; i != nodeTags.end(); i++, j += 2) {
                 sendable_map[j] = i->first;
                 sendable_map[j + 1] = i->second;
             }
-            if (mpi_info->size > 1) {
-                MPI_Bcast(&mapsize, 1, MPI_INT, 0, mpi_info->comm);
+            if (mpiInfo->size > 1) {
+                MPI_Bcast(&mapsize, 1, MPI_INT, 0, mpiInfo->comm);
                 sendable_map.resize(mapsize);
-                MPI_Bcast(&sendable_map[0], mapsize, MPI_INT, 0, mpi_info->comm);
+                MPI_Bcast(&sendable_map[0], mapsize, MPI_INT, 0, mpiInfo->comm);
                 for (int j = 0; j < mapsize; j += 2)
                     nodeTags[sendable_map[j]] = sendable_map[j + 1];
             }
 #endif
-            errorFlag = getNodes(mpi_info, mesh_p, fileHandle_p, numDim,
-                    error_msg, nodeTags, errorFlag);
+            errorFlag = getNodesMaster(mpiInfo, dom, fileHandle, numDim,
+                                       errorMsg, nodeTags);
         }
-
-        /* elements */
-        else if(logicFlag==3 && errorFlag ==0) {
-            elementsRead=true;
-            errorFlag=getElements(mpi_info, mesh_p, fileHandle_p, error_msg, useMacroElements,
-                    fname, numDim, version, order, reduced_order);
+        // elements
+        else if (logicFlag == 3 && !errorFlag) {
+            elementsRead = true;
+            errorFlag = getElementsMaster(mpiInfo, dom, fileHandle, errorMsg,
+                            useMacroElements, filename, numDim, version, order,
+                            reducedOrder);
         }
-         /* name tags (thanks to Antoine Lefebvre, antoine.lefebvre2 at mail.mcgill.ca ) */
-        else if (logicFlag==4 && errorFlag ==0) {
-            if (!noError())
-                errorFlag = ERROR;
+        // name tags
+        // (thanks to Antoine Lefebvre, antoine.lefebvre2 at mail.mcgill.ca)
+        else if (logicFlag == 4 && !errorFlag) {
             std::vector<char> names;
-            if (!get_line(names, fileHandle_p))
+            if (!get_line(names, fileHandle))
                 errorFlag = EARLY_EOF;
-            int numNames=0;
+            int numNames = 0;
             scan_ret = sscanf(&names[0], "%d", &numNames);
             SSCANF_CHECK(scan_ret);
 #ifdef ESYS_MPI
             // Broadcast numNames if there are multiple mpi procs
-            if (mpi_info->size > 1) {
-                MPI_Bcast(&numNames, 1, MPI_INT,  0, mpi_info->comm);
-            }
+            if (mpiInfo->size > 1)
+                MPI_Bcast(&numNames, 1, MPI_INT,  0, mpiInfo->comm);
 #endif
             for (int i = 0; i < numNames; i++) {
                 std::vector<char> line;
-                char name[LenString_MAX] = {0};
-                if (!get_line(line, fileHandle_p))
+                char name[1024] = {0};
+                if (!get_line(line, fileHandle))
                     errorFlag = EARLY_EOF;
-                int tag_info[2] = {0};
-                char *position = &line[0];
+                int tag_info[2] = { 0 };
+                char* position = &line[0];
                 //skip the first int, it's the physical dimension
-                if (next_space(&position, 1) == NULL 
-                        || sscanf(position, "%d", tag_info) != 1 
+                if (next_space(&position, 1) == NULL
+                        || sscanf(position, "%d", tag_info) != 1
                         || next_space(&position, 1) == NULL
                         || sscanf(position, "%s", name) != 1) {
-                    setError(IO_ERROR,"Mesh_readGmsh: bad tagname");
-                }
-                if (!noError())
                     errorFlag = ERROR;
+                }
                 name[strlen(name)-1]='\0'; //strip trailing "
-                //mpi broadcast the tag info
 
 #ifdef ESYS_MPI
-                if (mpi_info->size > 1) {
-                    tag_info[1]=strlen(name) + 1; //include \0
-                    MPI_Bcast(tag_info, 2, MPI_INT,  0, mpi_info->comm);
-                    MPI_Bcast(&name, tag_info[1], MPI_CHAR,  0, mpi_info->comm);
+                // broadcast the tag info
+                if (mpiInfo->size > 1) {
+                    tag_info[1] = strlen(name) + 1; //include \0
+                    MPI_Bcast(tag_info, 2, MPI_INT,  0, mpiInfo->comm);
+                    MPI_Bcast(&name, tag_info[1], MPI_CHAR,  0, mpiInfo->comm);
                 }
 #endif
-                mesh_p->addTagMap(name+1, tag_info[0]); //skip leading "
-
+                dom->setTagMap(name+1, tag_info[0]); //skip leading "
             }
         }
 
-        if (!get_line(line, fileHandle_p)) {
+        if (!get_line(line, fileHandle)) {
             errorFlag = EARLY_EOF;
         }
         if (line[0] != '$') {
             errorFlag = THROW_ERROR;
-            snprintf(error_msg, 50, "expected closing tag, got:%s...\n", &line[0]);
+            std::stringstream ss;
+            ss << "readGmsh: expected closing tag, got '"
+                << &line[0] << "'...";
+            errorMsg = ss.str();
         }
-        send_state(mpi_info, errorFlag, logicFlag);
+        send_state(mpiInfo, errorFlag, logicFlag);
         //post logic error check, throws if relevant
-        check_error(errorFlag, fileHandle_p, error_msg);
+        check_error(errorFlag, fileHandle, errorMsg);
     }
-    // clean up
-    if (!noError()) {
-        delete mesh_p;
-        return NULL;
-    }    
-    // resolve id's
-    if (noError())
-        mesh_p->resolveNodeIds();
-    // rearrange elements
-    if (noError())
-        mesh_p->prepare(optimize);
+    return dom;
+}
+
+// slave-only functions follow
+
+#ifdef ESYS_MPI
+int getNodesSlave(escript::JMPI& mpiInfo, FinleyDomain* dom, int numDim,
+                  std::string& errorMsg, std::map<int, int>& tags)
+{
+    if (mpiInfo->size == 1)
+        throw FinleyException("Slave function called without master!");
+
+    int errorFlag = 0;
+    int numNodes = 0;
+
+    // get numNodes from the master
+    MPI_Bcast(&numNodes, 1, MPI_INT,  0, mpiInfo->comm);
+    int chunkSize = numNodes / mpiInfo->size, chunkNodes = 0;
+    std::vector<int> tempInts(chunkSize+1); // Stores the integer message data
+    std::vector<double> tempCoords(chunkSize*numDim); // Stores the double message data
+    // Each worker receives two messages
+    MPI_Status status;
+    MPI_Recv(&errorFlag, 1, MPI_INT, 0, 81719, mpiInfo->comm, &status);
+    if (!errorFlag) {
+        MPI_Recv(&tempInts[0], chunkSize+1, MPI_INT, 0, 81720, mpiInfo->comm, &status);
+        if (chunkSize > 0)
+            MPI_Recv(&tempCoords[0], chunkSize*numDim, MPI_DOUBLE, 0, 81721, mpiInfo->comm, &status);
+        chunkNodes = tempInts[chunkSize]; // How many nodes are in this worker's chunk?
+    }
+
+    MPI_Bcast(&errorFlag, 1, MPI_INT, 0, mpiInfo->comm);
+    if (errorFlag)
+        return errorFlag;
+
+    NodeFile* nodes = dom->getNodes();
+    nodes->allocTable(chunkNodes);
+
+#pragma omp parallel for schedule(static)
+    for (index_t i = 0; i < chunkNodes; i++) {
+        nodes->Id[i] = tempInts[i];
+        nodes->globalDegreesOfFreedom[i] = tempInts[i];
+        int tag = tags[tempInts[i]];
+        if (tag == -1) {
+            nodes->Tag[i] = tempInts[i]; //set tag to node label
+        } else {
+            nodes->Tag[i] = tag; //set tag of element
+        }
+        for (int j = 0; j < numDim; j++) {
+            nodes->Coordinates[INDEX2(j,i,numDim)] = tempCoords[i*numDim+j];
+        }
+    }
+
+    return errorFlag;
+}
+
+int getElementsSlave(escript::JMPI& mpiInfo, FinleyDomain* dom,
+                     std::string& errorMsg, bool useMacroElements,
+                     int numDim, double version, int order, int reducedOrder)
+{
+    /*
+     *  This function should read in the elements and distribute
+     *  them to the appropriate process.
+     */
+    if (mpiInfo->size == 1) {
+        errorMsg = "Slave function called with no master";
+        return THROW_ERROR; // calling the slave without a master makes no sense
+    }
+
+    int errorFlag = 0;
+
+    ElementTypeId finalElementType = NoRef;
+    ElementTypeId finalFaceElementType = NoRef;
+    ElementTypeId contactElementType = NoRef;
+    const_ReferenceElementSet_ptr refPoints, refContactElements;
+    const_ReferenceElementSet_ptr refFaceElements, refElements;
+
+    int totalNumElements = 0;
+    MPI_Bcast(&totalNumElements, 1, MPI_INT, 0, mpiInfo->comm);
+
+    int chunkSize = totalNumElements / mpiInfo->size, chunkElements = 0;
+    int chunkFaceElements = 0;
+    std::vector<int> id(chunkSize+1);
+    std::vector<int> tag(chunkSize+1);
+    std::vector<int> vertices(chunkSize*MAX_numNodes_gmsh, -1);
+    std::vector<ElementTypeId> elementType(chunkSize+1);
+    std::vector<int> elementIndices(chunkSize, -1);
+    std::vector<int> faceElementIndices(chunkSize, -1);
+
+    //chunkInfo stores the number of elements and number of face elements
+    int chunkInfo[2];
+
+#pragma omp parallel for schedule(static)
+    for (int i = 0; i < chunkSize; i++) {
+        id[i] = -1;
+        tag[i] = -1;
+        elementType[i] = NoRef;
+    }
+
+    // Each worker receives messages
+    MPI_Status status;
 
-    if (!noError()) {
-        delete mesh_p;
-        return NULL;
+    MPI_Recv(&errorFlag, 1, MPI_INT, 0, 81719, mpiInfo->comm, &status);
+    if (errorFlag)
+        return errorFlag;
+
+    MPI_Recv(&vertices[0], chunkSize*MAX_numNodes_gmsh, MPI_INT, 0, 81720, mpiInfo->comm, &status);
+    MPI_Recv(&id[0], chunkSize, MPI_INT, 0, 81721, mpiInfo->comm, &status);
+    MPI_Recv(&tag[0], chunkSize, MPI_INT, 0, 81722, mpiInfo->comm, &status);
+    MPI_Recv(&elementType[0], chunkSize, MPI_INT, 0, 81723, mpiInfo->comm, &status);
+    MPI_Recv(chunkInfo, 2, MPI_INT, 0, 81724, mpiInfo->comm, &status);
+    chunkElements = chunkInfo[0];
+    chunkFaceElements = chunkInfo[1];
+    MPI_Recv(&elementIndices[0], chunkElements, MPI_INT, 0, 81725, mpiInfo->comm, &status);
+    MPI_Recv(&faceElementIndices[0], chunkFaceElements, MPI_INT, 0, 81726, mpiInfo->comm, &status);
+
+    MPI_Bcast(&errorFlag, 1, MPI_INT,  0, mpiInfo->comm);
+    if (errorFlag)
+        return errorFlag;
+
+    // all elements have been read and shared, now we have to identify the
+    // elements for finley
+    int numNodes[3] = {0,0,0};
+    MPI_Bcast(numNodes, 3, MPI_INT,  0, mpiInfo->comm);
+    finalElementType = static_cast<ElementTypeId>(numNodes[0]);
+    finalFaceElementType = static_cast<ElementTypeId>(numNodes[1]);
+    contactElementType = static_cast<ElementTypeId>(numNodes[2]);
+
+    refElements.reset(new ReferenceElementSet(finalElementType, order, reducedOrder));
+    refFaceElements.reset(new ReferenceElementSet(finalFaceElementType, order, reducedOrder));
+    refContactElements.reset(new ReferenceElementSet(contactElementType, order, reducedOrder));
+    refPoints.reset(new ReferenceElementSet(Point1, order, reducedOrder));
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    dom->setElements(elements);
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    dom->setFaceElements(faces);
+    ElementFile* contacts = new ElementFile(refContactElements, mpiInfo);
+    dom->setContactElements(contacts);
+    ElementFile* points = new ElementFile(refPoints, mpiInfo);
+    dom->setPoints(points);
+    elements->allocTable(chunkElements);
+    faces->allocTable(chunkFaceElements);
+    contacts->allocTable(0);
+    points->allocTable(0);
+    elements->minColor = 0;
+    elements->maxColor = chunkElements-1;
+    faces->minColor = 0;
+    faces->maxColor = chunkFaceElements-1;
+    contacts->minColor = 0;
+    contacts->maxColor = 0;
+    points->minColor = 0;
+    points->maxColor = 0;
+
+#pragma omp parallel for schedule(static)
+    for (index_t e = 0; e < chunkElements; e++) {
+        elements->Id[e] = id[elementIndices[e]];
+        elements->Tag[e] = tag[elementIndices[e]];
+        elements->Color[e] = elementIndices[e];
+        elements->Owner[e] = mpiInfo->rank;
+        for (int j = 0; j < elements->numNodes; ++j)  {
+            int vertex = vertices[INDEX2(j, elementIndices[e], MAX_numNodes_gmsh)];
+            elements->Nodes[INDEX2(j, e, elements->numNodes)] = vertex;
+        }
     }
-    return mesh_p;
+
+#pragma omp parallel for schedule(static)
+    for (index_t e = 0; e < chunkFaceElements; e++) {
+        faces->Id[e] = id[faceElementIndices[e]];
+        faces->Tag[e] = tag[faceElementIndices[e]];
+        faces->Color[e] = e;
+        faces->Owner[e] = mpiInfo->rank;
+        for (int j = 0; j < faces->numNodes; ++j) {
+            int faceVertex = vertices[INDEX2(j, faceElementIndices[e], MAX_numNodes_gmsh)];
+            faces->Nodes[INDEX2(j, e, faces->numNodes)] = faceVertex;
+        }
+    }
+
+    return errorFlag;
+}
+
+void recv_state(escript::JMPI& mpiInfo, int* error, int* logic)
+{
+    int flags[2] = { 0 };
+    // Broadcast line
+    MPI_Bcast(&flags, 2, MPI_INT, 0, mpiInfo->comm);
+    *error = flags[0];
+    if (logic)
+        *logic = flags[1];
 }
+#endif // ESYS_MPI
 
-Mesh* Mesh::readGmshSlave(esysUtils::JMPI& mpi_info, const std::string fname, int numDim, int order,
-                     int reduced_order, bool optimize, bool useMacroElements)
+FinleyDomain* readGmshSlave(escript::JMPI& mpiInfo,
+                            const std::string& filename, int numDim, int order,
+                            int reducedOrder, bool optimize,
+                            bool useMacroElements)
 {
 #ifndef ESYS_MPI
-    throw FinleyAdapterException("slave function called in non-MPI build");
+    throw FinleyException("slave function called in non-MPI build!");
 #else
-    if (mpi_info->size == 1)
-        throw FinleyAdapterException("slave function called but only one process");
+    if (mpiInfo->size == 1)
+        throw FinleyException("slave function called but only one process");
 
-    double version = 1.0;
-    int errorFlag=0, logicFlag=0;
-    int numNames=0;
-    int i, tag_info[2];
-    char name[LenString_MAX+1];
-    char error_msg[LenErrorMsg_MAX] = {0};
-    std::map<int,int> nodeTags;
-#ifdef Finley_TRACE
-    double time0=timer();
-#endif
-    FILE * fileHandle_p = NULL;
+    const double version = 1.0;
+    int errorFlag = 0, logicFlag = 0;
+    std::string errorMsg;
 
-    resetError();
     // allocate mesh
-    Mesh* mesh_p = new Mesh(fname, numDim, mpi_info);
+    FinleyDomain* dom = new FinleyDomain(filename, numDim, mpiInfo);
 
-    // get file handle
-    /* start reading */
-    while(noError() && errorFlag != SUCCESS) {
+    while (errorFlag != SUCCESS) {
         logicFlag = 0;
-        //pre logic state fetch
-        recv_state(mpi_info, &errorFlag, &logicFlag);
-        if (check_error(errorFlag, NULL, error_msg) == SUCCESS)
+        // pre logic state fetch
+        recv_state(mpiInfo, &errorFlag, &logicFlag);
+        if (check_error(errorFlag, NULL, errorMsg) == SUCCESS)
             break;
-         
-        /* format */
-        /* nodes are read */
+
+        // format
+        // nodes are read
         if (logicFlag == 2) {
             int mapsize = 0;
             std::vector<int> sendable_map;
-            recv_state(mpi_info, &errorFlag, &logicFlag);
-            check_error(errorFlag, NULL, error_msg);
-            MPI_Bcast(&mapsize, 1, MPI_INT, 0, mpi_info->comm);
+            recv_state(mpiInfo, &errorFlag, &logicFlag);
+            check_error(errorFlag, NULL, errorMsg);
+            MPI_Bcast(&mapsize, 1, MPI_INT, 0, mpiInfo->comm);
             sendable_map.resize(mapsize);
-            MPI_Bcast(&sendable_map[0], mapsize, MPI_INT, 0, mpi_info->comm);
+            MPI_Bcast(&sendable_map[0], mapsize, MPI_INT, 0, mpiInfo->comm);
+            std::map<int,int> nodeTags;
             for (int j = 0; j < mapsize; j += 2)
                 nodeTags[sendable_map[j]] = sendable_map[j + 1];
 
-            errorFlag = getNodes(mpi_info, mesh_p, fileHandle_p, numDim,
-                    error_msg, nodeTags, errorFlag);
+            errorFlag = getNodesSlave(mpiInfo, dom, numDim, errorMsg, nodeTags);
         }
-
-        /* elements */
-        else if(logicFlag==3) {
-            errorFlag=getElements(mpi_info, mesh_p, fileHandle_p, error_msg, useMacroElements,
-                    fname, numDim, version, order, reduced_order);
+        // elements
+        else if (logicFlag == 3) {
+            errorFlag = getElementsSlave(mpiInfo, dom, errorMsg,
+                                         useMacroElements, numDim, version,
+                                         order, reducedOrder);
         }
-         /* name tags (thanks to Antoine Lefebvre, antoine.lefebvre2 at mail.mcgill.ca ) */
-        else if (logicFlag==4) {
-            if (! noError())
-                errorFlag = THROW_ERROR;
-            // Broadcast numNames if there are multiple mpi procs
-            MPI_Bcast(&numNames, 1, MPI_INT,  0, mpi_info->comm);
-            for (i = 0; i < numNames; i++) {
-                //mpi broadcast the tag info
-                tag_info[0]=0;
-                tag_info[1]=0;
-                MPI_Bcast(tag_info, 2, MPI_INT,  0, mpi_info->comm);
-                MPI_Bcast(&name, tag_info[1], MPI_CHAR,  0, mpi_info->comm); //strlen + 1 for null terminator
-                mesh_p->addTagMap(&name[1], tag_info[0]);
+        // name tags
+        // (thanks to Antoine Lefebvre, antoine.lefebvre2 at mail.mcgill.ca)
+        else if (logicFlag == 4) {
+            // Broadcast numNames
+            int numNames = 0;
+            MPI_Bcast(&numNames, 1, MPI_INT,  0, mpiInfo->comm);
+            for (int i = 0; i < numNames; i++) {
+                char name[1024];
+                int tagInfo[2];
+                // broadcast the tag info
+                tagInfo[0] = 0;
+                tagInfo[1] = 0;
+                MPI_Bcast(tagInfo, 2, MPI_INT,  0, mpiInfo->comm);
+                //strlen + 1 for null terminator
+                MPI_Bcast(&name, tagInfo[1], MPI_CHAR, 0, mpiInfo->comm);
+                dom->setTagMap(&name[1], tagInfo[0]);
             }
         }
         //post logic error check
-        recv_state(mpi_info, &errorFlag, &logicFlag);
-        if (check_error(errorFlag, NULL, error_msg) == SUCCESS)
+        recv_state(mpiInfo, &errorFlag, &logicFlag);
+        if (check_error(errorFlag, NULL, errorMsg) == SUCCESS)
             break;
-    //end while loop
-    }
-
-    // clean up
-    if (!noError()) {
-        delete mesh_p;
-        return NULL;
-    }
-    // resolve id's
-    if (noError())
-        mesh_p->resolveNodeIds();
-    // rearrange elements
-    if (noError())
-        mesh_p->prepare(optimize);
+    } //end while loop
 
-    if (!noError()) {
-        delete mesh_p;
-        return NULL;
-    }
-    return mesh_p;
-#endif //#ifndef ESYS_MPI -> #else
+    return dom;
+#endif // ESYS_MPI
 }
 
+} // anonymous namespace
 
 
+namespace finley {
 
-Mesh* Mesh::readGmsh(esysUtils::JMPI& mpi_info, const std::string fname, int numDim, int order,
-                     int reduced_order, bool optimize, bool useMacroElements)
+escript::Domain_ptr FinleyDomain::readGmsh(escript::JMPI mpiInfo,
+                        const std::string& filename, int numDim, int order,
+                        int reducedOrder, bool optimize, bool useMacroElements)
 {
-    if (mpi_info->rank == 0)
-        return readGmshMaster(mpi_info, fname, numDim, order, reduced_order,
-                optimize, useMacroElements);
+    FinleyDomain* dom;
 
-    return readGmshSlave(mpi_info, fname, numDim, order, reduced_order,
-            optimize, useMacroElements);
+    if (mpiInfo->rank == 0) {
+        dom = readGmshMaster(mpiInfo, filename, numDim, order, reducedOrder,
+                             optimize, useMacroElements);
+    } else {
+        dom = readGmshSlave(mpiInfo, filename, numDim, order, reducedOrder,
+                            optimize, useMacroElements);
+    }
+
+    // resolve id's
+    dom->resolveNodeIds();
+    // rearrange elements
+    dom->prepare(optimize);
+    return dom->getPtr();
 }
 
 } // namespace finley
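For orientation, the FinleyDomain::readGmsh entry point above is what backs the user-level Gmsh reader. A minimal usage sketch from Python, assuming the esys.finley.ReadGmsh wrapper keeps its current name and that its optional arguments map onto the order, reducedOrder, optimize and useMacroElements parameters seen in the C++ signature (the file name below is a hypothetical placeholder):

    from esys.finley import ReadGmsh

    # read a 3D Gmsh mesh; "model.msh" is a placeholder file name
    domain = ReadGmsh("model.msh", 3)
    print(domain.getDim())

Under MPI, rank 0 takes the readGmshMaster path and the remaining ranks take readGmshSlave; both now return a FinleyDomain, and resolveNodeIds()/prepare() run once on the combined result.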
diff --git a/finley/src/Mesh_rec4.cpp b/finley/src/Mesh_rec4.cpp
index 9c42f9b..7b26ed5 100644
--- a/finley/src/Mesh_rec4.cpp
+++ b/finley/src/Mesh_rec4.cpp
@@ -14,65 +14,64 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
 
-/****************************************************************************
+#include <escript/index.h>
 
-  Finley: generates rectangular meshes
-
-  Generates a numElements[0] x numElements[1] mesh with first order elements
-  (Rec4) in the rectangle [0,Length[0]] x [0,Length[1]].
-  order is the desired accuracy of the integration scheme.
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "RectangularMesh.h"
+using escript::DataTypes::real_t;
 
 namespace finley {
 
-Mesh* RectangularMesh_Rec4(const dim_t* numElements, const double* Length,
-                           const bool* periodic, int order, int reduced_order,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool optimize, esysUtils::JMPI& mpiInfo)
+escript::Domain_ptr FinleyDomain::createRec4(dim_t NE0, dim_t NE1,
+                                             real_t l0, real_t l1,
+                                             bool periodic0, bool periodic1,
+                                             int order, int reducedOrder,
+                                             bool useElementsOnFace,
+                                             bool optimize,
+                                             escript::JMPI mpiInfo)
 {
     const int N_PER_E = 1;
     const int DIM = 2;
-    dim_t Nstride0=0, Nstride1=0, local_NE0, local_NE1;
-    index_t e_offset0=0, e_offset1=0;
+    const int LEFTTAG = 1;    // boundary x1=0
+    const int RIGHTTAG = 2;   // boundary x1=1
+    const int BOTTOMTAG = 10; // boundary x2=0
+    const int TOPTAG = 20;    // boundary x2=1
+    dim_t Nstride0 = 0, Nstride1 = 0, local_NE0, local_NE1;
+    index_t e_offset0 = 0, e_offset1 = 0;
 
-    const Esys_MPI_rank myRank = mpiInfo->rank;
+    const int myRank = mpiInfo->rank;
 
     // set up the global dimensions of the mesh
-    const dim_t NE0 = std::max(dim_t(1),numElements[0]);
-    const dim_t NE1 = std::max(dim_t(1),numElements[1]);
-    const dim_t N0 = N_PER_E*NE0+1;
-    const dim_t N1 = N_PER_E*NE1+1;
+    NE0 = std::max((dim_t)1, NE0);
+    NE1 = std::max((dim_t)1, NE1);
+    const dim_t N0 = N_PER_E*NE0 + 1;
+    const dim_t N1 = N_PER_E*NE1 + 1;
 
     // allocate mesh
     std::stringstream name;
     name << "Rectangular " << N0 << " x " << N1 << " mesh";
-    Mesh* out = new Mesh(name.str(), DIM, mpiInfo);
+    FinleyDomain* out = new FinleyDomain(name.str(), DIM, mpiInfo);
 
     const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
     if (useElementsOnFace) {
-        refFaceElements.reset(new ReferenceElementSet(Rec4Face, order, reduced_order));
-        refContactElements.reset(new ReferenceElementSet(Rec4Face_Contact, order, reduced_order));
+        refFaceElements.reset(new ReferenceElementSet(Rec4Face, order, reducedOrder));
+        refContactElements.reset(new ReferenceElementSet(Rec4Face_Contact, order, reducedOrder));
     } else {
-        refFaceElements.reset(new ReferenceElementSet(Line2, order, reduced_order));
-        refContactElements.reset(new ReferenceElementSet(Line2_Contact, order, reduced_order));
+        refFaceElements.reset(new ReferenceElementSet(Line2, order, reducedOrder));
+        refContactElements.reset(new ReferenceElementSet(Line2_Contact, order, reducedOrder));
     }
-    refElements.reset(new ReferenceElementSet(Rec4, order, reduced_order));
-    refPoints.reset(new ReferenceElementSet(Point1, order, reduced_order));
+    refElements.reset(new ReferenceElementSet(Rec4, order, reducedOrder));
+    refPoints.reset(new ReferenceElementSet(Point1, order, reducedOrder));
 
-    out->setPoints(new ElementFile(refPoints, mpiInfo));
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    out->setElements(elements);
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    out->setFaceElements(faces);
     out->setContactElements(new ElementFile(refContactElements, mpiInfo));
-    out->setFaceElements(new ElementFile(refFaceElements, mpiInfo));
-    out->setElements(new ElementFile(refElements, mpiInfo));
+    out->setPoints(new ElementFile(refPoints, mpiInfo));
 
     // work out the largest dimension
-    if (N1 == std::max(N0,N1)) {
+    if (N1 == std::max(N0, N1)) {
         Nstride0 = 1;
         Nstride1 = N0;
         local_NE0 = NE0;
@@ -85,199 +84,191 @@ Mesh* RectangularMesh_Rec4(const dim_t* numElements, const double* Length,
         local_NE1 = NE1;
         e_offset1 = 0;
     }
-    const index_t offset0 = e_offset0*N_PER_E;
-    const index_t offset1 = e_offset1*N_PER_E;
-    const dim_t local_N0 = local_NE0>0 ? local_NE0*N_PER_E+1 : 0;
-    const dim_t local_N1 = local_NE1>0 ? local_NE1*N_PER_E+1 : 0;
-    dim_t NDOF0=0, NDOF1=0;
+    const index_t offset0 = e_offset0 * N_PER_E;
+    const index_t offset1 = e_offset1 * N_PER_E;
+    const dim_t local_N0 = local_NE0 > 0 ? local_NE0*N_PER_E+1 : 0;
+    const dim_t local_N1 = local_NE1 > 0 ? local_NE1*N_PER_E+1 : 0;
+    dim_t NDOF0 = 0, NDOF1 = 0;
 
     // get the number of surface elements
     dim_t NFaceElements = 0;
-    if (!periodic[0] && local_NE0>0) {
-        NDOF0=N0;
+    if (!periodic0 && local_NE0 > 0) {
+        NDOF0 = N0;
         if (e_offset0 == 0)
-            NFaceElements+=local_NE1;
-        if (local_NE0+e_offset0 == NE0)
-            NFaceElements+=local_NE1;
+            NFaceElements += local_NE1;
+        if (local_NE0 + e_offset0 == NE0)
+            NFaceElements += local_NE1;
     } else {
-        NDOF0=N0-1;
+        NDOF0 = N0 - 1;
     }
 
-    if (!periodic[1] && local_NE1>0) {
-        NDOF1=N1;
+    if (!periodic1 && local_NE1 > 0) {
+        NDOF1 = N1;
         if (e_offset1 == 0)
-            NFaceElements+=local_NE0;
-        if (local_NE1+e_offset1 == NE1)
-            NFaceElements+=local_NE0;
+            NFaceElements += local_NE0;
+        if (local_NE1 + e_offset1 == NE1)
+            NFaceElements += local_NE0;
     } else {
-        NDOF1=N1-1;
+        NDOF1 = N1 - 1;
     }
 
     // allocate tables
-    out->Nodes->allocTable(local_N0*local_N1);
-    out->Elements->allocTable(local_NE0*local_NE1);
-    out->FaceElements->allocTable(NFaceElements);
+    NodeFile* nodes = out->getNodes();
+    nodes->allocTable(local_N0 * local_N1);
+    elements->allocTable(local_NE0 * local_NE1);
+    faces->allocTable(NFaceElements);
 
     // create nodes
 #pragma omp parallel for
-    for (index_t i1=0; i1<local_N1; i1++) {
-        for (index_t i0=0; i0<local_N0; i0++) {
-            const dim_t k = i0+local_N0*i1;
-            const index_t global_i0 = i0+offset0;
-            const index_t global_i1 = i1+offset1;
-            out->Nodes->Coordinates[INDEX2(0,k,DIM)]=DBLE(global_i0)/DBLE(N0-1)*Length[0];
-            out->Nodes->Coordinates[INDEX2(1,k,DIM)]=DBLE(global_i1)/DBLE(N1-1)*Length[1];
-            out->Nodes->Id[k] = Nstride0*global_i0 + Nstride1*global_i1;
-            out->Nodes->Tag[k]=0;
-            out->Nodes->globalDegreesOfFreedom[k] = Nstride0*(global_i0%NDOF0)
-                                                  + Nstride1*(global_i1%NDOF1);
+    for (index_t i1 = 0; i1 < local_N1; i1++) {
+        for (index_t i0 = 0; i0 < local_N0; i0++) {
+            const dim_t k = i0 + local_N0 * i1;
+            const index_t global_i0 = i0 + offset0;
+            const index_t global_i1 = i1 + offset1;
+            nodes->Coordinates[INDEX2(0, k, DIM)] = (real_t)global_i0 / (real_t)(N0 - 1) * l0;
+            nodes->Coordinates[INDEX2(1, k, DIM)] = (real_t)global_i1 / (real_t)(N1 - 1) * l1;
+            nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1;
+            nodes->Tag[k] = 0;
+            nodes->globalDegreesOfFreedom[k] = Nstride0 * (global_i0 % NDOF0)
+                                             + Nstride1 * (global_i1 % NDOF1);
         }
     }
 
     // set the elements
-    dim_t NN = out->Elements->numNodes;
+    dim_t NN = elements->numNodes;
 #pragma omp parallel for
-    for (index_t i1=0; i1<local_NE1; i1++) {
-        for (index_t i0=0; i0<local_NE0; i0++) {
-            const dim_t k = i0+local_NE0*i1;
-            const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
-                                + Nstride1*N_PER_E*(i1+e_offset1);
-
-            out->Elements->Id[k] = (i0+e_offset0) + NE0*(i1+e_offset1);
-            out->Elements->Tag[k] = 0;
-            out->Elements->Owner[k] = myRank;
-
-            out->Elements->Nodes[INDEX2(0,k,NN)]=node0;
-            out->Elements->Nodes[INDEX2(1,k,NN)]=node0+Nstride0;
-            out->Elements->Nodes[INDEX2(2,k,NN)]=node0+Nstride1+Nstride0;
-            out->Elements->Nodes[INDEX2(3,k,NN)]=node0+Nstride1;
+    for (index_t i1 = 0; i1 < local_NE1; i1++) {
+        for (index_t i0 = 0; i0 < local_NE0; i0++) {
+            const dim_t k = i0 + local_NE0 * i1;
+            const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0)
+                                + Nstride1 * N_PER_E * (i1 + e_offset1);
+
+            elements->Id[k] = (i0 + e_offset0) + NE0*(i1 + e_offset1);
+            elements->Tag[k] = 0;
+            elements->Owner[k] = myRank;
+
+            elements->Nodes[INDEX2(0, k, NN)] = node0;
+            elements->Nodes[INDEX2(1, k, NN)] = node0 + Nstride0;
+            elements->Nodes[INDEX2(2, k, NN)] = node0 + Nstride1 + Nstride0;
+            elements->Nodes[INDEX2(3, k, NN)] = node0 + Nstride1;
         }
     }
 
     // face elements
-    NN=out->FaceElements->numNodes;
-    dim_t totalNECount=NE0*NE1;
+    NN = faces->numNodes;
+    dim_t totalNECount = NE0 * NE1;
     dim_t faceNECount = 0;
-    index_t* eNodes = out->FaceElements->Nodes;
+    index_t* eNodes = faces->Nodes;
 
-    if (!periodic[0] && local_NE0>0) {
-        // **  elements on boundary 001 (x1=0):
+    if (!periodic0 && local_NE0 > 0) {
+        // ** elements on boundary 001 (x1=0)
         if (e_offset0 == 0) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                const dim_t k = i1+faceNECount;
-                const index_t node0 = Nstride1*N_PER_E*(i1+e_offset1);
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride1 * N_PER_E * (i1 + e_offset1);
 
-                out->FaceElements->Id[k] = i1+e_offset1+totalNECount;
-                out->FaceElements->Tag[k] = 1;
-                out->FaceElements->Owner[k] = myRank;
+                faces->Id[k] = i1 + e_offset1 + totalNECount;
+                faces->Tag[k] = LEFTTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(1,k,NN)]=node0;
-                    eNodes[INDEX2(2,k,NN)]=node0+Nstride0;
-                    eNodes[INDEX2(3,k,NN)]=node0+Nstride1+Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(1, k, NN)] = node0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + Nstride1 + Nstride0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(1,k,NN)]=node0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(1, k, NN)] = node0;
                 }
             }
-            faceNECount+=local_NE1;
+            faceNECount += local_NE1;
         }
-        totalNECount+=NE1;
-
-        // **  elements on boundary 002 (x1=1):
-        if (local_NE0+e_offset0 == NE0) {
+        totalNECount += NE1;
+        // ** elements on boundary 002 (x1=1)
+        if (local_NE0 + e_offset0 == NE0) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                const dim_t k = i1+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(NE0-1)
-                                    + Nstride1*N_PER_E*(i1+e_offset1);
-
-                out->FaceElements->Id[k] = (i1+e_offset1)+totalNECount;
-                out->FaceElements->Tag[k] = 2;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (NE0 - 1)
+                                    + Nstride1 * N_PER_E * (i1 + e_offset1);
+
+                faces->Id[k] = (i1 + e_offset1) + totalNECount;
+                faces->Tag[k] = RIGHTTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride1+Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(3,k,NN)]=node0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride1 + Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(3, k, NN)] = node0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride1+Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride1 + Nstride0;
                 }
             }
-            faceNECount+=local_NE1;
+            faceNECount += local_NE1;
         }
-        totalNECount+=NE1;
+        totalNECount += NE1;
     }
 
-    if (!periodic[1] && local_NE1>0) {
-        // **  elements on boundary 010 (x2=0):
+    if (!periodic1 && local_NE1 > 0) {
+        // ** elements on boundary 010 (x2=0)
         if (e_offset1 == 0) {
 #pragma omp parallel for
-            for (index_t i0=0; i0<local_NE0; i0++) {
-                const dim_t k = i0+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0);
-                out->FaceElements->Id[k] = e_offset0+i0+totalNECount;
-                out->FaceElements->Tag[k] = 10;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0);
+                faces->Id[k] = e_offset0 + i0 + totalNECount;
+                faces->Tag[k] = BOTTOMTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+Nstride1+Nstride0;
-                    eNodes[INDEX2(3,k,NN)]=node0+Nstride1;
+                    eNodes[INDEX2(0, k, NN)] = node0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride1 + Nstride0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + Nstride1;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride0;
                 }
             }
-            faceNECount+=local_NE0;
+            faceNECount += local_NE0;
         }
-        totalNECount+=NE0;
-
-        // **  elements on boundary 020 (x2=1):
-        if (local_NE1+e_offset1 == NE1) {
+        totalNECount += NE0;
+        // ** elements on boundary 020 (x2=1)
+        if (local_NE1 + e_offset1 == NE1) {
 #pragma omp parallel for
-            for (index_t i0=0; i0<local_NE0; i0++) {
-                const dim_t k = i0+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
-                                    + Nstride1*N_PER_E*(NE1-1);
-
-                out->FaceElements->Id[k] = i0+e_offset0+totalNECount;
-                out->FaceElements->Tag[k] = 20;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0)
+                                    + Nstride1 * N_PER_E * (NE1 - 1);
+
+                faces->Id[k] = i0 + e_offset0 + totalNECount;
+                faces->Tag[k] = TOPTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride1+Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(2,k,NN)]=node0;
-                    eNodes[INDEX2(3,k,NN)]=node0+Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride1 + Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(2, k, NN)] = node0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + Nstride0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+Nstride1+Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+Nstride1;
+                    eNodes[INDEX2(0, k, NN)] = node0 + Nstride1 + Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + Nstride1;
                 }
             }
-            faceNECount+=local_NE0;
+            faceNECount += local_NE0;
         }
-        totalNECount+=NE0;
+        totalNECount += NE0;
     }
 
     // add tag names
-    out->addTagMap("top", 20);
-    out->addTagMap("bottom", 10);
-    out->addTagMap("left", 1);
-    out->addTagMap("right", 2);
+    out->setTagMap("top", TOPTAG);
+    out->setTagMap("bottom", BOTTOMTAG);
+    out->setTagMap("left", LEFTTAG);
+    out->setTagMap("right", RIGHTTAG);
 
     // prepare mesh for further calculations
     out->resolveNodeIds();
-    if (noError()) {
-        out->prepare(optimize);
-    }
-    if (!noError()) {
-        delete out;
-        out=NULL;
-    }
-
-    return out;
+    out->prepare(optimize);
+    return out->getPtr();
 }
 
 } // namespace finley
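
For orientation, here is a minimal standalone sketch (not part of the patch) of the stride-based node numbering the rewritten Rec4 generator above relies on. It assumes the branch shown where N1 is the larger dimension (so Nstride0 = 1 and Nstride1 = N0), N_PER_E = 1 for first-order elements, and a single rank so e_offset0 = e_offset1 = 0; the element counts are made up for the example.

    // Editorial sketch: Rec4 corner numbering with Nstride0 = 1, Nstride1 = N0.
    // Element (i0, i1) references node0, node0+Nstride0,
    // node0+Nstride1+Nstride0 and node0+Nstride1, as in the generator above.
    #include <cstdio>

    int main()
    {
        const int NE0 = 2, NE1 = 3;            // assumed element counts
        const int N0 = NE0 + 1;                // assuming N_PER_E == 1 for Rec4
        const int Nstride0 = 1, Nstride1 = N0;

        for (int i1 = 0; i1 < NE1; i1++) {
            for (int i0 = 0; i0 < NE0; i0++) {
                const int node0 = Nstride0*i0 + Nstride1*i1;
                std::printf("element (%d,%d): %d %d %d %d\n", i0, i1,
                            node0, node0 + Nstride0,
                            node0 + Nstride1 + Nstride0, node0 + Nstride1);
            }
        }
        return 0;
    }
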
diff --git a/finley/src/Mesh_rec8.cpp b/finley/src/Mesh_rec8.cpp
index 4c2d6cb..9a42c44 100644
--- a/finley/src/Mesh_rec8.cpp
+++ b/finley/src/Mesh_rec8.cpp
@@ -14,85 +14,87 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
 
-/****************************************************************************
+#include <escript/index.h>
 
-  Finley: generates rectangular meshes
-
-  Generates a numElements[0] x numElements[1] mesh with second order elements
-  (Rec8) in the rectangle [0,Length[0]] x [0,Length[1]].
-  order is the desired accuracy of the integration scheme.
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "RectangularMesh.h"
+using escript::DataTypes::real_t;
 
 namespace finley {
 
-Mesh* RectangularMesh_Rec8(const dim_t* numElements, const double* Length,
-                           const bool* periodic, int order, int reduced_order,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool useMacroElements, bool optimize,
-                           esysUtils::JMPI& mpiInfo)
+escript::Domain_ptr FinleyDomain::createRec8(dim_t NE0, dim_t NE1,
+                                             real_t l0, real_t l1,
+                                             bool periodic0, bool periodic1,
+                                             int order, int reducedOrder,
+                                             bool useElementsOnFace,
+                                             bool useFullElementOrder,
+                                             bool useMacroElements,
+                                             bool optimize,
+                                             escript::JMPI mpiInfo)
 {
     const int N_PER_E = 2;
     const int DIM = 2;
-    dim_t Nstride0=0, Nstride1=0, local_NE0, local_NE1;
-    index_t e_offset0, e_offset1;
+    const int LEFTTAG = 1;    // boundary x1=0
+    const int RIGHTTAG = 2;   // boundary x1=1
+    const int BOTTOMTAG = 10; // boundary x2=0
+    const int TOPTAG = 20;    // boundary x2=1
+    dim_t Nstride0 = 0, Nstride1 = 0, local_NE0, local_NE1;
+    index_t e_offset0 = 0, e_offset1 = 0;
     const bool generateAllNodes = useFullElementOrder || useMacroElements;
 
-    const Esys_MPI_rank myRank = mpiInfo->rank;
+    const int myRank = mpiInfo->rank;
 
     // set up the global dimensions of the mesh
-    const dim_t NE0 = std::max(dim_t(1),numElements[0]);
-    const dim_t NE1 = std::max(dim_t(1),numElements[1]);
-    const dim_t N0 = N_PER_E*NE0+1;
-    const dim_t N1 = N_PER_E*NE1+1;
+    NE0 = std::max((dim_t)1, NE0);
+    NE1 = std::max((dim_t)1, NE1);
+    const dim_t N0 = N_PER_E*NE0 + 1;
+    const dim_t N1 = N_PER_E*NE1 + 1;
 
     // allocate mesh
     std::stringstream name;
     name << "Rectangular " << N0 << " x " << N1 << " mesh";
-    Mesh* out = new Mesh(name.str(), DIM, mpiInfo);
+    FinleyDomain* out = new FinleyDomain(name.str(), DIM, mpiInfo);
 
     const_ReferenceElementSet_ptr refPoints, refContactElements, refFaceElements, refElements;
     if (generateAllNodes) {
         if (useMacroElements) {
-            refElements.reset(new ReferenceElementSet(Rec9Macro, order, reduced_order));
+            refElements.reset(new ReferenceElementSet(Rec9Macro, order, reducedOrder));
         } else {
-            refElements.reset(new ReferenceElementSet(Rec9, order, reduced_order));
+            refElements.reset(new ReferenceElementSet(Rec9, order, reducedOrder));
         }
         if (useElementsOnFace) {
-            setError(SYSTEM_ERROR, "rich elements for Rec9 elements are not supported yet.");
+            throw escript::NotImplementedError("rich elements for Rec9 elements are not supported yet.");
         } else {
             if (useMacroElements) {
-                refFaceElements.reset(new ReferenceElementSet(Line3Macro, order, reduced_order));
+                refFaceElements.reset(new ReferenceElementSet(Line3Macro, order, reducedOrder));
             } else {
-                refFaceElements.reset(new ReferenceElementSet(Line3, order, reduced_order));
+                refFaceElements.reset(new ReferenceElementSet(Line3, order, reducedOrder));
             }
-            refContactElements.reset(new ReferenceElementSet(Line3_Contact, order, reduced_order));
+            refContactElements.reset(new ReferenceElementSet(Line3_Contact, order, reducedOrder));
         }
     } else { // !generateAllNodes
-        refElements.reset(new ReferenceElementSet(Rec8, order, reduced_order));
+        refElements.reset(new ReferenceElementSet(Rec8, order, reducedOrder));
         if (useElementsOnFace) {
-            refFaceElements.reset(new ReferenceElementSet(Rec8Face, order, reduced_order));
-            refContactElements.reset(new ReferenceElementSet(Rec8Face_Contact, order, reduced_order));
+            refFaceElements.reset(new ReferenceElementSet(Rec8Face, order, reducedOrder));
+            refContactElements.reset(new ReferenceElementSet(Rec8Face_Contact, order, reducedOrder));
         } else {
-            refFaceElements.reset(new ReferenceElementSet(Line3, order, reduced_order));
-            refContactElements.reset(new ReferenceElementSet(Line3_Contact, order, reduced_order));
+            refFaceElements.reset(new ReferenceElementSet(Line3, order, reducedOrder));
+            refContactElements.reset(new ReferenceElementSet(Line3_Contact, order, reducedOrder));
         }
     }
-    refPoints.reset(new ReferenceElementSet(Point1, order, reduced_order));
-
-    out->setPoints(new ElementFile(refPoints, mpiInfo));
+    refPoints.reset(new ReferenceElementSet(Point1, order, reducedOrder));
+    if (!refPoints->referenceElement)
+        throw FinleyException("createRec8: reference element for points is undefined.");
+
+    ElementFile* elements = new ElementFile(refElements, mpiInfo);
+    out->setElements(elements);
+    ElementFile* faces = new ElementFile(refFaceElements, mpiInfo);
+    out->setFaceElements(faces);
     out->setContactElements(new ElementFile(refContactElements, mpiInfo));
-    out->setFaceElements(new ElementFile(refFaceElements, mpiInfo));
-    out->setElements(new ElementFile(refElements, mpiInfo));
+    out->setPoints(new ElementFile(refPoints, mpiInfo));
 
     // work out the largest dimension
-    if (N1 == std::max(N0,N1)) {
+    if (N1 == std::max(N0, N1)) {
         Nstride0 = 1;
         Nstride1 = N0;
         local_NE0 = NE0;
@@ -105,225 +107,218 @@ Mesh* RectangularMesh_Rec8(const dim_t* numElements, const double* Length,
         local_NE1 = NE1;
         e_offset1 = 0;
     }
-    const index_t offset0 = e_offset0*N_PER_E;
-    const index_t offset1 = e_offset1*N_PER_E;
-    const dim_t local_N0 = local_NE0>0 ? local_NE0*N_PER_E+1 : 0;
-    const dim_t local_N1 = local_NE1>0 ? local_NE1*N_PER_E+1 : 0;
-    dim_t NDOF0=0, NDOF1=0;
+    const index_t offset0 = e_offset0 * N_PER_E;
+    const index_t offset1 = e_offset1 * N_PER_E;
+    const dim_t local_N0 = local_NE0 > 0 ? local_NE0*N_PER_E+1 : 0;
+    const dim_t local_N1 = local_NE1 > 0 ? local_NE1*N_PER_E+1 : 0;
+    dim_t NDOF0 = 0, NDOF1 = 0;
 
     // get the number of surface elements
     dim_t NFaceElements = 0;
-    if (!periodic[0] && local_NE0>0) {
-        NDOF0=N0;
+    if (!periodic0 && local_NE0 > 0) {
+        NDOF0 = N0;
         if (e_offset0 == 0)
-            NFaceElements+=local_NE1;
-        if (local_NE0+e_offset0 == NE0)
-            NFaceElements+=local_NE1;
+            NFaceElements += local_NE1;
+        if (local_NE0 + e_offset0 == NE0)
+            NFaceElements += local_NE1;
     } else {
-        NDOF0=N0-1;
+        NDOF0 = N0 - 1;
     }
-    if (!periodic[1] && local_NE1>0) {
-        NDOF1=N1;
+
+    if (!periodic1 && local_NE1 > 0) {
+        NDOF1 = N1;
         if (e_offset1 == 0)
-            NFaceElements+=local_NE0;
-        if (local_NE1+e_offset1 == NE1)
-            NFaceElements+=local_NE0;
+            NFaceElements += local_NE0;
+        if (local_NE1 + e_offset1 == NE1)
+            NFaceElements += local_NE0;
     } else {
-        NDOF1=N1-1;
+        NDOF1 = N1 - 1;
     }
 
     // allocate tables
-    out->Nodes->allocTable(local_N0*local_N1);
-    out->Elements->allocTable(local_NE0*local_NE1);
-    out->FaceElements->allocTable(NFaceElements);
+    NodeFile* nodes = out->getNodes();
+    nodes->allocTable(local_N0 * local_N1);
+    elements->allocTable(local_NE0 * local_NE1);
+    faces->allocTable(NFaceElements);
 
     // create nodes
 #pragma omp parallel for
-    for (index_t i1=0; i1<local_N1; i1++) {
-        for (index_t i0=0; i0<local_N0; i0++) {
-            const dim_t k = i0+local_N0*i1;
-            const index_t global_i0 = i0+offset0;
-            const index_t global_i1 = i1+offset1;
-            out->Nodes->Coordinates[INDEX2(0,k,DIM)]=DBLE(global_i0)/DBLE(N0-1)*Length[0];
-            out->Nodes->Coordinates[INDEX2(1,k,DIM)]=DBLE(global_i1)/DBLE(N1-1)*Length[1];
-            out->Nodes->Id[k] = Nstride0*global_i0+Nstride1*global_i1;
-            out->Nodes->Tag[k]=0;
-            out->Nodes->globalDegreesOfFreedom[k] = Nstride0*(global_i0%NDOF0)
-                                                  + Nstride1*(global_i1%NDOF1);
+    for (index_t i1 = 0; i1 < local_N1; i1++) {
+        for (index_t i0 = 0; i0 < local_N0; i0++) {
+            const dim_t k = i0 + local_N0 * i1;
+            const index_t global_i0 = i0 + offset0;
+            const index_t global_i1 = i1 + offset1;
+            nodes->Coordinates[INDEX2(0, k, DIM)] = (real_t)global_i0 / (real_t)(N0 - 1) * l0;
+            nodes->Coordinates[INDEX2(1, k, DIM)] = (real_t)global_i1 / (real_t)(N1 - 1) * l1;
+            nodes->Id[k] = Nstride0 * global_i0 + Nstride1 * global_i1;
+            nodes->Tag[k] = 0;
+            nodes->globalDegreesOfFreedom[k] = Nstride0 * (global_i0 % NDOF0)
+                                             + Nstride1 * (global_i1 % NDOF1);
         }
     }
 
     // set the elements
-    dim_t NN = out->Elements->numNodes;
+    dim_t NN = elements->numNodes;
 #pragma omp parallel for
-    for (index_t i1=0; i1<local_NE1; i1++) {
-        for (index_t i0=0; i0<local_NE0; i0++) {
-            const dim_t k = i0+local_NE0*i1;
-            const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
-                                + Nstride1*N_PER_E*(i1+e_offset1);
-
-            out->Elements->Id[k]=(i0+e_offset0)+NE0*(i1+e_offset1);
-            out->Elements->Tag[k]=0;
-            out->Elements->Owner[k]=myRank;
-
-            out->Elements->Nodes[INDEX2(0,k,NN)]=node0;
-            out->Elements->Nodes[INDEX2(1,k,NN)]=node0+2*Nstride0;
-            out->Elements->Nodes[INDEX2(2,k,NN)]=node0+2*Nstride1+2*Nstride0;
-            out->Elements->Nodes[INDEX2(3,k,NN)]=node0+2*Nstride1;
-            out->Elements->Nodes[INDEX2(4,k,NN)]=node0+1*Nstride0;
-            out->Elements->Nodes[INDEX2(5,k,NN)]=node0+Nstride1+2*Nstride0;
-            out->Elements->Nodes[INDEX2(6,k,NN)]=node0+2*Nstride1+1*Nstride0;
-            out->Elements->Nodes[INDEX2(7,k,NN)]=node0+Nstride1;
+    for (index_t i1 = 0; i1 < local_NE1; i1++) {
+        for (index_t i0 = 0; i0 < local_NE0; i0++) {
+            const dim_t k = i0 + local_NE0 * i1;
+            const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0)
+                                + Nstride1 * N_PER_E * (i1 + e_offset1);
+
+            elements->Id[k] = (i0 + e_offset0) + NE0*(i1 + e_offset1);
+            elements->Tag[k] = 0;
+            elements->Owner[k] = myRank;
+
+            elements->Nodes[INDEX2(0, k, NN)] = node0;
+            elements->Nodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride0;
+            elements->Nodes[INDEX2(2, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+            elements->Nodes[INDEX2(3, k, NN)] = node0 + 2 * Nstride1;
+            elements->Nodes[INDEX2(4, k, NN)] = node0 + 1 * Nstride0;
+            elements->Nodes[INDEX2(5, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
+            elements->Nodes[INDEX2(6, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
+            elements->Nodes[INDEX2(7, k, NN)] = node0 + Nstride1;
             if (generateAllNodes) {
-                out->Elements->Nodes[INDEX2(8,k,NN)]=node0+1*Nstride1+1*Nstride0;
+                elements->Nodes[INDEX2(8, k, NN)] = node0 + Nstride1 + Nstride0;
             }
         }
     }
 
     // face elements
-    NN=out->FaceElements->numNodes;
-    dim_t totalNECount=NE0*NE1;
-    dim_t faceNECount=0;
-    index_t* eNodes = out->FaceElements->Nodes;
+    NN = faces->numNodes;
+    dim_t totalNECount = NE0 * NE1;
+    dim_t faceNECount = 0;
+    index_t* eNodes = faces->Nodes;
 
-    if (!periodic[0] && local_NE0>0) {
-        // **  elements on boundary 001 (x1=0):
+    if (!periodic0 && local_NE0 > 0) {
+        // ** elements on boundary 001 (x1=0)
         if (e_offset0 == 0) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                const dim_t k = i1+faceNECount;
-                const index_t node0 = Nstride1*N_PER_E*(i1+e_offset1);
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride1 * N_PER_E * (i1 + e_offset1);
 
-                out->FaceElements->Id[k]=i1+e_offset1+totalNECount;
-                out->FaceElements->Tag[k]=1;
-                out->FaceElements->Owner[k]=myRank;
+                faces->Id[k] = i1 + e_offset1 + totalNECount;
+                faces->Tag[k] = LEFTTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(1,k,NN)]=node0;
-                    eNodes[INDEX2(2,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(3,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(4,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(5,k,NN)]=node0+1*Nstride0;
-                    eNodes[INDEX2(6,k,NN)]=node0+Nstride1+2*Nstride0;
-                    eNodes[INDEX2(7,k,NN)]=node0+2*Nstride1+1*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(1, k, NN)] = node0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(4, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(5, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(6, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(7, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(1,k,NN)]=node0;
-                    eNodes[INDEX2(2,k,NN)]=node0+Nstride1;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(1, k, NN)] = node0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride1;
                 }
             }
-            faceNECount+=local_NE1;
+            faceNECount += local_NE1;
         }
-        totalNECount+=NE1;
-
-        // **  elements on boundary 002 (x1=1):
-        if (local_NE0+e_offset0 == NE0) {
+        totalNECount += NE1;
+        // ** elements on boundary 002 (x1=1)
+        if (local_NE0 + e_offset0 == NE0) {
 #pragma omp parallel for
-            for (index_t i1=0; i1<local_NE1; i1++) {
-                const dim_t k = i1+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(NE0-1)
-                                    + Nstride1*N_PER_E*(i1+e_offset1);
-
-                out->FaceElements->Id[k] = (i1+e_offset1)+totalNECount;
-                out->FaceElements->Tag[k] = 2;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i1 = 0; i1 < local_NE1; i1++) {
+                const dim_t k = i1 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (NE0 - 1)
+                                    + Nstride1 * N_PER_E * (i1 + e_offset1);
+
+                faces->Id[k] = (i1 + e_offset1) + totalNECount;
+                faces->Tag[k] = RIGHTTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(3,k,NN)]=node0;
-                    eNodes[INDEX2(4,k,NN)]=node0+Nstride1+2*Nstride0;
-                    eNodes[INDEX2(5,k,NN)]=node0+2*Nstride1+1*Nstride0;
-                    eNodes[INDEX2(6,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(7,k,NN)]=node0+1*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(3, k, NN)] = node0;
+                    eNodes[INDEX2(4, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(5, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
+                    eNodes[INDEX2(6, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(7, k, NN)] = node0 + Nstride0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+Nstride1+2*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
                 }
             }
-            faceNECount+=local_NE1;
+            faceNECount += local_NE1;
         }
-        totalNECount+=NE1;
+        totalNECount += NE1;
     }
-    if (!periodic[1] && local_NE1>0) {
-        // **  elements on boundary 010 (x2=0):
+
+    if (!periodic1 && local_NE1 > 0) {
+        // ** elements on boundary 010 (x2=0)
         if (e_offset1 == 0) {
 #pragma omp parallel for
-            for (index_t i0=0; i0<local_NE0; i0++) {
-                const dim_t k = i0+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0);
-
-                out->FaceElements->Id[k] = e_offset0+i0+totalNECount;
-                out->FaceElements->Tag[k] = 10;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0);
+                faces->Id[k] = e_offset0 + i0 + totalNECount;
+                faces->Tag[k] = BOTTOMTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(3,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(4,k,NN)]=node0+1*Nstride0;
-                    eNodes[INDEX2(5,k,NN)]=node0+Nstride1+2*Nstride0;
-                    eNodes[INDEX2(6,k,NN)]=node0+2*Nstride1+1*Nstride0;
-                    eNodes[INDEX2(7,k,NN)]=node0+Nstride1;
+                    eNodes[INDEX2(0, k, NN)] = node0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(4, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(5, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(6, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
+                    eNodes[INDEX2(7, k, NN)] = node0 + Nstride1;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(2,k,NN)]=node0+1*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(2, k, NN)] = node0 + Nstride0;
                 }
             }
-            faceNECount+=local_NE0;
+            faceNECount += local_NE0;
         }
-        totalNECount+=NE0;
-
-        // **  elements on boundary 020 (x2=1):
-        if (local_NE1+e_offset1 == NE1) {
+        totalNECount += NE0;
+        // ** elements on boundary 020 (x2=1)
+        if (local_NE1 + e_offset1 == NE1) {
 #pragma omp parallel for
-            for (index_t i0=0; i0<local_NE0; i0++) {
-                const dim_t k = i0+faceNECount;
-                const index_t node0 = Nstride0*N_PER_E*(i0+e_offset0)
-                                    + Nstride1*N_PER_E*(NE1-1);
-
-                out->FaceElements->Id[k] = i0+e_offset0+totalNECount;
-                out->FaceElements->Tag[k] = 20;
-                out->FaceElements->Owner[k] = myRank;
+            for (index_t i0 = 0; i0 < local_NE0; i0++) {
+                const dim_t k = i0 + faceNECount;
+                const index_t node0 = Nstride0 * N_PER_E * (i0 + e_offset0)
+                                    + Nstride1 * N_PER_E * (NE1 - 1);
+
+                faces->Id[k] = i0 + e_offset0 + totalNECount;
+                faces->Tag[k] = TOPTAG;
+                faces->Owner[k] = myRank;
                 if (useElementsOnFace) {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(2,k,NN)]=node0;
-                    eNodes[INDEX2(3,k,NN)]=node0+2*Nstride0;
-                    eNodes[INDEX2(4,k,NN)]=node0+2*Nstride1+1*Nstride0;
-                    eNodes[INDEX2(5,k,NN)]=node0+Nstride1;
-                    eNodes[INDEX2(6,k,NN)]=node0+1*Nstride0;
-                    eNodes[INDEX2(7,k,NN)]=node0+Nstride1+2*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(2, k, NN)] = node0;
+                    eNodes[INDEX2(3, k, NN)] = node0 + 2 * Nstride0;
+                    eNodes[INDEX2(4, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
+                    eNodes[INDEX2(5, k, NN)] = node0 + Nstride1;
+                    eNodes[INDEX2(6, k, NN)] = node0 + Nstride0;
+                    eNodes[INDEX2(7, k, NN)] = node0 + Nstride1 + 2 * Nstride0;
                 } else {
-                    eNodes[INDEX2(0,k,NN)]=node0+2*Nstride1+2*Nstride0;
-                    eNodes[INDEX2(1,k,NN)]=node0+2*Nstride1;
-                    eNodes[INDEX2(2,k,NN)]=node0+2*Nstride1+1*Nstride0;
+                    eNodes[INDEX2(0, k, NN)] = node0 + 2 * Nstride1 + 2 * Nstride0;
+                    eNodes[INDEX2(1, k, NN)] = node0 + 2 * Nstride1;
+                    eNodes[INDEX2(2, k, NN)] = node0 + 2 * Nstride1 + Nstride0;
                 }
             }
-            faceNECount+=local_NE0;
+            faceNECount += local_NE0;
         }
-        totalNECount+=NE0;
+        totalNECount += NE0;
     }
 
     // add tag names
-    out->addTagMap("top", 20);
-    out->addTagMap("bottom", 10);
-    out->addTagMap("left", 1);
-    out->addTagMap("right", 2);
+    out->setTagMap("top", TOPTAG);
+    out->setTagMap("bottom", BOTTOMTAG);
+    out->setTagMap("left", LEFTTAG);
+    out->setTagMap("right", RIGHTTAG);
 
     // prepare mesh for further calculations
     out->resolveNodeIds();
-    if (noError()) {
-        out->prepare(optimize);
-    }
-    if (!noError()) {
-        delete out;
-        out=NULL;
-    }
-
-    return out;
+    out->prepare(optimize);
+    return out->getPtr();
 }
 
 } // namespace finley
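
Similarly, a standalone sketch (not part of the patch) of the second-order connectivity built by createRec8 above, again assuming the branch with Nstride0 = 1, Nstride1 = N0 and a single rank: corners come first, then the four mid-edge nodes, and for Rec9/macro elements the centre node. The element counts are made up for the example.

    // Editorial sketch: Rec8/Rec9 node ordering with N_PER_E = 2.
    #include <cstdio>

    int main()
    {
        const int N_PER_E = 2;                 // two node intervals per element edge
        const int NE0 = 2, NE1 = 2;            // assumed element counts
        const int N0 = N_PER_E*NE0 + 1;
        const int Nstride0 = 1, Nstride1 = N0;
        const bool generateAllNodes = false;   // true for Rec9/macro elements

        for (int i1 = 0; i1 < NE1; i1++) {
            for (int i0 = 0; i0 < NE0; i0++) {
                const int node0 = Nstride0*N_PER_E*i0 + Nstride1*N_PER_E*i1;
                const int nn[8] = {
                    node0,                                   // corners
                    node0 + 2*Nstride0,
                    node0 + 2*Nstride1 + 2*Nstride0,
                    node0 + 2*Nstride1,
                    node0 + Nstride0,                        // mid-edge nodes
                    node0 + Nstride1 + 2*Nstride0,
                    node0 + 2*Nstride1 + Nstride0,
                    node0 + Nstride1
                };
                std::printf("element (%d,%d):", i0, i1);
                for (int j = 0; j < 8; j++)
                    std::printf(" %d", nn[j]);
                if (generateAllNodes)                        // centre node (Rec9)
                    std::printf(" %d", node0 + Nstride1 + Nstride0);
                std::printf("\n");
            }
        }
        return 0;
    }
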
diff --git a/finley/src/Mesh_write.cpp b/finley/src/Mesh_write.cpp
index 169b15e..19cc2b2 100644
--- a/finley/src/Mesh_write.cpp
+++ b/finley/src/Mesh_write.cpp
@@ -14,19 +14,12 @@
 *
 *****************************************************************************/
 
+#include "FinleyDomain.h"
 
-/*****************************************************************************/
-
-/*   Finley: write Mesh in finley file format */
-
-/*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-
-#include "Mesh.h"
+#include <escript/index.h>
 
 #include <iomanip>
+#include <iostream>
 
 using std::cout;
 using std::endl;
@@ -37,17 +30,17 @@ using std::string;
 namespace finley {
 
 // private
-void Mesh::writeElementInfo(std::ostream& stream, const ElementFile* e,
-                            const string defaultType) const
+void FinleyDomain::writeElementInfo(std::ostream& stream, const ElementFile* e,
+                                    const string& defaultType) const
 {
     if (e != NULL) {
         stream << e->referenceElementSet->referenceElement->Type->Name
           << " " << e->numElements << endl;
         const int NN = e->numNodes;
-        for (index_t i=0; i < e->numElements; i++) {
+        for (index_t i = 0; i < e->numElements; i++) {
             stream << e->Id[i] << " " << e->Tag[i];
-            for (int j=0; j<NN; j++)
-                stream << " " << Nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
+            for (int j = 0; j < NN; j++)
+                stream << " " << m_nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
             stream << endl;
         }
     } else {
@@ -56,13 +49,13 @@ void Mesh::writeElementInfo(std::ostream& stream, const ElementFile* e,
 }
 
 // private
-void Mesh::printElementInfo(const ElementFile* e, const string title,
-                            const string defaultType, bool full) const
+void FinleyDomain::printElementInfo(const ElementFile* e, const string& title,
+                                    const string& defaultType, bool full) const
 {
     if (e != NULL) {
-        dim_t mine=0, overlap=0;
-        for (index_t i=0; i < e->numElements; i++) {
-            if (e->Owner[i] == MPIInfo->rank)
+        dim_t mine = 0, overlap = 0;
+        for (index_t i = 0; i < e->numElements; i++) {
+            if (e->Owner[i] == m_mpiInfo->rank)
                 mine++;
             else
                 overlap++;
@@ -75,13 +68,13 @@ void Mesh::printElementInfo(const ElementFile* e, const string title,
         if (full) {
             const int NN = e->numNodes;
             cout << "\t     Id   Tag Owner Color:  Nodes" << endl;
-            for (index_t i=0; i < e->numElements; i++) {
+            for (index_t i = 0; i < e->numElements; i++) {
                 cout << "\t" << setw(7) << e->Id[i]
                      << setw(6) << e->Tag[i]
                      << setw(6) << e->Owner[i]
                      << setw(6) << e->Color[i] << ": ";
-                for (int j=0; j<NN; j++)
-                    cout << setw(6) << Nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
+                for (int j = 0; j < NN; j++)
+                    cout << setw(6) << m_nodes->Id[e->Nodes[INDEX2(j,i,NN)]];
                 cout << endl;
             }
         }
@@ -90,38 +83,34 @@ void Mesh::printElementInfo(const ElementFile* e, const string title,
     }
 }
 
-                             
-/// writes the mesh to an external file using the 'fly' file format
-void Mesh::write(const string filename) const
+
+void FinleyDomain::write(const string& filename) const
 {
-    if (MPIInfo->size >1) {
-        setError(IO_ERROR, "Mesh::write: only single rank runs are supported.");
-        return;
-    }
+    if (m_mpiInfo->size > 1)
+        throw escript::NotImplementedError("FinleyDomain::write: only single rank "
+                                           "runs are supported.");
 
     std::ofstream f(filename.c_str());
     if (!f.is_open()) {
         std::stringstream ss;
-        ss << "Mesh::write: Opening file " << filename << " for writing failred.";
-        string err(ss.str());
-        setError(IO_ERROR, err.c_str());
-        return;
+        ss << "FinleyDomain::write: Opening file " << filename << " for writing failed";
+        throw escript::IOError(ss.str());
     }
 
     // write header
     f << m_name << endl;
 
     // write nodes
-    if (Nodes != NULL) {
+    if (m_nodes != NULL) {
         const int numDim = getDim();
-        f << numDim << "D-Nodes " << Nodes->numNodes << endl;
-        for (index_t i=0; i<Nodes->numNodes; i++) {
-            f << Nodes->Id[i] << " " << Nodes->globalDegreesOfFreedom[i]
-              << " " << Nodes->Tag[i];
+        f << numDim << "D-Nodes " << m_nodes->getNumNodes() << endl;
+        for (index_t i = 0; i < m_nodes->getNumNodes(); i++) {
+            f << m_nodes->Id[i] << " " << m_nodes->globalDegreesOfFreedom[i]
+              << " " << m_nodes->Tag[i];
             f.setf(ios::scientific, ios::floatfield);
             f.precision(15);
-            for (int j=0; j<numDim; j++)
-                f << " " << Nodes->Coordinates[INDEX2(j,i,numDim)];
+            for (int j = 0; j < numDim; j++)
+                f << " " << m_nodes->Coordinates[INDEX2(j,i,numDim)];
             f << endl;
         }
     } else {
@@ -129,35 +118,36 @@ void Mesh::write(const string filename) const
     }
 
     // write elements
-    writeElementInfo(f, Elements, "Tet4");
+    writeElementInfo(f, m_elements, "Tet4");
 
     // write face elements
-    writeElementInfo(f, FaceElements, "Tri3");
+    writeElementInfo(f, m_faceElements, "Tri3");
 
     // write contact elements
-    writeElementInfo(f, ContactElements, "Tri3_Contact");
+    writeElementInfo(f, m_contactElements, "Tri3_Contact");
 
     // write points
-    writeElementInfo(f, Points, "Point1");
+    writeElementInfo(f, m_points, "Point1");
 
     // write tags
-    if (tagMap.size() > 0) {
+    if (m_tagMap.size() > 0) {
         f <<  "Tags" << endl;
         TagMap::const_iterator it;
-        for (it=tagMap.begin(); it!=tagMap.end(); it++) {
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
             f << it->first << " " << it->second << endl;
         }
     }
+    f << endl;
     f.close();
 #ifdef Finley_TRACE
     cout << "mesh " << m_name << " has been written to file " << filename << endl;
 #endif
 }
 
-void Mesh::printInfo(bool full)
+void FinleyDomain::Print_Mesh_Info(bool full) const
 {
-    cout << "PrintMesh_Info running on CPU " << MPIInfo->rank << " of "
-              << MPIInfo->size << endl;
+    cout << "PrintMeshInfo running on CPU " << m_mpiInfo->rank << " of "
+              << m_mpiInfo->size << endl;
     cout << "\tMesh name '" << m_name << "'\n";
     cout << "\tApproximation order " << approximationOrder << endl;
     cout << "\tReduced Approximation order " <<reducedApproximationOrder << endl;
@@ -165,22 +155,22 @@ void Mesh::printInfo(bool full)
     cout << "\tReduced Integration order " << reducedIntegrationOrder << endl;
 
     // write nodes
-    if (Nodes != NULL) {
+    if (m_nodes != NULL) {
         const int numDim = getDim();
-        cout << "\tNodes: " << numDim << "D-Nodes " << Nodes->numNodes << endl;
+        cout << "\tNodes: " << numDim << "D-Nodes " << m_nodes->getNumNodes() << endl;
         if (full) {
             cout << "\t     Id   Tag  gDOF   gNI grDfI  grNI:  Coordinates\n";
-            for (index_t i=0; i < Nodes->numNodes; i++) {
-                cout << "\t" << setw(7) << Nodes->Id[i]
-                     << setw(6) << Nodes->Tag[i]
-                     << setw(6) << Nodes->globalDegreesOfFreedom[i]
-                     << setw(6) << Nodes->globalNodesIndex[i]
-                     << setw(6) << Nodes->globalReducedDOFIndex[i]
-                     << setw(6) << Nodes->globalReducedNodesIndex[i] << ": ";
+            for (index_t i = 0; i < m_nodes->getNumNodes(); i++) {
+                cout << "\t" << setw(7) << m_nodes->Id[i]
+                     << setw(6) << m_nodes->Tag[i]
+                     << setw(6) << m_nodes->globalDegreesOfFreedom[i]
+                     << setw(6) << m_nodes->globalNodesIndex[i]
+                     << setw(6) << m_nodes->globalReducedDOFIndex[i]
+                     << setw(6) << m_nodes->globalReducedNodesIndex[i] << ": ";
                 cout.setf(ios::scientific, ios::floatfield);
                 cout.precision(15);
-                for (int j=0; j<numDim; j++)
-                    cout << " " << Nodes->Coordinates[INDEX2(j,i,numDim)];
+                for (int j = 0; j < numDim; j++)
+                    cout << " " << m_nodes->Coordinates[INDEX2(j,i,numDim)];
                 cout << endl;
             }
         }
@@ -189,22 +179,22 @@ void Mesh::printInfo(bool full)
     }
 
     // write elements
-    printElementInfo(Elements, "Elements", "Tet4", full);
+    printElementInfo(m_elements, "Elements", "Tet4", full);
 
     // write face elements
-    printElementInfo(FaceElements, "Face elements", "Tri3", full);
+    printElementInfo(m_faceElements, "Face elements", "Tri3", full);
 
     // write contact elements
-    printElementInfo(ContactElements, "Contact elements", "Tri3_Contact", full);
+    printElementInfo(m_contactElements, "Contact elements", "Tri3_Contact", full);
 
     // write points
-    printElementInfo(Points, "Points", "Point1", full);
+    printElementInfo(m_points, "Points", "Point1", full);
 
     // write tags
-    if (tagMap.size() > 0) {
+    if (m_tagMap.size() > 0) {
         cout << "\tTags:\n";
         TagMap::const_iterator it;
-        for (it=tagMap.begin(); it!=tagMap.end(); it++) {
+        for (it = m_tagMap.begin(); it != m_tagMap.end(); it++) {
             cout << "\t" << setw(7) << it->second << " " << it->first << endl;
         }
     }
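
For reference, the 'fly' layout produced by FinleyDomain::write() above is, schematically (one node or element per line; the face element, contact element and point blocks repeat the element pattern; coordinates are written in scientific notation with 15-digit precision, and the Tags block only appears when tags are defined):

    <mesh name>
    <d>D-Nodes <number of nodes>
    <Id> <globalDOF> <Tag> <x_0> ... <x_{d-1}>
    <element type name> <number of elements>
    <Id> <Tag> <node ids ...>
    ...                         (face elements, contact elements, points)
    Tags
    <tag name> <tag value>
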
diff --git a/finley/src/NodeFile.cpp b/finley/src/NodeFile.cpp
index 6fb1627..541ec80 100644
--- a/finley/src/NodeFile.cpp
+++ b/finley/src/NodeFile.cpp
@@ -14,25 +14,39 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Finley: NodeFile
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "NodeFile.h"
+
 #include <escript/Data.h>
+#include <escript/index.h>
 
 #include <limits>
 #include <sstream>
+#include <iostream>
 
 namespace finley {
 
 // helper function
+static std::pair<index_t,index_t> getGlobalRange(dim_t n, const index_t* id,
+                                                 escript::JMPI mpiInfo)
+{
+    std::pair<index_t,index_t> result(util::getMinMaxInt(1, n, id));
+
+#ifdef ESYS_MPI
+    index_t global_id_range[2];
+    index_t id_range[2] = { -result.first, result.second };
+    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX,
+                  mpiInfo->comm);
+    result.first = -global_id_range[0];
+    result.second = global_id_range[1];
+#endif
+    if (result.second < result.first) {
+        result.first = -1;
+        result.second = 0;
+    }
+    return result;
+}
+
+// helper function
 static void scatterEntries(dim_t n, const index_t* index, index_t min_index,
                            index_t max_index, index_t* Id_out,
                            const index_t* Id_in,
@@ -86,8 +100,9 @@ static void gatherEntries(dim_t n, const index_t* index,
 
 /// constructor
 /// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
-NodeFile::NodeFile(int nDim, esysUtils::JMPI& mpiInfo) :
+NodeFile::NodeFile(int nDim, escript::JMPI mpiInfo) :
     numNodes(0),
+    MPIInfo(mpiInfo),
     numDim(nDim),
     Id(NULL),
     Tag(NULL),
@@ -101,23 +116,20 @@ NodeFile::NodeFile(int nDim, esysUtils::JMPI& mpiInfo) :
     reducedDegreesOfFreedomId(NULL),
     status(FINLEY_INITIAL_STATUS)
 {
-    MPIInfo = mpiInfo;
 }
 
-/// destructor
 NodeFile::~NodeFile()
 {
     freeTable();
 }
 
-/// allocates the node table within this node file to hold NN nodes.
 void NodeFile::allocTable(dim_t NN)
 {
     if (numNodes > 0)
         freeTable();
 
     Id = new index_t[NN];
-    Coordinates = new double[NN*numDim];
+    Coordinates = new escript::DataTypes::real_t[NN*numDim];
     Tag = new int[NN];
     globalDegreesOfFreedom = new index_t[NN];
     globalReducedDOFIndex = new index_t[NN];
@@ -126,7 +138,7 @@ void NodeFile::allocTable(dim_t NN)
     reducedNodesId = new index_t[NN];
     degreesOfFreedomId = new index_t[NN];
     reducedDegreesOfFreedomId = new index_t[NN];
-    numNodes=NN;
+    numNodes = NN;
 
     // this initialization makes sure that data are located on the right
     // processor
@@ -134,7 +146,7 @@ void NodeFile::allocTable(dim_t NN)
     for (index_t n=0; n<numNodes; n++) {
         Id[n] = -1;
         for (int i=0; i<numDim; i++)
-            Coordinates[INDEX2(i,n,numDim)]=0.;
+            Coordinates[INDEX2(i,n,numDim)] = 0.;
         Tag[n] = -1;
         globalDegreesOfFreedom[n] = -1;
         globalReducedDOFIndex[n] = -1;
@@ -146,7 +158,6 @@ void NodeFile::allocTable(dim_t NN)
     }
 }
 
-/// frees the node table within this node file
 void NodeFile::freeTable()
 {
     delete[] Id;
@@ -168,10 +179,17 @@ void NodeFile::freeTable()
     reducedNodesDistribution.reset();
     degreesOfFreedomDistribution.reset();
     reducedDegreesOfFreedomDistribution.reset();
+#ifdef ESYS_HAVE_PASO
     degreesOfFreedomConnector.reset();
     reducedDegreesOfFreedomConnector.reset();
-
-    numNodes=0;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    trilinosRowMap.reset();
+    trilinosReducedRowMap.reset();
+    trilinosColMap.reset();
+    trilinosReducedColMap.reset();
+#endif
+    numNodes = 0;
 }
 
 void NodeFile::print() const
@@ -193,28 +211,53 @@ void NodeFile::print() const
     }
 }
 
+std::pair<index_t,index_t> NodeFile::getDOFRange() const
+{
+    std::pair<index_t,index_t> result(util::getMinMaxInt(
+                                        1, numNodes, globalDegreesOfFreedom));
+    if (result.second < result.first) {
+        result.first = -1;
+        result.second = 0;
+    }
+    return result;
+}
+
+std::pair<index_t,index_t> NodeFile::getGlobalIdRange() const
+{
+    return getGlobalRange(numNodes, Id, MPIInfo);
+}
+
+std::pair<index_t,index_t> NodeFile::getGlobalDOFRange() const
+{
+    return getGlobalRange(numNodes, globalDegreesOfFreedom, MPIInfo);
+}
+
+std::pair<index_t,index_t> NodeFile::getGlobalNodeIDIndexRange() const
+{
+    return getGlobalRange(numNodes, globalNodesIndex, MPIInfo);
+}
+
 /// copies the array newX into this->coordinates
 void NodeFile::setCoordinates(const escript::Data& newX)
 {
-    if (newX.getDataPointSize() != numDim)  {
+    if (newX.getDataPointSize() != numDim) {
         std::stringstream ss;
         ss << "NodeFile::setCoordinates: number of dimensions of new "
             "coordinates has to be " << numDim;
-        const std::string errorMsg(ss.str());
-        setError(VALUE_ERROR, errorMsg.c_str());
+        throw escript::ValueError(ss.str());
     } else if (newX.getNumDataPointsPerSample() != 1 ||
             newX.getNumSamples() != numNodes) {
         std::stringstream ss;
         ss << "NodeFile::setCoordinates: number of given nodes must be "
             << numNodes;
-        const std::string errorMsg(ss.str());
-        setError(VALUE_ERROR, errorMsg.c_str());
+        throw escript::ValueError(ss.str());
     } else {
-        const size_t numDim_size=numDim*sizeof(double);
+        const size_t numDim_size = numDim * sizeof(double);
         ++status;
 #pragma omp parallel for
-        for (index_t n=0; n<numNodes; n++) {
-            memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
+        for (index_t n = 0; n < numNodes; n++) {
+            memcpy(&Coordinates[INDEX2(0, n, numDim)],
+                    newX.getSampleDataRO(n), numDim_size);
         }
     }
 }
@@ -222,102 +265,31 @@ void NodeFile::setCoordinates(const escript::Data& newX)
 /// sets tags to newTag where mask>0
 void NodeFile::setTags(int newTag, const escript::Data& mask)
 {
-    resetError();
-
     if (1 != mask.getDataPointSize()) {
-       setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
-       return;
+        throw escript::ValueError("NodeFile::setTags: number of components of mask must be 1.");
     } else if (mask.getNumDataPointsPerSample() != 1 ||
             mask.getNumSamples() != numNodes) {
-       setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
-       return;
+        throw escript::ValueError("NodeFile::setTags: illegal number of samples of mask Data object");
     }
 
 #pragma omp parallel for
-    for (index_t n=0; n<numNodes; n++) {
-         if (mask.getSampleDataRO(n)[0] > 0)
-             Tag[n]=newTag;
+    for (index_t n = 0; n < numNodes; n++) {
+        if (mask.getSampleDataRO(n)[0] > 0)
+            Tag[n] = newTag;
     }
     updateTagList();
 }
 
-std::pair<index_t,index_t> NodeFile::getDOFRange() const
-{
-    std::pair<index_t,index_t> result(util::getMinMaxInt(
-                                        1, numNodes, globalDegreesOfFreedom));
-    if (result.second < result.first) {
-        result.first = -1;
-        result.second = 0;
-    }
-    return result;
-}
-
-std::pair<index_t,index_t> NodeFile::getGlobalIdRange() const
-{
-    std::pair<index_t,index_t> result(util::getMinMaxInt(1, numNodes, Id));
-
-#ifdef ESYS_MPI
-    index_t global_id_range[2];
-    index_t id_range[2] = { -result.first, result.second };
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, MPIInfo->comm);
-    result.first = -global_id_range[0];
-    result.second = global_id_range[1];
-#endif
-    if (result.second < result.first) {
-        result.first = -1;
-        result.second = 0;
-    }
-    return result;
-}
-
-std::pair<index_t,index_t> NodeFile::getGlobalDOFRange() const
-{
-    std::pair<index_t,index_t> result(util::getMinMaxInt(
-                                        1, numNodes, globalDegreesOfFreedom));
-
-#ifdef ESYS_MPI
-    index_t global_id_range[2];
-    index_t id_range[2] = { -result.first, result.second };
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, MPIInfo->comm);
-    result.first = -global_id_range[0];
-    result.second = global_id_range[1];
-#endif
-    if (result.second < result.first) {
-        result.first = -1;
-        result.second = 0;
-    }
-    return result;
-}
-
-std::pair<index_t,index_t> NodeFile::getGlobalNodeIDIndexRange() const
-{
-    std::pair<index_t,index_t> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));
-
-#ifdef ESYS_MPI
-    index_t global_id_range[2];
-    index_t id_range[2] = { -result.first, result.second };
-    MPI_Allreduce(id_range, global_id_range, 2, MPI_DIM_T, MPI_MAX, MPIInfo->comm);
-    result.first = -global_id_range[0];
-    result.second = global_id_range[1];
-#endif
-    if (result.second < result.first) {
-        result.first = -1;
-        result.second = 0;
-    }
-    return result;
-}
 
 void NodeFile::copyTable(index_t offset, index_t idOffset, index_t dofOffset,
                          const NodeFile* in)
 {
     // check number of dimensions and table size
     if (numDim != in->numDim) {
-        setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
-        return;
+        throw escript::ValueError("NodeFile::copyTable: dimensions of node files don't match");
     }
     if (numNodes < in->numNodes+offset) {
-        setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
-        return;
+        throw escript::ValueError("NodeFile::copyTable: node table is too small.");
     }
 
 #pragma omp parallel for
@@ -352,54 +324,54 @@ void NodeFile::gather(const index_t* index, const NodeFile* in)
             numDim, Coordinates, in->Coordinates);
 }
 
-void NodeFile::gather_global(const index_t *index, const NodeFile* in)
+void NodeFile::gather_global(const index_t* index, const NodeFile* in)
 {
     // get the global range of node ids
     const std::pair<index_t,index_t> id_range(in->getGlobalIdRange());
-    const index_t undefined_node=id_range.first-1;
+    const index_t undefined_node = id_range.first-1;
     std::vector<index_t> distribution(in->MPIInfo->size+1);
 
     // distribute the range of node ids
-    index_t buffer_len=in->MPIInfo->setDistribution(id_range.first, id_range.second, &distribution[0]);
+    index_t buffer_len = in->MPIInfo->setDistribution(id_range.first, id_range.second, &distribution[0]);
 
     // allocate buffers
-    index_t *Id_buffer=new index_t[buffer_len];
-    int *Tag_buffer=new int[buffer_len];
-    index_t *globalDegreesOfFreedom_buffer=new index_t[buffer_len];
-    double *Coordinates_buffer=new double[buffer_len*numDim];
+    index_t* Id_buffer = new index_t[buffer_len];
+    int* Tag_buffer = new int[buffer_len];
+    index_t* globalDegreesOfFreedom_buffer = new index_t[buffer_len];
+    double* Coordinates_buffer = new double[buffer_len*numDim];
 
     // fill Id_buffer by the undefined_node marker to check if nodes
     // are defined
 #pragma omp parallel for
-    for (index_t n=0; n<buffer_len; n++)
-        Id_buffer[n]=undefined_node;
+    for (index_t n = 0; n < buffer_len; n++)
+        Id_buffer[n] = undefined_node;
 
     // fill the buffer by sending portions around in a circle
 #ifdef ESYS_MPI
     MPI_Status status;
-    int dest=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank+1);
-    int source=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank-1);
+    int dest = in->MPIInfo->mod_rank(in->MPIInfo->rank+1);
+    int source = in->MPIInfo->mod_rank(in->MPIInfo->rank-1);
 #endif
-    int buffer_rank=in->MPIInfo->rank;
+    int buffer_rank = in->MPIInfo->rank;
     for (int p=0; p<in->MPIInfo->size; ++p) {
         if (p>0) { // the initial send can be skipped
 #ifdef ESYS_MPI
             MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_DIM_T, dest,
-                    in->MPIInfo->msg_tag_counter, source,
-                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
+                    in->MPIInfo->counter(), source,
+                    in->MPIInfo->counter(), in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
-                    in->MPIInfo->msg_tag_counter+1, source,
-                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
+                    in->MPIInfo->counter()+1, source,
+                    in->MPIInfo->counter()+1, in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
-                    MPI_DIM_T, dest, in->MPIInfo->msg_tag_counter+2, source,
-                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
+                    MPI_DIM_T, dest, in->MPIInfo->counter()+2, source,
+                    in->MPIInfo->counter()+2, in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
-                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
-                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
+                    MPI_DOUBLE, dest, in->MPIInfo->counter()+3, source,
+                    in->MPIInfo->counter()+3, in->MPIInfo->comm, &status);
+            in->MPIInfo->incCounter(4);
 #endif
-	    ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
         }
-        buffer_rank=esysUtils::mod_rank(in->MPIInfo->size, buffer_rank-1);
+        buffer_rank = in->MPIInfo->mod_rank(buffer_rank-1);
         scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
                 distribution[buffer_rank+1], Id_buffer, in->Id,
                 Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
@@ -409,8 +381,8 @@ void NodeFile::gather_global(const index_t *index, const NodeFile* in)
     // now entries are collected from the buffer again by sending the
     // entries around in a circle
 #ifdef ESYS_MPI
-    dest=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank+1);
-    source=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank-1);
+    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank+1);
+    source = in->MPIInfo->mod_rank(in->MPIInfo->rank-1);
 #endif
     buffer_rank=in->MPIInfo->rank;
     for (int p=0; p<in->MPIInfo->size; ++p) {
@@ -421,59 +393,66 @@ void NodeFile::gather_global(const index_t *index, const NodeFile* in)
         if (p < in->MPIInfo->size-1) { // the last send can be skipped
 #ifdef ESYS_MPI
             MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_DIM_T, dest,
-                    in->MPIInfo->msg_tag_counter, source,
-                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
+                    in->MPIInfo->counter(), source,
+                    in->MPIInfo->counter(), in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
-                    in->MPIInfo->msg_tag_counter+1, source,
-                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
+                    in->MPIInfo->counter()+1, source,
+                    in->MPIInfo->counter()+1, in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
-                    MPI_DIM_T, dest, in->MPIInfo->msg_tag_counter+2, source,
-                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
+                    MPI_DIM_T, dest, in->MPIInfo->counter()+2, source,
+                    in->MPIInfo->counter()+2, in->MPIInfo->comm, &status);
             MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
-                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
-                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
+                    MPI_DOUBLE, dest, in->MPIInfo->counter()+3, source,
+                    in->MPIInfo->counter()+3, in->MPIInfo->comm, &status);
+            in->MPIInfo->incCounter(4);
 #endif
-            ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
         }
-        buffer_rank=esysUtils::mod_rank(in->MPIInfo->size, buffer_rank-1);
+        buffer_rank = in->MPIInfo->mod_rank(buffer_rank-1);
     }
+#if DOASSERT
     // check if all nodes are set:
+    index_t err=-1;
 #pragma omp parallel for
     for (index_t n=0; n<numNodes; ++n) {
         if (Id[n] == undefined_node) {
-            std::stringstream ss;
-            ss << "NodeFile::gather_global: Node id " << Id[n]
-                << " at position " << n << " is referenced but not defined.";
-            const std::string errorMsg(ss.str());
-            setError(VALUE_ERROR, errorMsg.c_str());
+#pragma omp critical
+            err=n;
         }
     }
+    if (err>=0) {
+        std::stringstream ss;
+        ss << "NodeFile::gather_global: Node id " << Id[err]
+            << " at position " << err << " is referenced but not defined.";
+        const std::string errorMsg(ss.str());
+        throw escript::AssertException(errorMsg);
+    }
+#endif // DOASSERT
     delete[] Id_buffer;
     delete[] Tag_buffer;
     delete[] globalDegreesOfFreedom_buffer;
     delete[] Coordinates_buffer;
-    // make sure that the error is global
-    esysUtils::Esys_MPIInfo_noError(in->MPIInfo);
 }
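
The two loops in gather_global() implement a ring exchange: each rank's buffer is passed once around all ranks with MPI_Sendrecv_replace, bumping the message tag counter after every hop. A minimal standalone sketch of that pattern (outside the patch; ringExchange and the use of MPI_LONG are illustrative assumptions):

    #include <mpi.h>
    #include <vector>

    // Pass `buffer` once around the ring; after `size` iterations every rank
    // has seen every portion of the distributed data exactly once.
    void ringExchange(std::vector<long>& buffer, MPI_Comm comm)
    {
        int rank, size;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        const int dest = (rank + 1) % size;          // right neighbour
        const int source = (rank - 1 + size) % size; // left neighbour, cf. mod_rank()
        int tag = 0;                                 // stands in for the msg tag counter
        for (int p = 0; p < size; ++p) {
            if (p > 0) { // the initial send can be skipped
                MPI_Status status;
                MPI_Sendrecv_replace(buffer.data(), static_cast<int>(buffer.size()),
                                     MPI_LONG, dest, tag, source, tag, comm, &status);
                ++tag;
            }
            // ... process the portion of the data currently held in `buffer` ...
        }
    }

Matching send and receive tags on each hop is safe here because every exchange is a single paired Sendrecv along the ring.
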
 
 void NodeFile::assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
-                                   const std::vector<index_t>& distribution)
+                                   const IndexVector& distribution)
 {
-    Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
-    // first we retrieve the min and max DOF on this processor to reduce
+    int p_min = MPIInfo->size, p_max = -1;
+    // first we calculate the min and max DOF on this processor to reduce
     // costs for searching
-    const std::pair<index_t,index_t> dof_range(getDOFRange());
+    const std::pair<index_t,index_t> dofRange(getDOFRange());
 
-    for (int p=0; p<MPIInfo->size; ++p) {
-        if (distribution[p]<=dof_range.first) p_min=p;
-        if (distribution[p]<=dof_range.second) p_max=p;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        if (distribution[p] <= dofRange.first)
+            p_min = p;
+        if (distribution[p] <= dofRange.second)
+            p_max = p;
     }
 #pragma omp parallel for
-    for (index_t n=0; n<numNodes; ++n) {
-        const index_t k=globalDegreesOfFreedom[n];
-        for (int p=p_min; p<=p_max; ++p) {
-            if (k < distribution[p+1]) {
-                mpiRankOfDOF[n]=p;
+    for (index_t n = 0; n < numNodes; ++n) {
+        const index_t k = globalDegreesOfFreedom[n];
+        for (int p = p_min; p <= p_max; ++p) {
+            if (k < distribution[p + 1]) {
+                mpiRankOfDOF[n] = p;
                 break;
             }
         }
@@ -481,8 +460,8 @@ void NodeFile::assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
 }
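
assignMPIRankToDOFs() narrows the candidate ranks to [p_min, p_max] and then scans; the ownership rule itself is simply distribution[p] <= k < distribution[p+1]. A hedged sketch of the same lookup via binary search (ownerOfDOF is an illustrative name, not an escript API):

    #include <algorithm>
    #include <vector>

    // `distribution` holds size+1 monotonically increasing entries; a DOF k is
    // owned by rank p iff distribution[p] <= k < distribution[p+1].
    int ownerOfDOF(long k, const std::vector<long>& distribution)
    {
        const auto it = std::upper_bound(distribution.begin(), distribution.end(), k);
        return static_cast<int>(it - distribution.begin()) - 1;
    }
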
 
 dim_t NodeFile::prepareLabeling(const std::vector<short>& mask,
-                                std::vector<index_t>& buffer,
-                                std::vector<index_t>& distribution,
+                                IndexVector& buffer,
+                                IndexVector& distribution,
                                 bool useNodes)
 {
     const index_t UNSET_ID=-1,SET_ID=1;
@@ -493,9 +472,9 @@ dim_t NodeFile::prepareLabeling(const std::vector<short>& mask,
     const index_t* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
     // distribute the range of node ids
     distribution.assign(MPIInfo->size+1, 0);
-    int buffer_len=MPIInfo->setDistribution(idRange.first,
+    int buffer_len = MPIInfo->setDistribution(idRange.first,
             idRange.second, &distribution[0]);
-    const dim_t myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
+    const dim_t myCount = distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
 
     // fill buffer by the UNSET_ID marker to check if nodes are defined
     buffer.assign(buffer_len, UNSET_ID);
@@ -503,38 +482,38 @@ dim_t NodeFile::prepareLabeling(const std::vector<short>& mask,
     // fill the buffer by sending portions around in a circle
 #ifdef ESYS_MPI
     MPI_Status status;
-    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
-    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
     int buffer_rank=MPIInfo->rank;
     for (int p=0; p<MPIInfo->size; ++p) {
         if (p>0) { // the initial send can be skipped
 #ifdef ESYS_MPI
             MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_DIM_T, dest,
-                    MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
+                    MPIInfo->counter(), source, MPIInfo->counter(),
                     MPIInfo->comm, &status);
+            MPIInfo->incCounter();
 #endif
-            MPIInfo->msg_tag_counter++;
         }
-        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
-        const index_t id0=distribution[buffer_rank];
-        const index_t id1=distribution[buffer_rank+1];
+        buffer_rank = MPIInfo->mod_rank(buffer_rank-1);
+        const index_t id0 = distribution[buffer_rank];
+        const index_t id1 = distribution[buffer_rank+1];
 #pragma omp parallel for
-        for (index_t n=0; n<numNodes; n++) {
-            if (mask.size()<numNodes || mask[n]>-1) {
-                const index_t k=indexArray[n];
-                if (id0<=k && k<id1) {
-                    buffer[k-id0] = SET_ID;
+        for (index_t n = 0; n < numNodes; n++) {
+            if (mask.size() < numNodes || mask[n] > -1) {
+                const index_t k = indexArray[n];
+                if (id0 <= k && k < id1) {
+                    buffer[k - id0] = SET_ID;
                 }
             }
         }
     }
     // count the entries in the buffer
     // TODO: OMP parallel
-    index_t myNewCount=0;
-    for (index_t n=0; n<myCount; ++n) {
+    index_t myNewCount = 0;
+    for (index_t n = 0; n < myCount; ++n) {
         if (buffer[n] == SET_ID) {
-            buffer[n]=myNewCount;
+            buffer[n] = myNewCount;
             myNewCount++;
         }
     }
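
The counting loop above rewrites every SET_ID marker with the next dense local label; createDenseDOFLabeling() then shifts these labels by the per-rank offset. A standalone sketch of that renumbering step (compactLabels is an illustrative name):

    #include <cstdint>
    #include <vector>

    // Replace every marked entry by the next dense label, starting at rankOffset;
    // returns how many labels this rank now owns.
    std::int64_t compactLabels(std::vector<std::int64_t>& buffer,
                               std::int64_t SET_ID, std::int64_t rankOffset)
    {
        std::int64_t next = 0;
        for (auto& b : buffer)
            if (b == SET_ID)
                b = rankOffset + next++;
        return next;
    }
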
@@ -547,10 +526,10 @@ dim_t NodeFile::createDenseDOFLabeling()
     std::vector<index_t> distribution;
     std::vector<index_t> loc_offsets(MPIInfo->size);
     std::vector<index_t> offsets(MPIInfo->size);
-    index_t new_numGlobalDOFs=0;
+    index_t new_numGlobalDOFs = 0;
 
     // retrieve the number of own DOFs and fill buffer
-    loc_offsets[MPIInfo->rank]=prepareLabeling(std::vector<short>(),
+    loc_offsets[MPIInfo->rank] = prepareLabeling(std::vector<short>(),
             DOF_buffer, distribution, false);
 #ifdef ESYS_MPI
     MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_DIM_T,
@@ -560,30 +539,30 @@ dim_t NodeFile::createDenseDOFLabeling()
         new_numGlobalDOFs+=offsets[n];
     }
 #else
-    new_numGlobalDOFs=loc_offsets[0];
-    loc_offsets[0]=0;
+    new_numGlobalDOFs = loc_offsets[0];
+    loc_offsets[0] = 0;
 #endif
 
-    const dim_t myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
+    const dim_t myDOFs = distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
 #pragma omp parallel for
-    for (index_t n=0; n<myDOFs; ++n)
-        DOF_buffer[n]+=loc_offsets[MPIInfo->rank];
+    for (index_t n = 0; n < myDOFs; ++n)
+        DOF_buffer[n] += loc_offsets[MPIInfo->rank];
 
     std::vector<unsigned char> set_new_DOF(numNodes, true);
 
     // now entries are collected from the buffer again by sending them around
     // in a circle
 #ifdef ESYS_MPI
-    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
-    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
-    int buffer_rank=MPIInfo->rank;
-    for (int p=0; p<MPIInfo->size; ++p) {
-        const index_t dof0=distribution[buffer_rank];
-        const index_t dof1=distribution[buffer_rank+1];
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const index_t dof0 = distribution[buffer_rank];
+        const index_t dof1 = distribution[buffer_rank+1];
 #pragma omp parallel for
-        for (index_t n=0; n<numNodes; n++) {
-            const index_t k=globalDegreesOfFreedom[n];
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t k = globalDegreesOfFreedom[n];
             if (set_new_DOF[n] && dof0<=k && k<dof1) {
                 globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
                 set_new_DOF[n]=false;
@@ -593,43 +572,43 @@ dim_t NodeFile::createDenseDOFLabeling()
 #ifdef ESYS_MPI
             MPI_Status status;
             MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_DIM_T,
-                    dest, MPIInfo->msg_tag_counter, source,
-                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
+                    dest, MPIInfo->counter(), source,
+                    MPIInfo->counter(), MPIInfo->comm, &status);
+            MPIInfo->incCounter();
 #endif
-	    ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
         }
-        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
+        buffer_rank = MPIInfo->mod_rank(buffer_rank-1);
     }
 
     return new_numGlobalDOFs;
 }
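
Both labeling routines turn per-rank counts into global offsets: gather the counts, then take an exclusive prefix sum (the in-place loop over nodeDistribution below does exactly this). A minimal sketch, assuming long counts and the illustrative name exclusiveOffsets:

    #include <mpi.h>
    #include <vector>

    // offsets[p] is the first global label owned by rank p;
    // offsets[size] is the global total.
    std::vector<long> exclusiveOffsets(long myCount, MPI_Comm comm)
    {
        int size;
        MPI_Comm_size(comm, &size);
        std::vector<long> counts(size);
        MPI_Allgather(&myCount, 1, MPI_LONG, counts.data(), 1, MPI_LONG, comm);
        std::vector<long> offsets(size + 1, 0);
        for (int p = 0; p < size; ++p)
            offsets[p + 1] = offsets[p] + counts[p];
        return offsets;
    }
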
 
-dim_t NodeFile::createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
-                                   const std::vector<index_t>& dofDistribution)
+dim_t NodeFile::createDenseNodeLabeling(IndexVector& nodeDistribution,
+                                        const IndexVector& dofDistribution)
 {
     const index_t UNSET_ID=-1, SET_ID=1;
-    const index_t myFirstDOF=dofDistribution[MPIInfo->rank];
-    const index_t myLastDOF=dofDistribution[MPIInfo->rank+1];
+    const index_t myFirstDOF = dofDistribution[MPIInfo->rank];
+    const index_t myLastDOF = dofDistribution[MPIInfo->rank+1];
 
     // find the range of node ids controlled by me
-    index_t min_id=std::numeric_limits<index_t>::max();
-    index_t max_id=std::numeric_limits<index_t>::min();
+    index_t min_id = std::numeric_limits<index_t>::max();
+    index_t max_id = std::numeric_limits<index_t>::min();
 #pragma omp parallel
     {
-        index_t loc_max_id=max_id;
-        index_t loc_min_id=min_id;
+        index_t loc_max_id = max_id;
+        index_t loc_min_id = min_id;
 #pragma omp for
-        for (index_t n=0; n<numNodes; n++) {
-            const dim_t dof=globalDegreesOfFreedom[n];
-            if (myFirstDOF<=dof && dof<myLastDOF) {
-                loc_max_id=std::max(loc_max_id, Id[n]);
-                loc_min_id=std::min(loc_min_id, Id[n]);
+        for (index_t n = 0; n < numNodes; n++) {
+            const dim_t dof = globalDegreesOfFreedom[n];
+            if (myFirstDOF <= dof && dof < myLastDOF) {
+                loc_max_id = std::max(loc_max_id, Id[n]);
+                loc_min_id = std::min(loc_min_id, Id[n]);
             }
         }
 #pragma omp critical
         {
-            max_id=std::max(loc_max_id, max_id);
-            min_id=std::min(loc_min_id, min_id);
+            max_id = std::max(loc_max_id, max_id);
+            min_id = std::min(loc_min_id, min_id);
         }
     }
     index_t my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
@@ -650,16 +629,16 @@ dim_t NodeFile::createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
 
     // mark and count the nodes in use
 #pragma omp parallel for
-    for (index_t n=0; n<numNodes; n++) {
-        globalNodesIndex[n]=-1;
-        const index_t dof=globalDegreesOfFreedom[n];
-        if (myFirstDOF<=dof && dof<myLastDOF)
-            Node_buffer[Id[n]-min_id+header_len]=SET_ID;
+    for (index_t n = 0; n < numNodes; n++) {
+        globalNodesIndex[n] = -1;
+        const index_t dof = globalDegreesOfFreedom[n];
+        if (myFirstDOF <= dof && dof < myLastDOF)
+            Node_buffer[Id[n]-min_id+header_len] = SET_ID;
     }
-    index_t myNewNumNodes=0;
-    for (index_t n=0; n<my_buffer_len; n++) {
-        if (Node_buffer[header_len+n]==SET_ID) {
-            Node_buffer[header_len+n]=myNewNumNodes;
+    index_t myNewNumNodes = 0;
+    for (index_t n = 0; n < my_buffer_len; n++) {
+        if (Node_buffer[header_len+n] == SET_ID) {
+            Node_buffer[header_len+n] = myNewNumNodes;
             myNewNumNodes++;
         }
     }
@@ -668,52 +647,52 @@ dim_t NodeFile::createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
     MPI_Allgather(&myNewNumNodes, 1, MPI_DIM_T, &nodeDistribution[0], 1,
                   MPI_DIM_T, MPIInfo->comm);
 #else
-    nodeDistribution[0]=myNewNumNodes;
+    nodeDistribution[0] = myNewNumNodes;
 #endif
 
-    dim_t globalNumNodes=0;
-    for (int p=0; p<MPIInfo->size; ++p) {
-        const dim_t itmp=nodeDistribution[p];
-        nodeDistribution[p]=globalNumNodes;
-        globalNumNodes+=itmp;
+    dim_t globalNumNodes = 0;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const dim_t itmp = nodeDistribution[p];
+        nodeDistribution[p] = globalNumNodes;
+        globalNumNodes += itmp;
     }
-    nodeDistribution[MPIInfo->size]=globalNumNodes;
+    nodeDistribution[MPIInfo->size] = globalNumNodes;
 
     // offset node buffer
 #pragma omp parallel for
-    for (index_t n=0; n<my_buffer_len; n++)
-        Node_buffer[n+header_len]+=nodeDistribution[MPIInfo->rank];
+    for (index_t n = 0; n < my_buffer_len; n++)
+        Node_buffer[n+header_len] += nodeDistribution[MPIInfo->rank];
 
     // now we send this buffer around to assign global node index
 #ifdef ESYS_MPI
-    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
-    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
     int buffer_rank=MPIInfo->rank;
     for (int p=0; p<MPIInfo->size; ++p) {
-        const index_t nodeID_0=Node_buffer[0];
-        const index_t nodeID_1=Node_buffer[1];
-        const index_t dof0=dofDistribution[buffer_rank];
-        const index_t dof1=dofDistribution[buffer_rank+1];
+        const index_t nodeID_0 = Node_buffer[0];
+        const index_t nodeID_1 = Node_buffer[1];
+        const index_t dof0 = dofDistribution[buffer_rank];
+        const index_t dof1 = dofDistribution[buffer_rank+1];
         if (nodeID_0 <= nodeID_1) {
 #pragma omp parallel for
-            for (index_t n=0; n<numNodes; n++) {
-                const index_t dof=globalDegreesOfFreedom[n];
-                const index_t id=Id[n]-nodeID_0;
-                if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
-                    globalNodesIndex[n]=Node_buffer[id+header_len];
+            for (index_t n = 0; n < numNodes; n++) {
+                const index_t dof = globalDegreesOfFreedom[n];
+                const index_t id = Id[n]-nodeID_0;
+                if (dof0 <= dof && dof < dof1 && id>=0 && id<=nodeID_1-nodeID_0)
+                    globalNodesIndex[n] = Node_buffer[id+header_len];
             }
         }
         if (p<MPIInfo->size-1) { // the last send can be skipped
 #ifdef ESYS_MPI
             MPI_Status status;
             MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_DIM_T,
-                    dest, MPIInfo->msg_tag_counter, source,
-                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
+                    dest, MPIInfo->counter(), source,
+                    MPIInfo->counter(), MPIInfo->comm, &status);
+            MPIInfo->incCounter();
 #endif
-	        ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
         }
-        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
+        buffer_rank = MPIInfo->mod_rank(buffer_rank-1);
     }
     return globalNumNodes;
 }
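
In createDenseNodeLabeling() the circulated buffer carries a two-entry header which, judging by the reads of Node_buffer[0] and Node_buffer[1] above, holds the node-id range covered by the payload, so a receiving rank can map Id[n] to a buffer position. A sketch of that layout (makeLabelBuffer is an illustrative name):

    #include <vector>

    // Buffer layout: [firstId, lastId, label(firstId), ..., label(lastId)]
    std::vector<long> makeLabelBuffer(long firstId, long lastId,
                                      const std::vector<long>& labels)
    {
        std::vector<long> buf;
        buf.reserve(2 + labels.size());
        buf.push_back(firstId);   // header entry 0: nodeID_0
        buf.push_back(lastId);    // header entry 1: nodeID_1
        buf.insert(buf.end(), labels.begin(), labels.end()); // payload
        return buf;
    }
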
@@ -759,8 +738,8 @@ dim_t NodeFile::createDenseReducedLabeling(const std::vector<short>& reducedMask
     // now entries are collected from the buffer by sending them around
     // in a circle
 #ifdef ESYS_MPI
-    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
-    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
 #endif
     int buffer_rank=MPIInfo->rank;
     for (int p=0; p<MPIInfo->size; ++p) {
@@ -778,246 +757,262 @@ dim_t NodeFile::createDenseReducedLabeling(const std::vector<short>& reducedMask
 #ifdef ESYS_MPI
             MPI_Status status;
             MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_DIM_T, dest,
-                    MPIInfo->msg_tag_counter, source,
-                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
+                    MPIInfo->counter(), source,
+                    MPIInfo->counter(), MPIInfo->comm, &status);
+            MPIInfo->incCounter();
 #endif
-	        ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
         }
-        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
+        buffer_rank = MPIInfo->mod_rank(buffer_rank-1);
     }
     return new_numGlobalReduced;
 }
 
 void NodeFile::createDOFMappingAndCoupling(bool use_reduced_elements) 
 {
-    paso::Distribution_ptr dof_distribution;
+    escript::Distribution_ptr dofDistribution;
     const index_t* globalDOFIndex;
     if (use_reduced_elements) {
-        dof_distribution=reducedDegreesOfFreedomDistribution;
-        globalDOFIndex=globalReducedDOFIndex;
+        dofDistribution = reducedDegreesOfFreedomDistribution;
+        globalDOFIndex = globalReducedDOFIndex;
     } else {
-        dof_distribution=degreesOfFreedomDistribution;
-        globalDOFIndex=globalDegreesOfFreedom;
+        dofDistribution = degreesOfFreedomDistribution;
+        globalDOFIndex = globalDegreesOfFreedom;
     }
-    const index_t myFirstDOF=dof_distribution->getFirstComponent();
-    const index_t myLastDOF=dof_distribution->getLastComponent();
-    const int mpiSize=MPIInfo->size;
-    const int myRank=MPIInfo->rank;
+    NodeMapping& mapping = (use_reduced_elements ?
+                     reducedDegreesOfFreedomMapping : degreesOfFreedomMapping);
+
+    const index_t myFirstDOF = dofDistribution->getFirstComponent();
+    const index_t myLastDOF = dofDistribution->getLastComponent();
+    const int mpiSize = MPIInfo->size;
+    const int myRank = MPIInfo->rank;
 
     index_t min_DOF, max_DOF;
     std::pair<index_t,index_t> DOF_range(util::getFlaggedMinMaxInt(
                                             numNodes, globalDOFIndex, -1));
 
     if (DOF_range.second < DOF_range.first) {
-        min_DOF=myFirstDOF;
-        max_DOF=myLastDOF-1;
+        min_DOF = myFirstDOF;
+        max_DOF = myLastDOF - 1;
     } else {
-        min_DOF=DOF_range.first;
-        max_DOF=DOF_range.second;
+        min_DOF = DOF_range.first;
+        max_DOF = DOF_range.second;
     }
 
-    int p_min=mpiSize;
-    int p_max=-1;
+    int p_min = mpiSize;
+    int p_max = -1;
     if (max_DOF >= min_DOF) {
-        for (int p=0; p<mpiSize; ++p) {
-            if (dof_distribution->first_component[p]<=min_DOF) p_min=p;
-            if (dof_distribution->first_component[p]<=max_DOF) p_max=p;
+        for (int p = 0; p < mpiSize; ++p) {
+            if (dofDistribution->first_component[p] <= min_DOF)
+                p_min = p;
+            if (dofDistribution->first_component[p] <= max_DOF)
+                p_max = p;
         }
     }
 
-    if (!((min_DOF<=myFirstDOF) && (myLastDOF-1<=max_DOF))) {
-        setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
-        return;
+    std::stringstream ss;
+    if (myFirstDOF<myLastDOF && !(min_DOF <= myFirstDOF && myLastDOF-1 <= max_DOF)) {
+        ss << "createDOFMappingAndCoupling: Local elements do not span local "
+              "degrees of freedom. min_DOF=" << min_DOF << ", myFirstDOF="
+           << myFirstDOF << ", myLastDOF-1=" << myLastDOF-1
+           << ", max_DOF=" << max_DOF << " on rank=" << MPIInfo->rank;
+    }
+    const std::string msg(ss.str());
+    int error = msg.length();
+    int gerror = error;
+    escript::checkResult(error, gerror, MPIInfo);
+    if (gerror > 0) {
+        char* gmsg;
+        escript::shipString(msg.c_str(), &gmsg, MPIInfo->comm);
+        throw FinleyException(gmsg);
     }
+
     const index_t UNUSED = -1;
-    const index_t len_loc_dof=max_DOF-min_DOF+1;
-    std::vector<index_t> shared(numNodes*(p_max-p_min+1));
-    std::vector<index_t> offsetInShared(mpiSize+1);
+    const dim_t len_loc_dof = max_DOF - min_DOF + 1;
+    std::vector<index_t> shared(numNodes * (p_max - p_min + 1));
     std::vector<index_t> locDOFMask(len_loc_dof, UNUSED);
 
-#pragma omp parallel 
+#ifdef BOUNDS_CHECK
+    ESYS_ASSERT(myLastDOF-min_DOF <= len_loc_dof, "BOUNDS_CHECK");
+#endif
+
+#pragma omp parallel
     {
 #pragma omp for
-        for (index_t i=0;i<numNodes;++i) {
-            const index_t k=globalDOFIndex[i];
+        for (index_t i = 0; i < numNodes; ++i) {
+            const index_t k = globalDOFIndex[i];
             if (k > -1) {
 #ifdef BOUNDS_CHECK
-                if ((k-min_DOF)>=len_loc_dof) {
-                    printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
-                    exit(1);
-                }
+                ESYS_ASSERT(k - min_DOF < len_loc_dof, "BOUNDS_CHECK");
 #endif
-                locDOFMask[k-min_DOF]=UNUSED-1;
+                locDOFMask[k - min_DOF] = UNUSED - 1;
             }
        }
-#ifdef BOUNDS_CHECK
-       if (myLastDOF-min_DOF > len_loc_dof) {
-           printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
-           exit(1);
-       }
-#endif
 #pragma omp for
-       for (index_t i=myFirstDOF-min_DOF; i<myLastDOF-min_DOF; ++i) {
-            locDOFMask[i]=i-myFirstDOF+min_DOF;
+        for (index_t i = myFirstDOF - min_DOF; i < myLastDOF - min_DOF; ++i) {
+            locDOFMask[i] = i - myFirstDOF + min_DOF;
         }
     }
 
     std::vector<index_t> wanted_DOFs(numNodes);
     std::vector<index_t> rcv_len(mpiSize);
     std::vector<index_t> snd_len(mpiSize);
-    std::vector<Esys_MPI_rank> neighbor(mpiSize);
-    int numNeighbors=0;
-    index_t n=0;
-    index_t lastn=n;
-    for (int p=p_min; p<=p_max; ++p) {
+    std::vector<int> neighbour;
+    std::vector<index_t> offsetInShared;
+    dim_t n = 0;
+    dim_t lastn = n;
+
+    for (int p = p_min; p <= p_max; ++p) {
         if (p != myRank) {
-            const index_t firstDOF=std::max(min_DOF, dof_distribution->first_component[p]);
-            const index_t lastDOF=std::min(max_DOF+1, dof_distribution->first_component[p+1]);
+            const index_t firstDOF = std::max(min_DOF, dofDistribution->first_component[p]);
+            const index_t lastDOF = std::min(max_DOF + 1, dofDistribution->first_component[p + 1]);
 #ifdef BOUNDS_CHECK
-            if (firstDOF-min_DOF<0 || lastDOF-min_DOF>len_loc_dof) {
-                printf("BOUNDS_CHECK %s %d p=%d\n", __FILE__, __LINE__, p);
-                exit(1);
-            }
+            ESYS_ASSERT(lastDOF - min_DOF <= len_loc_dof, "BOUNDS_CHECK");
 #endif
-            for (index_t i=firstDOF-min_DOF; i<lastDOF-min_DOF; ++i) {
-                if (locDOFMask[i] == UNUSED-1) {
-                   locDOFMask[i]=myLastDOF-myFirstDOF+n;
-                   wanted_DOFs[n]=i+min_DOF;
-                   ++n;
+            for (index_t i = firstDOF - min_DOF; i < lastDOF - min_DOF; ++i) {
+                if (locDOFMask[i] == UNUSED - 1) {
+                    locDOFMask[i] = myLastDOF - myFirstDOF + n;
+                    wanted_DOFs[n] = i + min_DOF;
+                    ++n;
                 }
             }
             if (n > lastn) {
-                rcv_len[p]=n-lastn;
-#ifdef BOUNDS_CHECK
-                if (numNeighbors >= mpiSize+1) {
-                    printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors, n);
-                    exit(1);
-                }
-#endif
-                neighbor[numNeighbors]=p;
-                offsetInShared[numNeighbors]=lastn;
-                numNeighbors++;
-                lastn=n;
+                rcv_len[p] = n - lastn;
+                neighbour.push_back(p);
+                offsetInShared.push_back(lastn);
+                lastn = n;
             }
         } // if p!=myRank
     } // for p
 
-#ifdef BOUNDS_CHECK
-    if (numNeighbors >= mpiSize+1) {
-        printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
-        exit(1);
-    }
-#endif
-    offsetInShared[numNeighbors]=lastn;
+    offsetInShared.push_back(lastn);
 
     // assign new DOF labels to nodes
     std::vector<index_t> nodeMask(numNodes, UNUSED);
 #pragma omp parallel for
-    for (index_t i=0; i<numNodes; ++i) {
-        const index_t k=globalDOFIndex[i];
+    for (index_t i = 0; i < numNodes; ++i) {
+        const index_t k = globalDOFIndex[i];
         if (k > -1)
-            nodeMask[i]=locDOFMask[k-min_DOF];
+            nodeMask[i] = locDOFMask[k - min_DOF];
     }
 
     // now we can set the mapping from nodes to local DOFs
-    if (use_reduced_elements) {
-        reducedDegreesOfFreedomMapping.assign(nodeMask, UNUSED);
-    } else {
-        degreesOfFreedomMapping.assign(nodeMask, UNUSED);
-    }
+    mapping.assign(nodeMask, UNUSED);
 
     // define how to get DOF values for controlled but other processors
 #ifdef BOUNDS_CHECK
-    if (numNodes && offsetInShared[numNeighbors] >= numNodes*(p_max-p_min+1)) {
-        printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
-        exit(1);
-    }
+    ESYS_ASSERT(numNodes == 0 || offsetInShared.back() < numNodes * (p_max - p_min + 1), "BOUNDS_CHECK");
 #endif
 #pragma omp parallel for
-    for (index_t i=0; i<lastn; ++i)
-        shared[i]=myLastDOF-myFirstDOF+i;
+    for (index_t i = 0; i < lastn; ++i)
+        shared[i] = myLastDOF - myFirstDOF + i;
 
-    index_t *p = shared.empty() ? NULL : &shared[0];
+#ifdef ESYS_HAVE_PASO
+    index_t* p = shared.empty() ? NULL : &shared[0];
     paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
-            myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], p,
-            &offsetInShared[0], 1, 0, MPIInfo));
+            myLastDOF - myFirstDOF, neighbour, p, offsetInShared));
+#endif
 
     /////////////////////////////////
     //   now we build the sender   //
     /////////////////////////////////
 #ifdef ESYS_MPI
-    std::vector<MPI_Request> mpi_requests(mpiSize*2);
-    std::vector<MPI_Status> mpi_stati(mpiSize*2);
+    std::vector<MPI_Request> mpi_requests(mpiSize * 2);
+    std::vector<MPI_Status> mpi_stati(mpiSize * 2);
     MPI_Alltoall(&rcv_len[0], 1, MPI_DIM_T, &snd_len[0], 1, MPI_DIM_T, MPIInfo->comm);
-    int count=0;
-#else
-    snd_len[0]=rcv_len[0];
-#endif
-
-    for (int p=0; p<rcv_shcomp->numNeighbors; p++) {
-#ifdef ESYS_MPI
-        MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
-                rcv_shcomp->offsetInShared[p+1]-rcv_shcomp->offsetInShared[p],
-                MPI_DIM_T, rcv_shcomp->neighbor[p],
-                MPIInfo->msg_tag_counter+myRank, MPIInfo->comm,
-                &mpi_requests[count]);
+    int count = 0;
+    for (int p = 0; p < neighbour.size(); p++) {
+        MPI_Isend(&wanted_DOFs[offsetInShared[p]],
+                offsetInShared[p+1] - offsetInShared[p],
+                MPI_DIM_T, neighbour[p], MPIInfo->counter() + myRank,
+                MPIInfo->comm, &mpi_requests[count]);
         count++;
-#endif
     }
-    n=0;
-    numNeighbors=0;
-    for (int p=0; p<mpiSize; p++) {
+    n = 0;
+    neighbour.clear();
+    offsetInShared.clear();
+    for (int p = 0; p < mpiSize; p++) {
         if (snd_len[p] > 0) {
-#ifdef ESYS_MPI
             MPI_Irecv(&shared[n], snd_len[p], MPI_DIM_T, p,
-                    MPIInfo->msg_tag_counter+p, MPIInfo->comm,
-                    &mpi_requests[count]);
+                      MPIInfo->counter()+p, MPIInfo->comm,
+                      &mpi_requests[count]);
             count++;
-#endif
-            neighbor[numNeighbors]=p;
-            offsetInShared[numNeighbors]=n;
-            numNeighbors++;
-            n+=snd_len[p];
+            neighbour.push_back(p);
+            offsetInShared.push_back(n);
+            n += snd_len[p];
         }
     }
-    ESYS_MPI_INC_COUNTER(*MPIInfo, MPIInfo->size)
-    offsetInShared[numNeighbors]=n;
-#ifdef ESYS_MPI
+    MPIInfo->incCounter(MPIInfo->size);
     MPI_Waitall(count, &mpi_requests[0], &mpi_stati[0]);
-#endif
-    // map global ids to local id's
+    offsetInShared.push_back(n);
+
+    // map global IDs to local IDs
 #pragma omp parallel for
-    for (index_t i=0; i<n; ++i) {
-        shared[i]=locDOFMask[shared[i]-min_DOF];
+    for (index_t i = 0; i < n; ++i) {
+        shared[i] = locDOFMask[shared[i] - min_DOF];
     }
+#endif // ESYS_MPI
 
+#ifdef ESYS_HAVE_PASO
     paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
-            myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], p,
-            &offsetInShared[0], 1, 0, MPIInfo));
-
-    if (noError()) {
-        if (use_reduced_elements) {
-            reducedDegreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
-        } else {
-            degreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+            myLastDOF - myFirstDOF, neighbour, p, offsetInShared));
+
+    if (use_reduced_elements) {
+        reducedDegreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+    } else {
+        degreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+    }
+#endif // ESYS_HAVE_PASO
+
+#ifdef ESYS_HAVE_TRILINOS
+    using namespace esys_trilinos;
+
+    const dim_t myNumTargets = myLastDOF - myFirstDOF;
+    const dim_t numTargets = mapping.getNumTargets();
+    IndexVector myRows(myNumTargets);
+    IndexVector columns(numTargets);
+    const IndexVector& dofMap = mapping.map;
+
+#pragma omp parallel
+    {
+#pragma omp for nowait
+        for (size_t i = 0; i < myNumTargets; i++) {
+            myRows[i] = globalDOFIndex[dofMap[i]];
+        }
+#pragma omp for
+        for (size_t i = 0; i < numTargets; i++) {
+            columns[i] = globalDOFIndex[dofMap[i]];
         }
+    } // end parallel section
+
+    const dim_t numTotal = dofDistribution->getGlobalNumComponents();
+    if (use_reduced_elements) {
+        trilinosReducedRowMap.reset(new MapType(numTotal, myRows, 0,
+                                      TeuchosCommFromEsysComm(MPIInfo->comm)));
+        trilinosReducedColMap.reset(new MapType(numTotal, columns, 0,
+                                      TeuchosCommFromEsysComm(MPIInfo->comm)));
+    } else {
+        trilinosRowMap.reset(new MapType(numTotal, myRows, 0,
+                                      TeuchosCommFromEsysComm(MPIInfo->comm)));
+        trilinosColMap.reset(new MapType(numTotal, columns, 0,
+                                      TeuchosCommFromEsysComm(MPIInfo->comm)));
     }
+#endif // ESYS_HAVE_TRILINOS
 }
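
When building the sender side above, the per-neighbour request lengths are exchanged first (MPI_Alltoall with rcv_len as input and snd_len as output) so that every rank knows how many receives to post before the index lists travel. A reduced sketch of that step, using int counts for brevity (exchangeRequestCounts is an illustrative name):

    #include <mpi.h>
    #include <vector>

    // iWantFrom[p] = how many remote DOF indices this rank needs from rank p;
    // the result tells how many indices each other rank will request from us.
    std::vector<int> exchangeRequestCounts(std::vector<int> iWantFrom, MPI_Comm comm)
    {
        int size;
        MPI_Comm_size(comm, &size);
        std::vector<int> theyWantFromMe(size);
        MPI_Alltoall(iWantFrom.data(), 1, MPI_INT,
                     theyWantFromMe.data(), 1, MPI_INT, comm);
        return theyWantFromMe;
    }
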
 
-void NodeFile::createNodeMappings(const std::vector<index_t>& indexReducedNodes,
-                                  const std::vector<index_t>& dofDist,
-                                  const std::vector<index_t>& nodeDist)
+void NodeFile::createNodeMappings(const IndexVector& indexReducedNodes,
+                                  const IndexVector& dofDist,
+                                  const IndexVector& nodeDist)
 {
-    const int mpiSize=MPIInfo->size;
-    const int myRank=MPIInfo->rank;
+    const int mpiSize = MPIInfo->size;
+    const int myRank = MPIInfo->rank;
 
-    const index_t myFirstDOF=dofDist[myRank];
-    const index_t myLastDOF=dofDist[myRank+1];
-    const index_t myNumDOF=myLastDOF-myFirstDOF;
+    const index_t myFirstDOF = dofDist[myRank];
+    const index_t myLastDOF = dofDist[myRank+1];
+    const index_t myNumDOF = myLastDOF-myFirstDOF;
 
-    const index_t myFirstNode=nodeDist[myRank];
-    const index_t myLastNode=nodeDist[myRank+1];
-    const index_t myNumNodes=myLastNode-myFirstNode;
+    const index_t myFirstNode = nodeDist[myRank];
+    const index_t myLastNode = nodeDist[myRank+1];
+    const index_t myNumNodes = myLastNode-myFirstNode;
 
     std::vector<short> maskMyReducedDOF(myNumDOF, -1);
     std::vector<short> maskMyReducedNodes(myNumNodes, -1);
@@ -1025,103 +1020,94 @@ void NodeFile::createNodeMappings(const std::vector<index_t>& indexReducedNodes,
 
     // mark the nodes used by the reduced mesh
 #pragma omp parallel for
-    for (index_t i=0; i<iRNsize; ++i) {
-        index_t k=globalNodesIndex[indexReducedNodes[i]];
-        if (k>=myFirstNode && myLastNode>k)
-            maskMyReducedNodes[k-myFirstNode]=1;
-        k=globalDegreesOfFreedom[indexReducedNodes[i]];
-        if (k>=myFirstDOF && myLastDOF>k) {
-            maskMyReducedDOF[k-myFirstDOF]=1;
+    for (index_t i = 0; i < iRNsize; ++i) {
+        index_t k = globalNodesIndex[indexReducedNodes[i]];
+        if (k >= myFirstNode && myLastNode > k)
+            maskMyReducedNodes[k - myFirstNode] = 1;
+        k = globalDegreesOfFreedom[indexReducedNodes[i]];
+        if (k >= myFirstDOF && myLastDOF > k) {
+            maskMyReducedDOF[k - myFirstDOF] = 1;
         }
     }
-    std::vector<index_t> indexMyReducedDOF = util::packMask(maskMyReducedDOF);
-    index_t myNumReducedDOF=indexMyReducedDOF.size();
-    std::vector<index_t> indexMyReducedNodes = util::packMask(maskMyReducedNodes);
-    index_t myNumReducedNodes=indexMyReducedNodes.size();
+    IndexVector indexMyReducedDOF = util::packMask(maskMyReducedDOF);
+    index_t myNumReducedDOF = indexMyReducedDOF.size();
+    IndexVector indexMyReducedNodes = util::packMask(maskMyReducedNodes);
+    index_t myNumReducedNodes = indexMyReducedNodes.size();
 
-    std::vector<index_t> rdofDist(mpiSize+1);
-    std::vector<index_t> rnodeDist(mpiSize+1);
+    IndexVector rdofDist(mpiSize+1);
+    IndexVector rnodeDist(mpiSize+1);
 #ifdef ESYS_MPI
     MPI_Allgather(&myNumReducedNodes, 1, MPI_DIM_T, &rnodeDist[0], 1, MPI_DIM_T, MPIInfo->comm);
     MPI_Allgather(&myNumReducedDOF, 1, MPI_DIM_T, &rdofDist[0], 1, MPI_DIM_T, MPIInfo->comm);
 #else
-    rnodeDist[0]=myNumReducedNodes;
-    rdofDist[0]=myNumReducedDOF;
+    rnodeDist[0] = myNumReducedNodes;
+    rdofDist[0] = myNumReducedDOF;
 #endif
-    index_t globalNumReducedNodes=0;
-    index_t globalNumReducedDOF=0;
-    for (int i=0; i<mpiSize; ++i) {
-        index_t k=rnodeDist[i];
-        rnodeDist[i]=globalNumReducedNodes;
-        globalNumReducedNodes+=k;
-
-        k=rdofDist[i];
-        rdofDist[i]=globalNumReducedDOF;
-        globalNumReducedDOF+=k;
+    index_t globalNumReducedNodes = 0;
+    index_t globalNumReducedDOF = 0;
+    for (int i = 0; i < mpiSize; ++i) {
+        index_t k = rnodeDist[i];
+        rnodeDist[i] = globalNumReducedNodes;
+        globalNumReducedNodes += k;
+
+        k = rdofDist[i];
+        rdofDist[i] = globalNumReducedDOF;
+        globalNumReducedDOF += k;
     }
-    rnodeDist[mpiSize]=globalNumReducedNodes;
-    rdofDist[mpiSize]=globalNumReducedDOF;
-
-    // ==== distribution of Nodes ===============================
-    nodesDistribution.reset(new paso::Distribution(MPIInfo, &nodeDist[0], 1, 0));
-    // ==== distribution of DOFs ================================
-    degreesOfFreedomDistribution.reset(new paso::Distribution(MPIInfo, &dofDist[0], 1, 0));
-    // ==== distribution of reduced Nodes =======================
-    reducedNodesDistribution.reset(new paso::Distribution(MPIInfo, &rnodeDist[0], 1, 0));
-    // ==== distribution of reduced DOF =========================
-    reducedDegreesOfFreedomDistribution.reset(new paso::Distribution(
-                                                MPIInfo, &rdofDist[0], 1, 0));
-
-    std::vector<index_t> nodeMask(numNodes);
-
-    if (noError()) {
-        const index_t UNUSED = -1;
-        // ==== nodes mapping which is a dummy structure ========
+    rnodeDist[mpiSize] = globalNumReducedNodes;
+    rdofDist[mpiSize] = globalNumReducedDOF;
+
+    // ==== distribution of Nodes ====
+    nodesDistribution.reset(new escript::Distribution(MPIInfo, nodeDist));
+
+    // ==== distribution of DOFs ====
+    degreesOfFreedomDistribution.reset(new escript::Distribution(MPIInfo, dofDist));
+
+    // ==== distribution of reduced Nodes ====
+    reducedNodesDistribution.reset(new escript::Distribution(MPIInfo, rnodeDist));
+
+    // ==== distribution of reduced DOF ====
+    reducedDegreesOfFreedomDistribution.reset(new escript::Distribution(
+                                                MPIInfo, rdofDist));
+
+    IndexVector nodeMask(numNodes);
+    const index_t UNUSED = -1;
+
+    // ==== nodes mapping (dummy) ====
 #pragma omp parallel for
-        for (index_t i=0; i<numNodes; ++i)
-            nodeMask[i]=i;
-        nodesMapping.assign(nodeMask, UNUSED);
+    for (index_t i = 0; i < numNodes; ++i)
+        nodeMask[i] = i;
+    nodesMapping.assign(nodeMask, UNUSED);
 
-        // ==== mapping between nodes and reduced nodes ==========
+    // ==== mapping between nodes and reduced nodes ====
 #pragma omp parallel for
-        for (index_t i=0; i<numNodes; ++i)
-            nodeMask[i]=UNUSED;
+    for (index_t i = 0; i < numNodes; ++i)
+        nodeMask[i] = UNUSED;
 #pragma omp parallel for
-        for (index_t i=0; i<iRNsize; ++i)
-            nodeMask[indexReducedNodes[i]]=i;
-        reducedNodesMapping.assign(nodeMask, UNUSED);
-    }
+    for (index_t i = 0; i < iRNsize; ++i)
+        nodeMask[indexReducedNodes[i]] = i;
+    reducedNodesMapping.assign(nodeMask, UNUSED);
+
     // ==== mapping between nodes and DOFs + DOF connector
-    if (noError())
-        createDOFMappingAndCoupling(false);
+    createDOFMappingAndCoupling(false);
     // ==== mapping between nodes and reduced DOFs + reduced DOF connector
-    if (noError())
-        createDOFMappingAndCoupling(true);
+    createDOFMappingAndCoupling(true);
 
     // get the Ids for DOFs and reduced nodes
-    if (noError()) {
-        const index_t rnTargets = reducedNodesMapping.getNumTargets();
-        const index_t dofTargets = degreesOfFreedomMapping.getNumTargets();
-        const index_t rdofTargets = reducedDegreesOfFreedomMapping.getNumTargets();
+    const index_t rnTargets = reducedNodesMapping.getNumTargets();
+    const index_t dofTargets = degreesOfFreedomMapping.getNumTargets();
+    const index_t rdofTargets = reducedDegreesOfFreedomMapping.getNumTargets();
 #pragma omp parallel
-        {
-#pragma omp for
-         for (index_t i=0; i<rnTargets; ++i)
-             reducedNodesId[i]=Id[reducedNodesMapping.map[i]];
-#pragma omp for
-         for (index_t i=0; i<dofTargets; ++i)
-             degreesOfFreedomId[i]=Id[degreesOfFreedomMapping.map[i]];
+    {
+#pragma omp for nowait
+        for (index_t i = 0; i < rnTargets; ++i)
+            reducedNodesId[i] = Id[reducedNodesMapping.map[i]];
+#pragma omp for nowait
+        for (index_t i = 0; i < dofTargets; ++i)
+            degreesOfFreedomId[i] = Id[degreesOfFreedomMapping.map[i]];
 #pragma omp for
-         for (index_t i=0; i<rdofTargets; ++i)
-             reducedDegreesOfFreedomId[i]=Id[reducedDegreesOfFreedomMapping.map[i]];
-        }
-    } else {
-        nodesDistribution.reset();
-        reducedNodesDistribution.reset();
-        degreesOfFreedomDistribution.reset();
-        reducedDegreesOfFreedomDistribution.reset();
-        degreesOfFreedomConnector.reset();
-        reducedDegreesOfFreedomConnector.reset();
+        for (index_t i = 0; i < rdofTargets; ++i)
+            reducedDegreesOfFreedomId[i] = Id[reducedDegreesOfFreedomMapping.map[i]];
     }
 }
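
createNodeMappings() relies on util::packMask to turn the -1/1 masks into index lists. The following is only a sketch of that concept, not the finley implementation:

    #include <cstddef>
    #include <vector>

    // Collect the positions of all entries flagged as used (value > -1).
    std::vector<long> packMaskSketch(const std::vector<short>& mask)
    {
        std::vector<long> index;
        for (std::size_t i = 0; i < mask.size(); ++i)
            if (mask[i] > -1)
                index.push_back(static_cast<long>(i));
        return index;
    }
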
 
diff --git a/finley/src/NodeFile.h b/finley/src/NodeFile.h
index 9930baf..dc550eb 100644
--- a/finley/src/NodeFile.h
+++ b/finley/src/NodeFile.h
@@ -21,67 +21,116 @@
 
 #include "Finley.h"
 #include "NodeMapping.h"
+
+#include <escript/Distribution.h>
+
+#ifdef ESYS_HAVE_PASO
 #include <paso/Coupler.h>
-#include <paso/Distribution.h>
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/types.h>
+#endif
 
+namespace escript {
+    struct IndexList;
+}
 
 namespace finley {
 
 class NodeFile
 {
 public:
-    NodeFile(int nDim, esysUtils::JMPI& mpiInfo);
+
+    /// constructor - creates an empty node file.
+    /// Use allocTable() to allocate the node table (Id,Coordinates).
+    NodeFile(int nDim, escript::JMPI MPIInfo);
+
+    /// destructor
     ~NodeFile();
 
+    /// allocates the node table within this node file to hold numNodes nodes.
     void allocTable(dim_t numNodes);
+
+    /// empties the node table and frees all memory
     void freeTable();
 
     void print() const;
+
     inline index_t getFirstNode() const;
     inline index_t getLastNode() const;
-    inline index_t getGlobalNumNodes() const;
-    inline index_t* borrowGlobalNodesIndex() const;
+    inline dim_t getGlobalNumNodes() const;
+    inline const index_t* borrowGlobalNodesIndex() const;
 
     inline index_t getFirstReducedNode() const;
     inline index_t getLastReducedNode() const;
     inline index_t getGlobalNumReducedNodes() const;
-    inline index_t* borrowGlobalReducedNodesIndex() const;
+    inline const index_t* borrowGlobalReducedNodesIndex() const;
 
-    /// returns the number of FEM nodes
+    /// returns the number of FEM nodes (on this rank)
     inline dim_t getNumNodes() const;
+
+    /// returns the number of reduced order FEM nodes (on this rank)
     inline dim_t getNumReducedNodes() const;
+
+    /// returns the number of degrees of freedom (on this rank)
     inline dim_t getNumDegreesOfFreedom() const;
+
+    /// returns the number of reduced order degrees of freedom (on this rank)
     inline dim_t getNumReducedDegreesOfFreedom() const;
 
-    inline const std::vector<index_t>& borrowReducedNodesTarget() const;
-    inline const std::vector<index_t>& borrowDegreesOfFreedomTarget() const;
-    inline const std::vector<index_t>& borrowNodesTarget() const;
-    inline const std::vector<index_t>& borrowReducedDegreesOfFreedomTarget() const;
+    /// returns the number of degrees of freedom targets (own and shared)
+    inline dim_t getNumDegreesOfFreedomTargets() const;
+
+    /// returns the number of reduced degrees of freedom targets (own and shared)
+    inline dim_t getNumReducedDegreesOfFreedomTargets() const;
+
+    inline const IndexVector& borrowReducedNodesTarget() const;
+    inline const IndexVector& borrowDegreesOfFreedomTarget() const;
+    inline const IndexVector& borrowNodesTarget() const;
+    inline const IndexVector& borrowReducedDegreesOfFreedomTarget() const;
 
     inline const index_t* borrowTargetReducedNodes() const;
     inline const index_t* borrowTargetDegreesOfFreedom() const;
+
+    /// returns the mapping from local nodes to a target
     inline const index_t* borrowTargetNodes() const;
     inline const index_t* borrowTargetReducedDegreesOfFreedom() const;
 
-    void createNodeMappings(const std::vector<index_t>& indexReducedNodes,
-                            const std::vector<index_t>& dofDistribution,
-                            const std::vector<index_t>& nodeDistribution);
+    inline void updateTagList();
+
+    /// creates a dense labeling of the global degrees of freedom and returns
+    /// the new number of global degrees of freedom
     dim_t createDenseDOFLabeling();
-    dim_t createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
-                                  const std::vector<index_t>& dofDistribution);
+
+    dim_t createDenseNodeLabeling(IndexVector& nodeDistribution,
+                                  const IndexVector& dofDistribution);
+
     dim_t createDenseReducedLabeling(const std::vector<short>& reducedMask,
                                      bool useNodes);
-    void assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF, const std::vector<index_t>& distribution);
+
+    void createNodeMappings(const IndexVector& indexReducedNodes,
+                            const IndexVector& dofDistribution,
+                            const IndexVector& nodeDistribution);
+
+
+    void assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
+                             const IndexVector& distribution);
 
     void copyTable(index_t offset, index_t idOffset, index_t dofOffset,
                    const NodeFile* in);
+
+    /// gathers nodes from the NodeFile `in` using the entries in
+    /// index[0:numNodes-1] which are between min_index and max_index
+    /// (exclusive)
     void gather(const index_t* index, const NodeFile* in);
+
     void gather_global(const index_t* index, const NodeFile* in);
     void scatter(const index_t* index, const NodeFile* in);
 
     void setCoordinates(const escript::Data& newX);
-    void setTags(const int newTag, const escript::Data& mask);
-    inline void updateTagList();
+
+    /// set tags to newTag where mask > 0
+    void setTags(int newTag, const escript::Data& mask);
 
     std::pair<index_t,index_t> getDOFRange() const;
 
@@ -89,60 +138,70 @@ private:
     std::pair<index_t,index_t> getGlobalIdRange() const;
     std::pair<index_t,index_t> getGlobalDOFRange() const;
     std::pair<index_t,index_t> getGlobalNodeIDIndexRange() const;
-    dim_t prepareLabeling(const std::vector<short>& mask,
-                          std::vector<index_t>& buffer,
-                          std::vector<index_t>& distribution, bool useNodes);
+    dim_t prepareLabeling(const std::vector<short>& mask, IndexVector& buffer,
+                          IndexVector& distribution, bool useNodes);
     void createDOFMappingAndCoupling(bool reduced);
 
     NodeMapping nodesMapping;
- 
+    NodeMapping degreesOfFreedomMapping;
+    NodeMapping reducedDegreesOfFreedomMapping;
+
+    /// number of nodes
+    dim_t numNodes;
+
 public:
     ///////////////////////////////////////
     // these should be private as well.
 
     NodeMapping reducedNodesMapping;
-    NodeMapping degreesOfFreedomMapping;
-    NodeMapping reducedDegreesOfFreedomMapping;
 
     /// MPI information
-    esysUtils::JMPI MPIInfo;
-    /// number of nodes
-    dim_t numNodes;
+    escript::JMPI MPIInfo;
     /// number of spatial dimensions
     int numDim;
-    /// Id[i] is the id number of node i. It needs to be unique.
-    index_t *Id;
-    /// Tag[i] is the tag of node i.
-    int *Tag;
+    /// Id[i] is the unique ID number of FEM node i
+    index_t* Id;
+    /// Tag[i] is the tag of node i
+    int* Tag;
     /// vector of tags which are actually used
     std::vector<int> tagsInUse;
+
     /// globalDegreesOfFreedom[i] is the global degree of freedom assigned
     /// to node i. This index is used to consider periodic boundary conditions
-    /// by assigning the same degreesOfFreedom to the same node.
+    /// by assigning the same degree of freedom to different nodes.
     index_t* globalDegreesOfFreedom;
     /// Coordinates[INDEX2(k,i,numDim)] is the k-th coordinate of node i
-    double *Coordinates;
-    /// assigns each local node a global unique Id in a dense labeling of
+    double* Coordinates;
+    /// assigns each local node a global unique ID in a dense labeling of
     /// reduced DOF. Value <0 indicates that the DOF is not used.
-    index_t *globalReducedDOFIndex;
-    /// assigns each local node a global unique Id in a dense labeling.
+    index_t* globalReducedDOFIndex;
+    /// assigns each local reduced node a global unique ID in a dense labeling
     /// Value <0 indicates that the DOF is not used
-    index_t *globalReducedNodesIndex;
-    /// assigns each local reduced node a global unique Id in a dense labeling
-    index_t *globalNodesIndex;
+    index_t* globalReducedNodesIndex;
+    /// assigns each local node a global unique ID in a dense labeling
+    index_t* globalNodesIndex;
 
-    paso::Distribution_ptr nodesDistribution;
-    paso::Distribution_ptr reducedNodesDistribution;
-    paso::Distribution_ptr degreesOfFreedomDistribution;
-    paso::Distribution_ptr reducedDegreesOfFreedomDistribution;
+    /// MPI distribution of nodes
+    escript::Distribution_ptr nodesDistribution;
+    escript::Distribution_ptr reducedNodesDistribution;
+    escript::Distribution_ptr degreesOfFreedomDistribution;
+    escript::Distribution_ptr reducedDegreesOfFreedomDistribution;
 
+#ifdef ESYS_HAVE_PASO
     paso::Connector_ptr degreesOfFreedomConnector;
     paso::Connector_ptr reducedDegreesOfFreedomConnector;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    esys_trilinos::const_TrilinosMap_ptr trilinosRowMap;
+    esys_trilinos::const_TrilinosMap_ptr trilinosReducedRowMap;
+    esys_trilinos::const_TrilinosMap_ptr trilinosColMap;
+    esys_trilinos::const_TrilinosMap_ptr trilinosReducedColMap;
+#endif
   
-    /// these are the packed versions of Id
-    index_t *reducedNodesId;        
-    index_t *degreesOfFreedomId;
-    index_t *reducedDegreesOfFreedomId;
+    // these are the packed versions of Id
+    index_t* reducedNodesId;        
+    index_t* degreesOfFreedomId;
+    index_t* reducedDegreesOfFreedomId;
 
     /// the status counts the updates done on the node coordinates.
     /// The value is increased by 1 when the node coordinates are updated.
@@ -163,12 +222,12 @@ inline index_t NodeFile::getLastNode() const
     return nodesDistribution->getLastComponent();
 }
 
-inline index_t NodeFile::getGlobalNumNodes() const
+inline dim_t NodeFile::getGlobalNumNodes() const
 {
     return nodesDistribution->getGlobalNumComponents();
 }
 
-inline index_t* NodeFile::borrowGlobalNodesIndex() const
+inline const index_t* NodeFile::borrowGlobalNodesIndex() const
 {
     return globalNodesIndex;
 }
@@ -188,7 +247,7 @@ inline dim_t NodeFile::getGlobalNumReducedNodes() const
     return reducedNodesDistribution->getGlobalNumComponents();
 }
 
-inline index_t* NodeFile::borrowGlobalReducedNodesIndex() const
+inline const index_t* NodeFile::borrowGlobalReducedNodesIndex() const
 {
     return globalReducedNodesIndex;
 }
@@ -213,22 +272,32 @@ inline dim_t NodeFile::getNumReducedDegreesOfFreedom() const
     return reducedDegreesOfFreedomDistribution->getMyNumComponents();
 }
 
-inline const std::vector<index_t>& NodeFile::borrowNodesTarget() const
+inline dim_t NodeFile::getNumDegreesOfFreedomTargets() const
+{
+    return degreesOfFreedomMapping.getNumTargets();
+}
+
+inline dim_t NodeFile::getNumReducedDegreesOfFreedomTargets() const
+{
+    return reducedDegreesOfFreedomMapping.getNumTargets();
+}
+
+inline const IndexVector& NodeFile::borrowNodesTarget() const
 {
     return nodesMapping.map;
 }
 
-inline const std::vector<index_t>& NodeFile::borrowReducedNodesTarget() const
+inline const IndexVector& NodeFile::borrowReducedNodesTarget() const
 {
     return reducedNodesMapping.map;
 }
 
-inline const std::vector<index_t>& NodeFile::borrowDegreesOfFreedomTarget() const
+inline const IndexVector& NodeFile::borrowDegreesOfFreedomTarget() const
 {
     return degreesOfFreedomMapping.map;
 }
 
-inline const std::vector<index_t>& NodeFile::borrowReducedDegreesOfFreedomTarget() const
+inline const IndexVector& NodeFile::borrowReducedDegreesOfFreedomTarget() const
 {
     return reducedDegreesOfFreedomMapping.map;
 }
diff --git a/finley/src/NodeMapping.h b/finley/src/NodeMapping.h
index 5554203..4fe7464 100644
--- a/finley/src/NodeMapping.h
+++ b/finley/src/NodeMapping.h
@@ -45,8 +45,7 @@ struct NodeMapping {
         std::pair<index_t,index_t> range(
             util::getFlaggedMinMaxInt(theTarget.size(), &theTarget[0], unused));
         if (range.first < 0) {
-            setError(VALUE_ERROR, "NodeMapping: target has negative entry.");
-            return;
+            throw escript::ValueError("NodeMapping: target has negative entry.");
         }
         // now we assume min(target)=0!
         const dim_t numTargets = range.first<=range.second ? range.second+1 : 0;
@@ -54,6 +53,7 @@ struct NodeMapping {
         const index_t targetSize = target.size();
         map.assign(numTargets, -1);
 
+        bool err = false;
 #pragma omp parallel
         {
 #pragma omp for
@@ -65,10 +65,13 @@ struct NodeMapping {
 #pragma omp for
             for (index_t i=0; i<numTargets; ++i) {
                 if (map[i]==-1) {
-                    setError(VALUE_ERROR, "NodeMapping: target does not define a continuous labeling.");
+#pragma omp critical
+                    err=true;
                 }
             }
         }
+        if (err)
+            throw escript::ValueError("NodeMapping: target does not define a continuous labeling.");
     }
 
     /// returns the number of target nodes (number of items in the map array)
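
The hunk above replaces the old setError() flag with exceptions but deliberately keeps the throw outside the OpenMP region, because a C++ exception must not propagate out of an OpenMP worksharing construct. A minimal standalone sketch of that flag-then-throw pattern, with std::runtime_error standing in for escript::ValueError:

    #include <stdexcept>
    #include <vector>

    // Each thread only records the failure inside the parallel loop;
    // the single throw happens after the parallel region has ended.
    void checkContinuousLabeling(const std::vector<long>& map)
    {
        bool err = false;
    #pragma omp parallel for
        for (long i = 0; i < static_cast<long>(map.size()); ++i) {
            if (map[i] == -1) {
    #pragma omp critical
                err = true;
            }
        }
        if (err)
            throw std::runtime_error(
                    "NodeMapping: target does not define a continuous labeling.");
    }
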
diff --git a/finley/src/Quadrature.cpp b/finley/src/Quadrature.cpp
index 2e02f63..66884ee 100644
--- a/finley/src/Quadrature.cpp
+++ b/finley/src/Quadrature.cpp
@@ -22,9 +22,8 @@
 *****************************************************************************/
 
 #include "Quadrature.h"
-#include "esysUtils/index.h"
-#include "esysUtils/mem.h"
 
+#include <escript/index.h>
 
 #define QUADNODES(_K_,_I_) quadNodes[INDEX2(_K_,_I_,DIM)]
 #define QUADWEIGHTS(_I_) quadWeights[_I_]
@@ -51,7 +50,7 @@ const QuadInfo* QuadInfo_getInfo(QuadTypeId id)
        idx++;
     }
     if (out==NULL) {
-        setError(VALUE_ERROR,"QuadInfo_getInfo: cannot find requested quadrature scheme.");
+        throw escript::ValueError("QuadInfo_getInfo: cannot find requested quadrature scheme.");
     }
     return out;
 }
@@ -350,8 +349,6 @@ void Quad_getNodesTri(int numQuadNodes, std::vector<double>& quadNodes, std::vec
     } else {
         // get scheme on [0.1]^2
         Quad_getNodesRec(numQuadNodes, quadNodes, quadWeights);
-        if (!noError())
-            return;
 
         // squeeze it:
         for (int i=0; i<numQuadNodes; i++) {
@@ -959,8 +956,6 @@ void Quad_getNodesTet(int numQuadNodes, std::vector<double>& quadNodes, std::vec
     } else {
         // get scheme on [0.1]^3
         Quad_getNodesHex(numQuadNodes, quadNodes, quadWeights);
-        if (!noError())
-            return;
 
         // squeeze it:
         for (int i=0; i<numQuadNodes; i++) {
@@ -978,7 +973,7 @@ void Quad_getNodesTet(int numQuadNodes, std::vector<double>& quadNodes, std::vec
             const double JA33 = (1./3.)*Q1*Q2-(1./2.)*(Q1+Q2)+1.;
             const double DET = JA11*JA22*JA33 + JA12*JA23*JA31 + JA13*JA21*JA32
                               -JA13*JA22*JA31 - JA11*JA23*JA32 - JA12*JA21*JA33;
-            quadWeights[i]=quadWeights[i]*ABS(DET);
+            quadWeights[i]=quadWeights[i]*std::abs(DET);
             QUADNODES(0,i)=Q1*((1./3.)*Q2*Q3-(1./2.)*(Q2+Q3)+1.);
             QUADNODES(1,i)=Q2*((1./3.)*Q1*Q3-(1./2.)*(Q1+Q3)+1.);
             QUADNODES(2,i)=Q3*((1./3.)*Q1*Q2-(1./2.)*(Q1+Q2)+1.);
@@ -1004,19 +999,17 @@ void Quad_getNodesRec(int numQuadNodes, std::vector<double>& quadNodes, std::vec
             Quad_getNodesLine(numQuadNodes1d, quadNodes1d, quadWeights1d);
 
             // make 2D scheme:
-            if (noError()) {
-                int l=0;
-                for (int i=0; i<numQuadNodes1d; i++) {
-                    for (int j=0; j<numQuadNodes1d; j++) {
-                        QUADNODES(0,l)=quadNodes1d[i];
-                        QUADNODES(1,l)=quadNodes1d[j];
-                        QUADWEIGHTS(l)=quadWeights1d[i]*quadWeights1d[j];
-                        l++;
-                    }
+            int l=0;
+            for (int i=0; i<numQuadNodes1d; i++) {
+                for (int j=0; j<numQuadNodes1d; j++) {
+                    QUADNODES(0,l)=quadNodes1d[i];
+                    QUADNODES(1,l)=quadNodes1d[j];
+                    QUADWEIGHTS(l)=quadWeights1d[i]*quadWeights1d[j];
+                    l++;
                 }
-                set=true;
-                break;
             }
+            set=true;
+            break;
         }
     }
     if (!set) {
@@ -1024,7 +1017,7 @@ void Quad_getNodesRec(int numQuadNodes, std::vector<double>& quadNodes, std::vec
         ss << "Quad_getNodesRec: Illegal number of quadrature nodes "
            << numQuadNodes << " on hexahedron.";
         const std::string msg(ss.str());
-        setError(VALUE_ERROR, msg.c_str());
+        throw escript::ValueError(msg);
     }
 #undef DIM
 }
@@ -1045,22 +1038,20 @@ void Quad_getNodesHex(int numQuadNodes, std::vector<double>& quadNodes, std::vec
             Quad_getNodesLine(numQuadNodes1d, quadNodes1d, quadWeights1d);
 
             // make 3D scheme:
-            if (noError()) {
-                int l=0;
-                for (int i=0; i<numQuadNodes1d; i++) {
-                    for (int j=0; j<numQuadNodes1d; j++) {
-                        for (int k=0; k<numQuadNodes1d; k++) {
-                            QUADNODES(0,l)=quadNodes1d[i];
-                            QUADNODES(1,l)=quadNodes1d[j];
-                            QUADNODES(2,l)=quadNodes1d[k];
-                            QUADWEIGHTS(l)=quadWeights1d[i]*quadWeights1d[j]*quadWeights1d[k];
-                            l++;
-                        }
+            int l=0;
+            for (int i=0; i<numQuadNodes1d; i++) {
+                for (int j=0; j<numQuadNodes1d; j++) {
+                    for (int k=0; k<numQuadNodes1d; k++) {
+                        QUADNODES(0,l)=quadNodes1d[i];
+                        QUADNODES(1,l)=quadNodes1d[j];
+                        QUADNODES(2,l)=quadNodes1d[k];
+                        QUADWEIGHTS(l)=quadWeights1d[i]*quadWeights1d[j]*quadWeights1d[k];
+                        l++;
                     }
                 }
-                set=true;
-                break;
             }
+            set=true;
+            break;
         }
     }
     if (!set) {
@@ -1068,7 +1059,7 @@ void Quad_getNodesHex(int numQuadNodes, std::vector<double>& quadNodes, std::vec
         ss << "Quad_getNodesHex: Illegal number of quadrature nodes "
            << numQuadNodes << " on hexahedron.";
         const std::string msg(ss.str());
-        setError(VALUE_ERROR, msg.c_str());
+        throw escript::ValueError(msg);
     }
 #undef DIM
 }
@@ -1079,7 +1070,7 @@ void Quad_getNodesHex(int numQuadNodes, std::vector<double>& quadNodes, std::vec
 void Quad_getNodesPoint(int numQuadNodes, std::vector<double>& quadNodes, std::vector<double>& quadWeights)
 {
     if (numQuadNodes<0)
-        setError(VALUE_ERROR,
+        throw escript::ValueError(
                 "Quad_getNodesPoint: Illegal number of quadrature nodes.");
 }
 
@@ -1229,8 +1220,7 @@ void Quad_getNodesLine(int numQuadNodes, std::vector<double>& quadNodes, std::ve
             break;
 
         default:
-            setError(VALUE_ERROR,"Quad_getNodesLine: Invalid integration order.");
-            break;
+            throw escript::ValueError("Quad_getNodesLine: Invalid integration order.");
     }
 }
 
@@ -1248,18 +1238,15 @@ int Quad_getNumNodesPoint(int order)
 int Quad_getNumNodesLine(int order)
 {
     if (order < 0) {
-        setError(VALUE_ERROR, "Quad_getNumNodesLine: Negative integration order.");
-        return -1;
+        throw escript::ValueError("Quad_getNumNodesLine: Negative integration order.");
     } else if (order > 2*MAX_numQuadNodesLine-1) {
         std::stringstream ss;
         ss << "Quad_getNumNodesLine: requested integration order "
            << order << " on line is too large (>"
            << 2*MAX_numQuadNodesLine-1 << ").";
         const std::string msg(ss.str());
-        setError(VALUE_ERROR, msg.c_str());
-        return -1;
+        throw escript::ValueError(msg);
     } else {
-        resetError();
         return order/2+1;
     }
 }
@@ -1286,22 +1273,14 @@ int Quad_getNumNodesTri(int order)
         return 19;
     } else {
         const int numQuadNodesLine=Quad_getNumNodesLine(order+1);
-        if (noError()) {
-            return numQuadNodesLine*numQuadNodesLine;
-        } else {
-            return -1;
-        }
+        return numQuadNodesLine*numQuadNodesLine;
     }
 }
 
 int Quad_getNumNodesRec(int order)
 {
     const int numQuadNodesLine=Quad_getNumNodesLine(order);
-    if (noError()) {
-        return numQuadNodesLine*numQuadNodesLine;
-    } else {
-        return -1;
-    }
+    return numQuadNodesLine*numQuadNodesLine;
 }
 
 int Quad_getNumNodesTet(int order)
@@ -1324,22 +1303,14 @@ int Quad_getNumNodesTet(int order)
         return 45;
     } else {
         const int numQuadNodesLine=Quad_getNumNodesLine(order+2);
-        if (noError()) {
-            return numQuadNodesLine*numQuadNodesLine*numQuadNodesLine;
-        } else {
-            return -1;
-        }
+        return numQuadNodesLine*numQuadNodesLine*numQuadNodesLine;
     }
 }
 
 int Quad_getNumNodesHex(int order)
 {
     const int numQuadNodesLine=Quad_getNumNodesLine(order);
-    if (noError()) {
-        return numQuadNodesLine*numQuadNodesLine*numQuadNodesLine;
-    } else {
-        return -1;
-    }
+    return numQuadNodesLine*numQuadNodesLine*numQuadNodesLine;
 }
 
 int Quad_MacroPoint(int numSubElements, int numQuadNodes,
@@ -1359,7 +1330,7 @@ int Quad_MacroLine(int numSubElements, int numQuadNodes,
 {
 #define DIM 1
     if (new_len < numSubElements*numQuadNodes) {
-        setError(MEMORY_ERROR, "Quad_MacroLine: array for new quadrature scheme is too small");
+        throw FinleyException("Quad_MacroLine: array for new quadrature scheme is too small");
     }
     const double f=1./((double)numSubElements);
 
@@ -1388,8 +1359,7 @@ int Quad_MacroTri(int numSubElements, int numQuadNodes,
 {
 #define DIM 2
     if (new_len < numSubElements*numQuadNodes) {
-        setError(MEMORY_ERROR, "Quad_MacroTri: array for new quadrature scheme is too small");
-        return -1;
+        throw FinleyException("Quad_MacroTri: array for new quadrature scheme is too small");
     }
 
     if (numSubElements==1) {
@@ -1447,8 +1417,7 @@ int Quad_MacroTri(int numSubElements, int numQuadNodes,
             }
         }
     } else {
-        setError(VALUE_ERROR,"Quad_MacroTri: unable to create quadrature scheme for macro element.");
-        return -1;
+        throw escript::ValueError("Quad_MacroTri: unable to create quadrature scheme for macro element.");
     }
     return numSubElements*numQuadNodes;
 #undef DIM
@@ -1462,8 +1431,7 @@ int Quad_MacroRec(int numSubElements, int numQuadNodes,
 {
 #define DIM 2
     if (new_len < numSubElements*numQuadNodes) {
-        setError(MEMORY_ERROR, "Quad_MacroRec: array for new quadrature scheme is too small");
-        return -1;
+        throw FinleyException("Quad_MacroRec: array for new quadrature scheme is too small");
     }
 
     if (numSubElements==1) {
@@ -1521,8 +1489,7 @@ int Quad_MacroRec(int numSubElements, int numQuadNodes,
             }
         }
     } else {
-        setError(VALUE_ERROR, "Quad_MacroRec: unable to create quadrature scheme for macro element.");
-        return -1;
+        throw escript::ValueError("Quad_MacroRec: unable to create quadrature scheme for macro element.");
     }
     return numSubElements*numQuadNodes;
 #undef DIM
@@ -1535,8 +1502,7 @@ int Quad_MacroTet(int numSubElements, int numQuadNodes, const double* quadNodes,
 {
 #define DIM 3
     if (new_len < numSubElements*numQuadNodes) {
-        setError(MEMORY_ERROR, "Quad_MacroTet: array for new quadrature scheme is too small");
-        return -1;
+        throw FinleyException("Quad_MacroTet: array for new quadrature scheme is too small");
     }
 
     if (numSubElements==1) {
@@ -1644,8 +1610,7 @@ int Quad_MacroTet(int numSubElements, int numQuadNodes, const double* quadNodes,
             }
         }
     } else {
-        setError(VALUE_ERROR, "Quad_MacroTet: unable to create quadrature scheme for macro element.");
-        return -1;
+        throw escript::ValueError("Quad_MacroTet: unable to create quadrature scheme for macro element.");
     }
     return numSubElements*numQuadNodes;
 #undef DIM
@@ -1658,8 +1623,7 @@ int Quad_MacroHex(int numSubElements, int numQuadNodes, const double* quadNodes,
 {
 #define DIM 3
     if (new_len < numSubElements*numQuadNodes) {
-        setError(MEMORY_ERROR, "Quad_MacroHex: array for new quadrature scheme is too small");
-        return -1;
+        throw FinleyException("Quad_MacroHex: array for new quadrature scheme is too small");
     }
 
     if (numSubElements==1) {
@@ -1767,8 +1731,7 @@ int Quad_MacroHex(int numSubElements, int numQuadNodes, const double* quadNodes,
             }
         }
     } else {
-        setError(VALUE_ERROR, "Quad_MacroHex: unable to create quadrature scheme for macro element.");
-        return -1;
+        throw escript::ValueError("Quad_MacroHex: unable to create quadrature scheme for macro element.");
     }
     return numSubElements*numQuadNodes;
 #undef DIM
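
With Quadrature.cpp now throwing instead of calling setError(), the Quad_getNumNodes* helpers no longer return -1 on bad input and callers no longer poll noError(). A hypothetical caller that wanted the old sentinel behaviour back would have to catch the exception explicitly, roughly as below (the include paths are assumed, not taken from the patch):

    #include <finley/Quadrature.h>        // assumed installed header location
    #include <escript/EsysException.h>    // assumed home of escript::ValueError

    // Emulates the old "return -1 on error" contract on top of the new
    // exception-based API.
    int numNodesRecOrMinusOne(int order)
    {
        try {
            const int n = finley::Quad_getNumNodesLine(order);
            return n * n;
        } catch (const escript::ValueError&) {
            return -1;
        }
    }
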
diff --git a/finley/src/RectangularMesh.h b/finley/src/RectangularMesh.h
deleted file mode 100644
index 6169767..0000000
--- a/finley/src/RectangularMesh.h
+++ /dev/null
@@ -1,56 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************
-
-  Finley: declaration of rectangular mesh generators in 1D, 2D, 3D.
-
-*****************************************************************************/
-
-#ifndef __FINLEY_RECTANGULARMESH_H__
-#define __FINLEY_RECTANGULARMESH_H__
-
-#include "Mesh.h"
-
-namespace finley {
-
-Mesh* RectangularMesh_Hex20(const dim_t* numElements, const double* length,
-                            const bool* periodic, int order, int reducedOrder,
-                            bool useElementsOnFace, bool useFullElementOrder,
-                            bool useMacroElements, bool optimize,
-                            esysUtils::JMPI& mpi_info);
-
-Mesh* RectangularMesh_Hex8(const dim_t* numElements, const double* length,
-                           const bool* periodic, int order, int reducedOrder,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool useMacroElements, esysUtils::JMPI& mpi_info);
-
-Mesh* RectangularMesh_Rec8(const dim_t* numElements, const double* length,
-                           const bool* periodic, int order, int reducedOrder,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool useMacroElements, bool optimize,
-                           esysUtils::JMPI& mpi_info);
-
-Mesh* RectangularMesh_Rec4(const dim_t* numElements, const double* length,
-                           const bool* periodic, int order, int reducedOrder,
-                           bool useElementsOnFace, bool useFullElementOrder,
-                           bool useMacroElements, esysUtils::JMPI& mpi_info);
-
-}
-
-#endif // __FINLEY_RECTANGULARMESH_H__
-
diff --git a/finley/src/ReferenceElementSets.h b/finley/src/ReferenceElementSets.h
index 7f0d9a0..ac961a6 100644
--- a/finley/src/ReferenceElementSets.h
+++ b/finley/src/ReferenceElementSets.h
@@ -29,23 +29,17 @@ struct ReferenceElementSet {
         const ReferenceElementInfo* id_info = ReferenceElement::getInfo(id);
         const ShapeFunctionInfo* bf_info = ShapeFunction::getInfo(
                                                     id_info->BasisFunctions);
-        if (!noError()) return;
-
         if (order<0)
             order=std::max(2*bf_info->numOrder, 0);
 
         referenceElement.reset(new ReferenceElement(id, order));
-        if (noError()) {
-            if (reduced_order<0)
-                reduced_order=std::max(2*(bf_info->numOrder-1), 0);
-            referenceElementReducedQuadrature.reset(
-                    new ReferenceElement(id, reduced_order));
-        }
+        if (reduced_order<0)
+            reduced_order=std::max(2*(bf_info->numOrder-1), 0);
+        referenceElementReducedQuadrature.reset(new ReferenceElement(id,
+                                                             reduced_order));
 
-        if (noError()) {
-            if (referenceElement->getNumNodes() != referenceElementReducedQuadrature->getNumNodes()) {
-                setError(VALUE_ERROR, "ReferenceElementSet: numNodes in referenceElement and referenceElementReducedQuadrature don't match.");
-            }
+        if (referenceElement->getNumNodes() != referenceElementReducedQuadrature->getNumNodes()) {
+            throw escript::ValueError("ReferenceElementSet: numNodes in referenceElement and referenceElementReducedQuadrature don't match.");
         }
     }
 
diff --git a/finley/src/ReferenceElements.cpp b/finley/src/ReferenceElements.cpp
index 119ed8f..64621f5 100644
--- a/finley/src/ReferenceElements.cpp
+++ b/finley/src/ReferenceElements.cpp
@@ -22,9 +22,9 @@
 *****************************************************************************/
 
 #include "ReferenceElements.h"
-#include "esysUtils/mem.h"
-#include <cstring>
+
 #include <algorithm> // std::max
+#include <cstring>
 
 namespace finley {
 
@@ -581,13 +581,11 @@ ReferenceElement::ReferenceElement(ElementTypeId id, int order) :
 {
     Type = getInfo(id);
     if (!Type) {
-        setError(VALUE_ERROR, "ReferenceElement: unable to identify element type.");
-        return;
+        throw escript::ValueError("ReferenceElement: unable to identify element type.");
     }
     LinearType = getInfo(Type->LinearTypeId);
     if (!LinearType) {
-        setError(VALUE_ERROR, "ReferenceElement: unable to identify linear element type.");
-        return;
+        throw escript::ValueError("ReferenceElement: unable to identify linear element type.");
     }
 
     const QuadInfo* quadscheme=QuadInfo_getInfo(Type->Quadrature);
@@ -632,10 +630,8 @@ ReferenceElement::ReferenceElement(ElementTypeId id, int order) :
                 &BasisFunctions->dSdv[0],
                 numQuadNodes*nsub, &quadNodes2[0], &quadWeights2[0],
                 DBasisFunctionDv);
-        if (noError()) {
-            Parametrization.reset(new ShapeFunction(parametrization->TypeId, quadscheme->numDim, numQuadNodes2, quadNodes2, quadWeights2));
-            LinearBasisFunctions.reset(new ShapeFunction(linearbasisfunction->TypeId, quadscheme->numDim, numQuadNodes2, quadNodes2, quadWeights2));
-        }
+        Parametrization.reset(new ShapeFunction(parametrization->TypeId, quadscheme->numDim, numQuadNodes2, quadNodes2, quadWeights2));
+        LinearBasisFunctions.reset(new ShapeFunction(linearbasisfunction->TypeId, quadscheme->numDim, numQuadNodes2, quadNodes2, quadWeights2));
     } else {
         Parametrization.reset(new ShapeFunction(parametrization->TypeId, quadscheme->numDim, numQuadNodes*nsub, quadNodes, quadWeights));
         BasisFunctions.reset(new ShapeFunction(basisfunction->TypeId, quadscheme->numDim, numQuadNodes, quadNodes, quadWeights));
@@ -674,7 +670,7 @@ const ReferenceElementInfo* ReferenceElement::getInfo(ElementTypeId id)
         ptr++;
     }
     if (out==NULL) {
-        setError(VALUE_ERROR, "ReferenceElement::getInfo: cannot find requested reference element.");
+        throw escript::ValueError("ReferenceElement::getInfo: cannot find requested reference element.");
     }
     return out;
 }
diff --git a/finley/src/SConscript b/finley/src/SConscript
index 497aa6f..f209c30 100644
--- a/finley/src/SConscript
+++ b/finley/src/SConscript
@@ -14,15 +14,9 @@
 #
 ##############################################################################
 
-import os
 Import('*')
 
-local_env = env.Clone()
-py_wrapper_local_env = env.Clone()
-
-# Remove the sharedlibrary prefix on all platform - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'finley'
 
 sources = """
     Assemble_AverageElementData.cpp
@@ -48,14 +42,15 @@ sources = """
     Assemble_integrate.cpp
     Assemble_interpolate.cpp
     Assemble_jacobians.cpp
+    DomainFactory.cpp
     ElementFile.cpp
     ElementFile_jacobians.cpp
-    Finley.cpp
+    FinleyDomain.cpp
     IndexList.cpp
-    Mesh.cpp
     Mesh_addPoints.cpp
     Mesh_findMatchingFaces.cpp
-    Mesh_getPattern.cpp
+    Mesh_getPasoPattern.cpp
+    Mesh_getTrilinosGraph.cpp
     Mesh_glueFaces.cpp
     Mesh_hex20.cpp
     Mesh_hex8.cpp
@@ -72,79 +67,60 @@ sources = """
     ReferenceElements.cpp
     ShapeFunctions.cpp
     Util.cpp
-    CPPAdapter/FinleyAdapterException.cpp
-    CPPAdapter/MeshAdapter.cpp
-    CPPAdapter/MeshAdapterFactory.cpp
 """.split()
 
 headers = """
     Assemble.h
+    DomainFactory.h
     ElementFile.h
     Finley.h
+    FinleyDomain.h
+    FinleyException.h
     FinleyVersion.h
     IndexList.h
-    Mesh.h
     NodeFile.h
     NodeMapping.h
     Quadrature.h
-    RectangularMesh.h
     ReferenceElements.h
     ShapeFunctions.h
     Util.h
     ReferenceElementSets.h
 """.split()
 
-cppadapter_headers = """
-    CPPAdapter/FinleyAdapterException.h
-    CPPAdapter/MeshAdapter.h
-    CPPAdapter/MeshAdapterFactory.h
-    CPPAdapter/system_dep.h
-""".split()
-
-local_env.Prepend(LIBS = ['pasowrap', 'escript', 'paso', 'esysUtils'])
+local_env = env.Clone()
 
 if IS_WINDOWS:
     local_env.Append(CPPDEFINES = ['FINLEY_EXPORTS'])
 
-module_name = 'finley'
+# collect dependencies for other modules
+finleylibs = []
+finleylibs += env['escript_libs']
+if env['parmetis']:
+    finleylibs += env['parmetis_libs']
+if env['paso']:
+    finleylibs += env['paso_libs']
+if env['trilinos']:
+    finleylibs += env['trilinoswrap_libs']
 
-lib = local_env.SharedLibrary(module_name, sources)
-env.Alias('build_finley_lib', lib)
+local_env.PrependUnique(LIBS = finleylibs)
 
-include_path = Dir('finley', local_env['incinstall'])
-cppadapter_include_path = Dir('CppAdapter', include_path)
+env['finley_libs'] = [module_name] + finleylibs
 
-hdr_inst1 = local_env.Install(include_path, headers)
-hdr_inst2 = local_env.Install(cppadapter_include_path, cppadapter_headers)
-env.Alias('install_finley_headers', [hdr_inst1, hdr_inst2])
+include_path = Dir(module_name, local_env['incinstall'])
+hdr_inst = local_env.Install(include_path, headers)
 
+lib = local_env.SharedLibrary(module_name, sources)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_finley_lib', lib_inst)
 
 ### Python wrapper ###
-if not env['build_shared']:
-    py_wrapper_local_env.Prepend(LIBS = ['finley', 'pasowrap', 'escript', 'esysUtils'])
-else:
-    py_wrapper_local_env.Prepend(LIBS = ['finley', 'pasowrap', 'escript', 'paso', 'esysUtils'])
-
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'CPPAdapter/finleycpp.cpp')
-env.Alias('build_finleycpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_finleycpp_lib', mod_inst)
+py_env = env.Clone()
+py_env.PrependUnique(LIBS = env['finley_libs'])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'finleycpp.cpp')
 
-# configure python module
-local_env.SConscript(dirs = ['#/finley/py_src'], variant_dir='py', duplicate=0)
+mod_path = Dir(module_name, local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
 
-# configure unit tests
-local_env.SConscript(dirs = ['#/finley/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
+build = env.Alias('build_finley', [hdr_inst, lib, py_lib])
+env.Alias('install_finley', [build, lib_inst, mod_inst])
 
diff --git a/finley/src/ShapeFunctions.cpp b/finley/src/ShapeFunctions.cpp
index 59c22e7..cf8774e 100644
--- a/finley/src/ShapeFunctions.cpp
+++ b/finley/src/ShapeFunctions.cpp
@@ -22,8 +22,9 @@
 *****************************************************************************/
 
 #include "ShapeFunctions.h"
-#include "esysUtils/mem.h"
-#include "esysUtils/index.h"
+
+#include <escript/index.h>
+
 #include <cstring>
 
 namespace finley {
@@ -66,8 +67,7 @@ ShapeFunction::ShapeFunction(ShapeFunctionTypeId id, int numQDim,
     const int numShapes=ShapeFunction_InfoList[id].numShapes;
 
     if (numQDim>numDim) {
-        setError(VALUE_ERROR, "ShapeFunction: number of spatial dimensions of quadrature scheme is larger than the spatial dimensionality of shape function.");
-        return;
+        throw escript::ValueError("ShapeFunction: number of spatial dimensions of quadrature scheme is larger than the spatial dimensionality of shape function.");
     }
 
     Type=getInfo(id);
@@ -111,7 +111,7 @@ const ShapeFunctionInfo* ShapeFunction::getInfo(ShapeFunctionTypeId id)
        idx++;
     }
     if (out==NULL) {
-        setError(VALUE_ERROR, "ShapeFunction::getInfo: cannot find requested shape function");
+        throw escript::ValueError("ShapeFunction::getInfo: cannot find requested shape function");
     }
     return out;
 }
diff --git a/finley/src/Util.cpp b/finley/src/Util.cpp
index 36b9a96..cdf358c 100644
--- a/finley/src/Util.cpp
+++ b/finley/src/Util.cpp
@@ -14,26 +14,18 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Some utility routines
-
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include "Finley.h"
 #include "Util.h"
-#include "esysUtils/index.h"
+
+#include <escript/index.h>
 
 #include <algorithm> // std::sort
-#include <limits>
 
 namespace finley {
 namespace util {
 
+using escript::DataTypes::real_t;
+using escript::DataTypes::cplx_t;
+
 /// comparison function for sortValueAndIndex
 bool ValueAndIndexCompare(const std::pair<int,int> &i, const std::pair<int, int> &j)
 {
@@ -43,66 +35,66 @@ bool ValueAndIndexCompare(const std::pair<int,int> &i, const std::pair<int, int>
     return i.first < j.first;
 }
 
-/// orders a ValueAndIndexList by value
 void sortValueAndIndex(ValueAndIndexList& array)
 {
     std::sort(array.begin(), array.end(), ValueAndIndexCompare);
 }
 
-/// gathers values into vector out from vector in using index:
-///   out(1:numData, 1:len) := in(1:numData, index(1:len))
-void gather(dim_t len, const index_t* index, dim_t numData, const double* in, double* out)
+void gather(int len, const index_t* index, int numData, const double* in,
+            double* out)
 {
-    for (index_t s=0; s<len; s++) {
-        for (index_t i=0; i<numData; i++) {
-            out[INDEX2(i,s,numData)] = in[INDEX2(i,index[s],numData)];
+    for (int s = 0; s < len; s++) {
+        for (int i = 0; i < numData; i++) {
+            out[INDEX2(i, s, numData)] = in[INDEX2(i, index[s], numData)];
         }
     }
 }
 
-/// adds a vector in into out using an index:
-///   out(1:numData,index[p])+=in(1:numData,p) where
-///   p={k=1...len, index[k]<upperBound}
-void addScatter(dim_t len, const index_t* index, dim_t numData,
-                const double* in, double* out, index_t upperBound)
+template<typename Scalar>
+void addScatter(int len, const index_t* index, int numData,
+                const Scalar* in, Scalar* out, index_t upperBound)
 {
-    for (index_t s=0; s<len; s++) {
-        for (index_t i=0; i<numData; i++) {
+    for (int s = 0; s < len; s++) {
+        for (int i = 0; i < numData; i++) {
             if (index[s] < upperBound) {
-                out[INDEX2(i,index[s],numData)]+=in[INDEX2(i,s,numData)];
+                out[INDEX2(i, index[s], numData)] += in[INDEX2(i, s, numData)];
             }
         }
     }
 }
 
-/// multiplies two matrices: A(1:A1,1:A2) := B(1:A1,1:B2)*C(1:B2,1:A2)
+template
+void addScatter<real_t>(int len, const index_t* index, int numData,
+                        const real_t* in, real_t* out, index_t upperBound);
+template
+void addScatter<cplx_t>(int len, const index_t* index, int numData,
+                        const cplx_t* in, cplx_t* out, index_t upperBound);
+
 void smallMatMult(int A1, int A2, double* A, int B2,
                   const std::vector<double>& B,
                   const std::vector<double>& C)
 {
-    for (int i=0; i<A1; i++) {
-        for (int j=0; j<A2; j++) {
-            double sum=0.;
-            for (int s=0; s<B2; s++)
-                sum+=B[INDEX2(i,s,A1)]*C[INDEX2(s,j,B2)];
-            A[INDEX2(i,j,A1)]=sum;
+    for (int i = 0; i < A1; i++) {
+        for (int j = 0; j < A2; j++) {
+            double sum = 0.;
+            for (int s = 0; s < B2; s++)
+                sum += B[INDEX2(i,s,A1)] * C[INDEX2(s,j,B2)];
+            A[INDEX2(i,j,A1)] = sum;
         }
     }
 }
 
-/// multiplies a set of matrices with a single matrix:
-///   A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2) for i=1,len
 void smallMatSetMult1(int len, int A1, int A2, double* A, int B2,
                       const std::vector<double>& B,
                       const std::vector<double>& C)
 {
-    for (int q=0; q<len; q++) {
-        for (int i=0; i<A1; i++) {
-            for (int j=0; j<A2; j++) {
-                double sum=0.;
-                for (int s=0; s<B2; s++)
-                    sum+=B[INDEX3(i,s,q,A1,B2)]*C[INDEX2(s,j,B2)];
-                A[INDEX3(i,j,q,A1,A2)]=sum;
+    for (int q = 0; q < len; q++) {
+        for (int i = 0; i < A1; i++) {
+            for (int j = 0; j < A2; j++) {
+                double sum = 0.;
+                for (int s = 0; s < B2; s++)
+                    sum += B[INDEX3(i,s,q,A1,B2)] * C[INDEX2(s,j,B2)];
+                A[INDEX3(i,j,q,A1,A2)] = sum;
             }
         }
     }
@@ -116,12 +108,11 @@ void invertSmallMat(int len, int dim, const double* A, double *invA, double* det
         case 1:
             for (int q=0; q<len; q++) {
                 const double D=A[q];
-                if (ABS(D) > 0) {
+                if (std::abs(D) > 0) {
                     det[q]=D;
                     invA[q]=1./D;
                 } else {
-                    setError(ZERO_DIVISION_ERROR, "InvertSmallMat: Non-regular matrix");
-                    break;
+                    throw escript::ValueError("InvertSmallMat: Non-regular matrix");
                 }
             }
             break;
@@ -134,15 +125,14 @@ void invertSmallMat(int len, int dim, const double* A, double *invA, double* det
                 const double A22=A[INDEX3(1,1,q,2,2)];
 
                 const double D = A11*A22-A12*A21;
-                if (ABS(D) > 0) {
+                if (std::abs(D) > 0) {
                     det[q]=D;
                     invA[INDEX3(0,0,q,2,2)]= A22/D;
                     invA[INDEX3(1,0,q,2,2)]=-A21/D;
                     invA[INDEX3(0,1,q,2,2)]=-A12/D;
                     invA[INDEX3(1,1,q,2,2)]= A11/D;
                 } else {
-                    setError(ZERO_DIVISION_ERROR, "InvertSmallMat: Non-regular matrix");
-                    break;
+                    throw escript::ValueError("InvertSmallMat: Non-regular matrix");
                 }
             }
             break;
@@ -160,7 +150,7 @@ void invertSmallMat(int len, int dim, const double* A, double *invA, double* det
                 const double A33=A[INDEX3(2,2,q,3,3)];
 
                 const double D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
-                if (ABS(D) > 0) {
+                if (std::abs(D) > 0) {
                     det[q]=D;
                     invA[INDEX3(0,0,q,3,3)]=(A22*A33-A23*A32)/D;
                     invA[INDEX3(1,0,q,3,3)]=(A31*A23-A21*A33)/D;
@@ -172,64 +162,60 @@ void invertSmallMat(int len, int dim, const double* A, double *invA, double* det
                     invA[INDEX3(1,2,q,3,3)]=(A13*A21-A11*A23)/D;
                     invA[INDEX3(2,2,q,3,3)]=(A11*A22-A12*A21)/D;
                 } else {
-                    setError(ZERO_DIVISION_ERROR, "InvertSmallMat: Non-regular matrix");
-                    break;
+                    throw escript::ValueError("InvertSmallMat: Non-regular matrix");
                 }
             }
             break;
 
         default:
-            setError(VALUE_ERROR, "InvertSmallMat: dim must be <=3");
+            throw escript::ValueError("InvertSmallMat: dim must be <=3");
             break;
     }
 }
 
-/// returns the normalized vector Normal[dim,len] orthogonal to A(:,0,q) and
-/// A(:,1,q) in the case of dim=3, or the vector A(:,0,q) in the case of dim=2
 void normalVector(int len, int dim, int dim1, const double* A, double* Normal)
 {
     int q;
-    double A11,A12,CO_A13,A21,A22,CO_A23,A31,A32,CO_A33,length,invlength;
 
-    switch(dim) {
+    switch (dim) {
         case 1:
-            for (q=0;q<len;q++) Normal[q]=1;
+            for (q = 0; q < len; q++)
+                Normal[q] = 1.;
             break;
         case 2:
-            for (q=0;q<len;q++) {
-                A11=A[INDEX3(0,0,q,2,dim1)];
-                A21=A[INDEX3(1,0,q,2,dim1)];
-                length = sqrt(A11*A11+A21*A21);
+            for (q = 0; q < len; q++) {
+                const double A11 = A[INDEX3(0,0,q,2,dim1)];
+                const double A21 = A[INDEX3(1,0,q,2,dim1)];
+                const double length = sqrt(A11*A11+A21*A21);
                 if (length <= 0) {
-                    setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero.");
-                    return;
+                    throw FinleyException("normalVector: area equals zero.");
                 } else {
-                    invlength=1./length;
-                    Normal[INDEX2(0,q,2)]=A21*invlength;
-                    Normal[INDEX2(1,q,2)]=-A11*invlength;
+                    const double invlength = 1./length;
+                    Normal[INDEX2(0,q,2)] =  A21*invlength;
+                    Normal[INDEX2(1,q,2)] = -A11*invlength;
                 }
             }
             break;
         case 3:
-            for (q=0;q<len;q++) {
-                A11=A[INDEX3(0,0,q,3,dim1)];
-                A21=A[INDEX3(1,0,q,3,dim1)];
-                A31=A[INDEX3(2,0,q,3,dim1)];
-                A12=A[INDEX3(0,1,q,3,dim1)];
-                A22=A[INDEX3(1,1,q,3,dim1)];
-                A32=A[INDEX3(2,1,q,3,dim1)];
-                CO_A13=A21*A32-A31*A22;
-                CO_A23=A31*A12-A11*A32;
-                CO_A33=A11*A22-A21*A12;
-                length=sqrt(CO_A13*CO_A13+CO_A23*CO_A23+CO_A33*CO_A33);
+            for (q = 0; q < len; q++) {
+                const double A11 = A[INDEX3(0,0,q,3,dim1)];
+                const double A21 = A[INDEX3(1,0,q,3,dim1)];
+                const double A31 = A[INDEX3(2,0,q,3,dim1)];
+                const double A12 = A[INDEX3(0,1,q,3,dim1)];
+                const double A22 = A[INDEX3(1,1,q,3,dim1)];
+                const double A32 = A[INDEX3(2,1,q,3,dim1)];
+                const double CO_A13 = A21*A32-A31*A22;
+                const double CO_A23 = A31*A12-A11*A32;
+                const double CO_A33 = A11*A22-A21*A12;
+                const double length = sqrt(CO_A13*CO_A13 + CO_A23*CO_A23
+                                           + CO_A33*CO_A33);
                 if (length <= 0) {
-                    setError(ZERO_DIVISION_ERROR, __FILE__ ": area equals zero.");
-                    return;
+                    throw FinleyException("normalVector: area equals zero.");
                 } else {
-                    invlength=1./length;
-                    Normal[INDEX2(0,q,3)]=CO_A13*invlength;
-                    Normal[INDEX2(1,q,3)]=CO_A23*invlength;
-                    Normal[INDEX2(2,q,3)]=CO_A33*invlength;
+                    const double invlength = 1./length;
+                    Normal[INDEX2(0,q,3)] = CO_A13*invlength;
+                    Normal[INDEX2(1,q,3)] = CO_A23*invlength;
+                    Normal[INDEX2(2,q,3)] = CO_A33*invlength;
                 }
             }
             break;
@@ -241,17 +227,17 @@ index_t getMinInt(int dim, dim_t N, const index_t* values)
 {
     index_t out = std::numeric_limits<index_t>::max();
     if (values && dim*N > 0) {
-        out=values[0];
+        out = values[0];
 #pragma omp parallel
         {
-            index_t out_local=out;
+            index_t out_local = out;
 #pragma omp for
             for (index_t j=0; j<N; j++) {
                 for (int i=0; i<dim; i++)
-                    out_local=std::min(out_local, values[INDEX2(i,j,dim)]);
+                    out_local = std::min(out_local, values[INDEX2(i,j,dim)]);
             }
 #pragma omp critical
-            out=std::min(out_local, out);
+            out = std::min(out_local, out);
         }
     }
     return out;
@@ -278,68 +264,64 @@ index_t getMaxInt(int dim, dim_t N, const index_t* values)
     return out;
 }
 
-std::pair<index_t,index_t> getMinMaxInt(int dim, dim_t N, const index_t* values)
+IndexPair getMinMaxInt(int dim, dim_t N, const index_t* values)
 {
-    index_t vmin = std::numeric_limits<index_t>::max();
-    index_t vmax = std::numeric_limits<index_t>::min();
+    index_t vmin = escript::DataTypes::index_t_max();
+    index_t vmax = escript::DataTypes::index_t_min();
     if (values && dim*N > 0) {
         vmin = vmax = values[0];
 #pragma omp parallel
         {
-            index_t vmin_local=vmin;
-            index_t vmax_local=vmax;
+            index_t vmin_local = vmin;
+            index_t vmax_local = vmax;
 #pragma omp for
-            for (index_t j=0; j<N; j++) {
-                for (int i=0; i<dim; i++) {
-                    vmin_local=std::min(vmin_local, values[INDEX2(i,j,dim)]);
-                    vmax_local=std::max(vmax_local, values[INDEX2(i,j,dim)]);
+            for (index_t j = 0; j < N; j++) {
+                for (int i = 0; i < dim; i++) {
+                    vmin_local = std::min(vmin_local, values[INDEX2(i,j,dim)]);
+                    vmax_local = std::max(vmax_local, values[INDEX2(i,j,dim)]);
                 }
             }
 #pragma omp critical
             {
-                vmin=std::min(vmin_local, vmin);
-                vmax=std::max(vmax_local, vmax);
+                vmin = std::min(vmin_local, vmin);
+                vmax = std::max(vmax_local, vmax);
             }
         }
     }
-    return std::pair<index_t,index_t>(vmin,vmax);
+    return IndexPair(vmin,vmax);
 }
 
-/// calculates the minimum and maximum value from an integer array of length N
-/// disregarding the value 'ignore'
-std::pair<index_t,index_t> getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore)
+IndexPair getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore)
 {
-    index_t vmin = std::numeric_limits<index_t>::max();
-    index_t vmax = std::numeric_limits<index_t>::min();
+    index_t vmin = escript::DataTypes::index_t_max();
+    index_t vmax = escript::DataTypes::index_t_min();
     if (values && N > 0) {
         vmin = vmax = values[0];
 #pragma omp parallel
         {
-            index_t vmin_local=vmin;
-            index_t vmax_local=vmax;
+            index_t vmin_local = vmin;
+            index_t vmax_local = vmax;
 #pragma omp for
-            for (index_t i=0; i<N; i++) {
+            for (index_t i = 0; i < N; i++) {
                 if (values[i] != ignore) {
-                    vmin_local=std::min(vmin_local, values[i]);
-                    vmax_local=std::max(vmax_local, values[i]);
+                    vmin_local = std::min(vmin_local, values[i]);
+                    vmax_local = std::max(vmax_local, values[i]);
                 }
             }
 #pragma omp critical
             {
-                vmin=std::min(vmin_local, vmin);
-                vmax=std::max(vmax_local, vmax);
+                vmin = std::min(vmin_local, vmin);
+                vmax = std::max(vmax_local, vmax);
             }
         }
     }
-    return std::pair<index_t,index_t>(vmin,vmax);
+    return IndexPair(vmin,vmax);
 }
 
-/// determines the indices of the positive entries in mask returning the
-/// length of index.
 std::vector<index_t> packMask(const std::vector<short>& mask)
 {
     std::vector<index_t> index;
-    for (index_t k=0; k<mask.size(); k++) {
+    for (index_t k = 0; k < mask.size(); k++) {
         if (mask[k] >= 0) {
             index.push_back(k);
         }
@@ -347,43 +329,45 @@ std::vector<index_t> packMask(const std::vector<short>& mask)
     return index;
 }
 
-void setValuesInUse(const int *values, const int numValues,
-                    std::vector<int>& valuesInUse, esysUtils::JMPI& mpiinfo)
+void setValuesInUse(const int* values, dim_t numValues,
+                    std::vector<int>& valuesInUse, escript::JMPI mpiinfo)
 {
-    int lastFoundValue=INDEX_T_MIN;
-    bool allFound=false;
+    const int MAX_VALUE = std::numeric_limits<int>::max();
+    int lastFoundValue = std::numeric_limits<int>::min();
+    bool allFound = false;
 
     valuesInUse.clear();
 
     while (!allFound) {
         // find smallest value bigger than lastFoundValue
-        int minFoundValue = INDEX_T_MAX;
+        int minFoundValue = MAX_VALUE;
 #pragma omp parallel
         {
-            int local_minFoundValue=minFoundValue;
+            int local_minFoundValue = minFoundValue;
 #pragma omp for
-            for (int i=0; i<numValues; i++) {
-                const int val=values[i];
-                if ((val>lastFoundValue) && (val<local_minFoundValue))
-                    local_minFoundValue=val;
+            for (index_t i = 0; i < numValues; i++) {
+                const int val = values[i];
+                if (val > lastFoundValue && val < local_minFoundValue)
+                    local_minFoundValue = val;
             }
 #pragma omp critical
             {
-                if (local_minFoundValue<minFoundValue)
-                    minFoundValue=local_minFoundValue;
+                if (local_minFoundValue < minFoundValue)
+                    minFoundValue = local_minFoundValue;
             }
         }
 #ifdef ESYS_MPI
-        int local_minFoundValue=minFoundValue;
-        MPI_Allreduce(&local_minFoundValue, &minFoundValue, 1, MPI_INT, MPI_MIN, mpiinfo->comm);
+        int local_minFoundValue = minFoundValue;
+        MPI_Allreduce(&local_minFoundValue, &minFoundValue, 1, MPI_INT,
+                      MPI_MIN, mpiinfo->comm);
 #endif
 
         // if we found a new value we need to add this to valuesInUse
-        if (minFoundValue < INDEX_T_MAX) {
+        if (minFoundValue < MAX_VALUE) {
             valuesInUse.push_back(minFoundValue);
-            lastFoundValue=minFoundValue;
+            lastFoundValue = minFoundValue;
         } else {
-            allFound=true;
+            allFound = true;
         }
     }
 }
diff --git a/finley/src/Util.h b/finley/src/Util.h
index b21177f..eb939b9 100644
--- a/finley/src/Util.h
+++ b/finley/src/Util.h
@@ -14,12 +14,7 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************
-
-  Some utility routines
-
-*****************************************************************************/
+/// Some utility routines
 
 #ifndef __FINLEY_UTIL_H__
 #define __FINLEY_UTIL_H__
@@ -31,11 +26,10 @@
 namespace finley {
 namespace util {
 
-typedef std::vector< std::pair<int,int> > ValueAndIndexList;
+typedef std::pair<index_t,index_t> IndexPair;
+typedef std::vector<IndexPair> ValueAndIndexList;
 
-/// sortValueAndIndex is used to sort items by a value.
-/// index points to the location of the original item array and can be used
-/// to reorder the array
+/// orders a ValueAndIndexList by value.
 void sortValueAndIndex(ValueAndIndexList& array);
 
 /// returns true if the data object is defined on reduced element types
@@ -47,16 +41,25 @@ inline bool hasReducedIntegrationOrder(const escript::Data& in)
                 || fs == FINLEY_REDUCED_CONTACT_ELEMENTS_2);
 }
 
-void gather(dim_t len, const index_t* index, dim_t numData, const double* in,
+/// gathers values into array `out` from array `in` using `index`:
+///   out(1:numData, 1:len) := in(1:numData, index(1:len))
+void gather(int len, const index_t* index, int numData, const double* in,
             double* out);
 
-void addScatter(dim_t len, const index_t* index, dim_t numData,
-                const double* in, double* out, index_t upperBound);
+/// adds array `in` into `out` using an `index`:
+///   out(1:numData,index[p])+=in(1:numData,p) where
+///   p={k=1...len, index[k]<upperBound}
+template<typename Scalar>
+void addScatter(int len, const index_t* index, int numData,
+                const Scalar* in, Scalar* out, index_t upperBound);
 
+/// multiplies two matrices: A(1:A1,1:A2) := B(1:A1,1:B2)*C(1:B2,1:A2)
 void smallMatMult(int A1, int A2, double* A, int B2,
                   const std::vector<double>& B,
                   const std::vector<double>& C);
 
+/// multiplies a set of matrices with a single matrix:
+///   A(1:A1,1:A2,i)=B(1:A1,1:B2,i)*C(1:B2,1:A2) for i=1,len
 void smallMatSetMult1(int len, int A1, int A2, double* A, int B2,
                       const std::vector<double>& B,
                       const std::vector<double>& C);
@@ -64,20 +67,28 @@ void smallMatSetMult1(int len, int A1, int A2, double* A, int B2,
 void invertSmallMat(int len, int dim, const double* A, double *invA,
                     double* det);
 
+/// returns the normalized vector normal[dim,len] orthogonal to A(:,0,q) and
+/// A(:,1,q) in the case of dim=3, or the vector A(:,0,q) in the case of dim=2
 void normalVector(int len, int dim, int dim1, const double* A, double* Normal);
 
 index_t getMinInt(int dim, dim_t N, const index_t* values);
 
 index_t getMaxInt(int dim, dim_t N, const index_t* values);
 
-std::pair<index_t,index_t> getMinMaxInt(int dim, dim_t N, const index_t* values);
+/// calculates the minimum and maximum value from an integer array of length
+/// N x dim
+IndexPair getMinMaxInt(int dim, dim_t N, const index_t* values);
 
-std::pair<index_t,index_t> getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore);
+/// calculates the minimum and maximum value from an integer array of length N
+/// disregarding the value `ignore`
+IndexPair getFlaggedMinMaxInt(dim_t N, const index_t* values, index_t ignore);
 
+/// extracts the positive entries in `mask` returning a contiguous vector of
+/// those entries
 std::vector<index_t> packMask(const std::vector<short>& mask);
 
-void setValuesInUse(const int *values, const int numValues,
-                    std::vector<int>& valuesInUse, esysUtils::JMPI& mpiinfo);
+void setValuesInUse(const int* values, dim_t numValues,
+                    std::vector<int>& valuesInUse, escript::JMPI mpiInfo);
 
 } // namespace util
 } // namespace finley
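
The doc comments above describe gather/addScatter in slice notation; assuming escript's usual INDEX2(i,s,n) == i + n*s layout (numData consecutive values per sample), the semantics boil down to the following standalone sketch, with the templated version mirroring the new real_t/cplx_t instantiations:

    #include <complex>

    typedef long index_t;   // stand-in for escript::DataTypes::index_t

    // out(1:numData, 1:len) := in(1:numData, index(1:len))
    void gatherSketch(int len, const index_t* index, int numData,
                      const double* in, double* out)
    {
        for (int s = 0; s < len; s++)
            for (int i = 0; i < numData; i++)
                out[i + numData*s] = in[i + numData*index[s]];
    }

    // out(1:numData, index[p]) += in(1:numData, p) for all p with
    // index[p] < upperBound; samples at or beyond upperBound are skipped.
    template<typename Scalar>
    void addScatterSketch(int len, const index_t* index, int numData,
                          const Scalar* in, Scalar* out, index_t upperBound)
    {
        for (int s = 0; s < len; s++)
            if (index[s] < upperBound)
                for (int i = 0; i < numData; i++)
                    out[i + numData*index[s]] += in[i + numData*s];
    }

    // works for both scalar types, e.g. addScatterSketch<double>(...) and
    // addScatterSketch<std::complex<double> >(...)
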
diff --git a/dudley/src/CPPAdapter/dudleycpp.cpp b/finley/src/finleycpp.cpp
similarity index 57%
rename from dudley/src/CPPAdapter/dudleycpp.cpp
rename to finley/src/finleycpp.cpp
index 906042b..4d54602 100644
--- a/dudley/src/CPPAdapter/dudleycpp.cpp
+++ b/finley/src/finleycpp.cpp
@@ -14,60 +14,40 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <finley/Finley.h>
+#include <finley/DomainFactory.h>
+#include <finley/FinleyDomain.h>
 
-
-#ifdef ESYS_MPI
-#include "esysUtils/Esys_MPI.h"
-#endif
-#include "../Dudley.h"
-
-#include <pasowrap/SystemMatrixAdapter.h>
-#include <pasowrap/TransportProblemAdapter.h>
-
-#include "MeshAdapter.h"
-#include "MeshAdapterFactory.h"
-
-#include "DudleyAdapterException.h"
-// #include "esysUtils/EsysException.h"
-#include "esysUtils/esysExceptionTranslator.h"
-
-#include "escript/AbstractContinuousDomain.h"
+#include <escript/ExceptionTranslators.h>
 
 #include <boost/python.hpp>
-#include <boost/python/module.hpp>
 #include <boost/python/def.hpp>
+#include <boost/python/module.hpp>
 #include <boost/python/detail/defaults_gen.hpp>
 #include <boost/version.hpp>
 
 using namespace boost::python;
 
-BOOST_PYTHON_MODULE(dudleycpp)
+BOOST_PYTHON_MODULE(finleycpp)
 {
 // This feature was added in boost v1.34
 #if ((BOOST_VERSION/100)%1000 > 34) || (BOOST_VERSION/100000 >1)
-  // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
-  docstring_options docopt(true, true, false);
+    // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
+    docstring_options docopt(true, true, false);
 #endif
 
-  scope().attr("__doc__") = "To use this module, please import esys.dudley";  
-  
-  
-  //
-  // NOTE: The return_value_policy is necessary for functions that
-  // return pointers.
-  //
-  register_exception_translator<dudley::DudleyAdapterException>(&(esysUtils::RuntimeErrorTranslator));
+    scope().attr("__doc__") = "To use this module, please import esys.finley";
+
+    // register escript's default translators
+    REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
+    register_exception_translator<finley::FinleyException>(&escript::RuntimeErrorTranslator);
 
-  def("LoadMesh",dudley::loadMesh,
-      (arg("fileName")="file.nc"),":rtype: `Domain`"
-/*      ,return_value_policy<manage_new_object>());*/
-      );
+  def("LoadMesh", finley::FinleyDomain::load,
+      (arg("fileName") = "file.nc"), ":rtype: `FinleyDomain`");
 
-  def("ReadMesh",dudley::readMesh,
-      (arg("fileName")="file.fly",arg("integrationOrder")=-1,  arg("reducedIntegrationOrder")=-1,  arg("optimize")=true)
-/*      ,return_value_policy<manage_new_object>());*/
+
+  def("__ReadMesh_driver", finley::readMesh_driver,
+      (arg("params"))
 	,"Read a mesh from a file. For MPI parallel runs fan out the mesh to multiple processes.\n\n"
 ":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
 ":param integrationOrder: order of the quadrature scheme. If *integrationOrder<0* the integration order is selected independently.\n"
@@ -75,14 +55,8 @@ BOOST_PYTHON_MODULE(dudleycpp)
 ":param reducedIntegrationOrder: order of the quadrature scheme. If *reducedIntegrationOrder<0* the integration order is selected independently.\n"
 ":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``");
 
-  def("ReadGmsh",dudley::readGmsh,
-      (arg("fileName")="file.msh",
-       arg("numDim"), 
-       arg("integrationOrder")=-1, 
-       arg("reducedIntegrationOrder")=-1, 
-       arg("optimize")=true,  
-       arg("useMacroElements")=false)
-//       ,return_value_policy<manage_new_object>());
+  def("__ReadGmsh_driver", finley::readGmsh_driver,
+      (arg("params"))  
 ,"Read a gmsh mesh file\n\n"
 ":rtype: `Domain`\n:param fileName:\n:type fileName: ``string``\n"
 ":param integrationOrder: order of the quadrature scheme. If *integrationOrder<0* the integration order is selected independently.\n"
@@ -92,24 +66,12 @@ BOOST_PYTHON_MODULE(dudleycpp)
 ":param useMacroElements: Enable the usage of macro elements instead of second order elements.\n:type useMacroElements: ``bool``"
 );
 
-  def ("__Brick_driver",dudley::brick_driver,
-      arg("args"),
-/*      
-      (arg("n0")=1,arg("n1")=1,arg("n2")=1,
-      arg("order")=1,
-      arg("l0")=1.0,arg("l1")=1.0,arg("l2")=1.0,
-      arg("periodic0")=false,arg("periodic1")=false,arg("periodic2")=false,
-      arg("integrationOrder")=-1,  arg("reducedIntegrationOrder")=-1,
-      arg("useElementsOnFace")=false,
-      arg("useFullElementOrder")=false,
-      arg("optimize")=false)
-*/      
-"Creates a tetrahedral mesh by subdividing n0 x n1 x n2 rectangular elements over the brick [0,l0] x [0,l1] x [0,l2]."
-"We accept floating point values for n0, n1 only to ease transition of scripts to python3 when the time comes."
+  def ("__Brick_driver",finley::brick_driver,
+      (arg("params"))
 ,"Creates a rectangular mesh with n0 x n1 x n2 elements over the brick [0,l0] x [0,l1] x [0,l2]."
 "\n\n:param n0: number of elements in direction 0\n:type n0: ``int``\n:param n1: number of elements in direction 1\n:type n1: ``int``\n"
 ":param n2:number of elements in direction 2\n:type n2: ``int``\n"
-":param order: gives the order of shape function. Only for compatibility with finley, must be 1.\n"
+":param order: =1, =-1 or =2 gives the order of shape function. If -1 macro elements of order 1 are used.\n"
 ":param l0: length of side 0\n"
 ":type  l0: ``float``\n"
 ":param l1: length of side 1\n"
@@ -121,25 +83,15 @@ BOOST_PYTHON_MODULE(dudleycpp)
 ":param periodic2: whether or not boundary conditions are periodic in direction 2\n:type periodic2: ``bool``\n"
 ":param integrationOrder: order of the quadrature scheme. If integrationOrder<0 the integration order is selected independently.\n"
 ":param reducedIntegrationOrder: order of the quadrature scheme. If reducedIntegrationOrder<0 the integration order is selected independently.\n"
-":param useElementsOnFace:  Not used\n"
+":param useElementsOnFace:  whether or not to use elements on face\n"
 ":type useElementsOnFace: ``int``\n"
 ":param useFullElementOrder: Whether or not to use Hex27 elements\n"":type useFullElementOrder: ``bool``\n"
 ":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``"
 );
 
-  def ("__Rectangle_driver",dudley::rectangle_driver,
-      arg("args"),
-/*      
-      (arg("n0")=1,arg("n1")=1,arg("order")=1,
-      arg("l0")=1.0,arg("l1")=1.0,
-      arg("periodic0")=false,arg("periodic1")=false,
-      arg("integrationOrder")=-1,  arg("reducedIntegrationOrder")=-1,
-      arg("useElementsOnFace")=false,
-      arg("useFullElementOrder")=false,
-      arg("optimize")=false)
-*/      
-"Creates a triangular mesh by subdividing n0 x n1 rectangular elements over the brick [0,l0] x [0,l1]."
-"We accept floating point values for n0, n1 only to ease transition of scripts to python3 when the time comes."
+  def ("__Rectangle_driver",finley::rectangle_driver,
+      (arg("args")) 
+,"Creates a rectangular mesh with n0 x n1 elements over the brick [0,l0] x [0,l1]."
 "\n\n:param n0:\n:type n0:\n:param n1:\n:type n1:\n"
 ":param order: =1, =-1 or =2 gives the order of shape function. If -1 macro elements of order 1 are used.\n"
 ":param l0: length of side 0\n:param l1:\n"
@@ -149,29 +101,47 @@ BOOST_PYTHON_MODULE(dudleycpp)
 ":type useElementsOnFace: ``int``"
 ":param periodic0:  whether or not boundary conditions are periodic\n"
 ":param periodic1:\n"
-":param useFullElementOrder: Not used: ``bool``\n"
-":param useMacroElements: Enable the usage of first order macro elements.\n:type useMacroElements: ``bool``\n"
+":param useFullElementOrder: Whether or not to use Rec9 elements\n"":type useFullElementOrder: ``bool``\n"
 ":param optimize: Enable optimisation of node labels\n:type optimize: ``bool``"
 );
 
-  class_<dudley::MeshAdapter, bases<escript::AbstractContinuousDomain> >
-      ("MeshAdapter","A concrete class representing a domain. For more details, please consult the c++ documentation.",init<optional <Dudley_Mesh*> >())
-      .def(init<const dudley::MeshAdapter&>())
-      .def("write",&dudley::MeshAdapter::write,args("filename"),
+  def("Merge", finley::meshMerge, args("meshList")
+,"Merges a list of meshes into one mesh.\n\n:rtype: `Domain`"
+  );
+
+  def("GlueFaces", finley::glueFaces,
+      (arg("meshList"), arg("safetyFactor")=0.2,
+      arg("tolerance")=1.e-8,
+      arg("optimize")=true)
+,"Detects matching faces in the mesh, removes them from the mesh and joins the elements touched by the face elements."
+	);
+
+  def("JoinFaces", finley::joinFaces,
+      (arg("meshList"), arg("safetyFactor")=0.2,
+      arg("tolerance")=1.e-8,
+      arg("optimize")=true)
+,"Detects matching faces in the mesh and replaces them by joint elements."
+	);
+
+
+    class_<finley::FinleyDomain, bases<escript::AbstractContinuousDomain> >
+      ("FinleyDomain","A concrete class representing a domain. For more details, please consult the C++ documentation.", no_init)
+      .def(init<const finley::FinleyDomain&>())
+      .def("write", &finley::FinleyDomain::write, args("filename"),
 "Write the current mesh to a file with the given name.")
-      .def("print_mesh_info",&dudley::MeshAdapter::Print_Mesh_Info,(arg("full")=false),
+      .def("print_mesh_info", &finley::FinleyDomain::Print_Mesh_Info, (arg("full")=false),
 ":param full:\n:type full: ``bool``")
-      .def("dump",&dudley::MeshAdapter::dump,args("fileName")
+      .def("dump", &finley::FinleyDomain::dump, args("fileName")
 ,"dumps the mesh to a file with the given name.")
-      .def("getDescription",&dudley::MeshAdapter::getDescription,
+      .def("getDescription", &finley::FinleyDomain::getDescription,
 ":return: a description for this domain\n:rtype: ``string``")
-      .def("getDim",&dudley::MeshAdapter::getDim,":rtype: ``int``")
-      .def("getDataShape",&dudley::MeshAdapter::getDataShape, args("functionSpaceCode"),
+      .def("getDim", &finley::FinleyDomain::getDim,":rtype: ``int``")
+      .def("getDataShape", &finley::FinleyDomain::getDataShape, args("functionSpaceCode"),
 ":return: a pair (dps, ns) where dps=the number of data points per sample, and ns=the number of samples\n:rtype: ``tuple``")
-      .def("getNumDataPointsGlobal",&dudley::MeshAdapter::getNumDataPointsGlobal,
+      .def("getNumDataPointsGlobal", &finley::FinleyDomain::getNumDataPointsGlobal,
 ":return: the number of data points summed across all MPI processes\n"
 ":rtype: ``int``")
-      .def("addPDEToSystem",&dudley::MeshAdapter::addPDEToSystem,
+      .def("addPDEToSystem", &finley::FinleyDomain::addPDEToSystem,
 args("mat", "rhs","A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
 "adds a PDE onto the stiffness matrix mat and a rhs\n\n"
 ":param mat:\n:type mat: `OperatorAdapter`\n:param rhs:\n:type rhs: `Data`\n"
@@ -185,7 +155,7 @@ args("mat", "rhs","A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contac
 ":param d_contact:\n:type d_contact: `Data`\n"
 ":param y_contact:\n:type y_contact: `Data`\n"
 )
-      .def("addPDEToLumpedSystem",&dudley::MeshAdapter::addPDEToLumpedSystem,
+      .def("addPDEToLumpedSystem", &finley::FinleyDomain::addPDEToLumpedSystem,
 args("mat", "D", "d"),
 "adds a PDE onto the lumped stiffness matrix\n\n"
 ":param mat:\n:type mat: `Data`\n"
@@ -193,7 +163,7 @@ args("mat", "D", "d"),
 ":param d:\n:type d: `Data`\n"
 ":param useHRZ:\n:type useHRZ: bool\n"
 )
-      .def("addPDEToRHS",&dudley::MeshAdapter::addPDEToRHS, 
+      .def("addPDEToRHS", &finley::FinleyDomain::addPDEToRHS, 
 args("rhs", "X", "Y", "y", "y_contact"),
 "adds a PDE onto the stiffness matrix mat and a rhs\n\n"
 ":param rhs:\n:type rhs: `Data`\n"
@@ -202,9 +172,9 @@ args("rhs", "X", "Y", "y", "y_contact"),
 ":param y:\n:type y: `Data`\n"
 ":param y_contact:\n:type y_contact: `Data`"
 )
-      .def("addPDEToTransportProblem",&dudley::MeshAdapter::addPDEToTransportProblem,
+      .def("addPDEToTransportProblem", &finley::FinleyDomain::addPDEToTransportProblem,
 args( "tp", "source", "M", "A", "B", "C", "D", "X", "Y", "d", "y", "d_contact", "y_contact"),
-":param tp:\n:type tp: `TransportProblemAdapter`\n"
+":param tp:\n:type tp: `AbstractTransportProblem`\n"
 ":param source:\n:type source: `Data`\n"
 ":param M:\n:type M: `Data`\n"
 ":param A:\n:type A: `Data`\n"
@@ -218,30 +188,30 @@ args( "tp", "source", "M", "A", "B", "C", "D", "X", "Y", "d", "y", "d_contact",
 ":param d_contact:\n:type d_contact: `Data`\n"
 ":param y_contact:\n:type y_contact: `Data`\n"
 )
-      .def("newOperator",&dudley::MeshAdapter::newSystemMatrix,
+      .def("newOperator", &finley::FinleyDomain::newSystemMatrix,
 args("row_blocksize", "row_functionspace", "column_blocksize", "column_functionspace", "type"),
-"creates a SystemMatrixAdapter stiffness matrix and initializes it with zeros\n\n"
+"creates a stiffness matrix and initializes it with zeros\n\n"
 ":param row_blocksize:\n:type row_blocksize: ``int``\n"
 ":param row_functionspace:\n:type row_functionspace: `FunctionSpace`\n"
 ":param column_blocksize:\n:type column_blocksize: ``int``\n"
 ":param column_functionspace:\n:type column_functionspace: `FunctionSpace`\n"
 ":param type:\n:type type: ``int``\n"
 )
-      .def("newTransportProblem",&dudley::MeshAdapter::newTransportProblem,
+      .def("newTransportProblem", &finley::FinleyDomain::newTransportProblem,
 args("theta", "blocksize", "functionspace", "type"),
-"creates a TransportProblemAdapter\n\n"
+"creates a TransportProblem\n\n"
 ":param theta:\n:type theta: ``float``\n"
 ":param blocksize:\n:type blocksize: ``int``\n"
 ":param functionspace:\n:type functionspace: `FunctionSpace`\n"
 ":param type:\n:type type: ``int``\n"
 )
-      .def("getSystemMatrixTypeId",&dudley::MeshAdapter::getSystemMatrixTypeId,
+      .def("getSystemMatrixTypeId", &finley::FinleyDomain::getSystemMatrixTypeId,
 args("options"),
 ":return: the identifier of the matrix type to be used for the global stiffness matrix when particular solver options are used.\n"
 ":rtype: ``int``\n"
 ":param options:\n:type options: `SolverBuddy`\n"
 )
-      .def("getTransportTypeId",&dudley::MeshAdapter::getTransportTypeId,
+      .def("getTransportTypeId", &finley::FinleyDomain::getTransportTypeId,
 args("solver", "preconditioner", "package", "symmetry"),
 ":return: the identifier of the transport problem type to be used when a particular solver, preconditioner, package and symmetric matrix is used.\n"
 ":rtype: ``int``\n"
@@ -250,26 +220,27 @@ args("solver", "preconditioner", "package", "symmetry"),
 ":param package:\n:type package: ``int``\n"
 ":param symmetry:\n:type symmetry: ``int``\n"
 )
-      .def("setX",&dudley::MeshAdapter::setNewX,
+      .def("setX", &finley::FinleyDomain::setNewX,
 args("arg"), "assigns new location to the domain\n\n:param arg:\n:type arg: `Data`")
-      .def("getX",&dudley::MeshAdapter::getX, ":return: locations in the FEM nodes\n\n"
+      .def("getX", &finley::FinleyDomain::getX, ":return: locations in the FEM nodes\n\n"
 ":rtype: `Data`")
-      .def("getNormal",&dudley::MeshAdapter::getNormal,
+      .def("getNormal", &finley::FinleyDomain::getNormal,
 ":return: boundary normals at the quadrature point on the face elements\n"
 ":rtype: `Data`")
-      .def("getSize",&dudley::MeshAdapter::getSize,":return: the element size\n"
+      .def("getSize", &finley::FinleyDomain::getSize,":return: the element size\n"
 ":rtype: `Data`")
-      .def("setTagMap",&dudley::MeshAdapter::setTagMap,args("name","tag"),
+      .def("setTagMap", &finley::FinleyDomain::setTagMap,args("name","tag"),
 "Give a tag number a name.\n\n:param name: Name for the tag\n:type name: ``string``\n"
 ":param tag: numeric id\n:type tag: ``int``\n:note: Tag names must be unique within a domain")
-      .def("getTag",&dudley::MeshAdapter::getTag,args("name"),":return: tag id for "
+      .def("getTag", &finley::FinleyDomain::getTag,args("name"),":return: tag id for "
 "``name``\n:rtype: ``string``")
-      .def("isValidTagName",&dudley::MeshAdapter::isValidTagName,args("name"),
+      .def("isValidTagName", &finley::FinleyDomain::isValidTagName,args("name"),
 ":return: True is ``name`` corresponds to a tag\n:rtype: ``bool``")
-      .def("showTagNames",&dudley::MeshAdapter::showTagNames,":return: A space separated list of tag names\n:rtype: ``string``")
-      .def("getMPISize",&dudley::MeshAdapter::getMPISize,":return: the number of processes used for this `Domain`\n:rtype: ``int``")
-      .def("getMPIRank",&dudley::MeshAdapter::getMPIRank,":return: the rank of this process\n:rtype: ``int``")
-      .def("MPIBarrier",&dudley::MeshAdapter::MPIBarrier,"Wait until all processes have reached this point")
-      .def("onMasterProcessor",&dudley::MeshAdapter::onMasterProcessor,":return: True if this code is executing on the master process\n:rtype: `bool`");
-
+      .def("showTagNames", &finley::FinleyDomain::showTagNames,":return: A space separated list of tag names\n:rtype: ``string``")
+      .def("getMPISize", &finley::FinleyDomain::getMPISize,":return: the number of processes used for this `Domain`\n:rtype: ``int``")
+      .def("getMPIRank", &finley::FinleyDomain::getMPIRank,":return: the rank of this process\n:rtype: ``int``")
+      .def("MPIBarrier", &finley::FinleyDomain::MPIBarrier,"Wait until all processes have reached this point")
+      .def("onMasterProcessor", &finley::FinleyDomain::onMasterProcessor,":return: True if this code is executing on the master process\n:rtype: `bool`")
+ ;
 }
+
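
Editor's note on the hunk above: the Python bindings move from dudley::MeshAdapter to finley::FinleyDomain. As a rough orientation only, a minimal Python session against these bindings might look like the sketch below; the keyword names follow the docstrings in the binding code and the import path follows the test scripts later in this patch, so treat it as an illustration rather than a verified API reference.

    # Hedged sketch: assumes esys.finley re-exports the Rectangle factory and
    # that the keyword names match the docstrings in the binding code above.
    from esys.finley import Rectangle

    dom = Rectangle(n0=10, n1=10, order=1, l0=1.0, l1=1.0, optimize=True)
    print(dom.getDim())                # 2 for a rectangular domain
    print(dom.getMPISize())            # number of MPI ranks sharing this domain
    dom.setTagMap("top_boundary", 20)  # give numeric tag 20 a readable name
    print(dom.showTagNames())          # space-separated list of known tag names
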
diff --git a/finley/test/MeshAdapterTestCase.cpp b/finley/test/FinleyDomainTestCase.cpp
similarity index 50%
rename from finley/test/MeshAdapterTestCase.cpp
rename to finley/test/FinleyDomainTestCase.cpp
index 4af21a1..34a7065 100644
--- a/finley/test/MeshAdapterTestCase.cpp
+++ b/finley/test/FinleyDomainTestCase.cpp
@@ -14,16 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
+#include <escript/AbstractContinuousDomain.h>
 
+#include "FinleyDomainTestCase.h"
 
-#include "MeshAdapterTestCase.h"
-
-#include "finley/CppAdapter/MeshAdapter.h"
-#include "finley/CppAdapter/MeshAdapterFactory.h"
-
-#include "escript/AbstractContinuousDomain.h"
+#include <finley/DomainFactory.h>
 
 #include <cppunit/TestCaller.h>
 #include <boost/scoped_ptr.hpp>
@@ -32,20 +27,19 @@ using namespace escript;
 using namespace finley;
 using namespace CppUnit;
 
-void MeshAdapterTestCase::testAll()
+void FinleyDomainTestCase::testAll()
 {
-    // test construction of a mesh using the brick factory method
-    // boost::scoped_ptr<AbstractContinuousDomain> myMesh(brick());
-	esysUtils::JMPI info=esysUtils::makeInfo(MPI_COMM_WORLD);
-	brick(info);	// brick now returns a Domain_ptr which will auto delete
+    JMPI info = makeInfo(MPI_COMM_WORLD);
+	Domain_ptr dom(brick(info));
+    CPPUNIT_ASSERT(dom->getDim() == 3);
 }
 
-TestSuite* MeshAdapterTestCase::suite()
+TestSuite* FinleyDomainTestCase::suite()
 {
-    TestSuite *testSuite = new TestSuite("MeshAdapterTestCase");
+    TestSuite *testSuite = new TestSuite("FinleyDomainTestCase");
 
-    testSuite->addTest(new TestCaller<MeshAdapterTestCase>(
-                "testAll",&MeshAdapterTestCase::testAll));
+    testSuite->addTest(new TestCaller<FinleyDomainTestCase>(
+                "testAll",&FinleyDomainTestCase::testAll));
     return testSuite;
 }
 
diff --git a/finley/test/MeshAdapterTestCase.h b/finley/test/FinleyDomainTestCase.h
similarity index 81%
rename from finley/test/MeshAdapterTestCase.h
rename to finley/test/FinleyDomainTestCase.h
index 7f0d18f..31148f9 100644
--- a/finley/test/MeshAdapterTestCase.h
+++ b/finley/test/FinleyDomainTestCase.h
@@ -15,13 +15,13 @@
 *****************************************************************************/
 
 
-#if !defined  MeshAdapterTestCase_20040705_H
-#define  MeshAdapterTestCase_20040705_H
+#ifndef __FINLEYDOMAIN_TESTCASE_H__
+#define __FINLEYDOMAIN_TESTCASE_H__
 
 #include <cppunit/TestFixture.h>
 #include <cppunit/TestSuite.h>
 
-class MeshAdapterTestCase : public CppUnit::TestFixture
+class FinleyDomainTestCase : public CppUnit::TestFixture
 {
 public:
     void testAll();
@@ -29,5 +29,5 @@ public:
     static CppUnit::TestSuite* suite();
 };
 
-#endif
+#endif // __FINLEYDOMAIN_TESTCASE_H__
 
diff --git a/finley/test/SConscript b/finley/test/SConscript
index fd91ade..a803574 100644
--- a/finley/test/SConscript
+++ b/finley/test/SConscript
@@ -14,17 +14,16 @@
 #
 ##############################################################################
 
-
 Import('*')
 local_env = env.Clone()
 
 if local_env['cppunit']:
     # get the test source file names
-    sources = Glob('*.cpp')+Glob('*.c')
+    sources = Glob('*.cpp')
     testname='finley_UnitTest'
 
     # build the executable
-    local_env.Prepend(LIBS=['finley', 'escript', 'paso', 'esysUtils']+env['cppunit_libs'])
+    local_env.PrependUnique(LIBS=env['finley_libs']+env['cppunit_libs'])
     program = local_env.Program(testname, sources)
 
     # run the tests - but only if test_targets are stale
@@ -35,10 +34,10 @@ if local_env['cppunit']:
     Alias("run_tests", testname+'.passed')
 
     # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/finley/test", ('./'+testname,))
+    from grouptest import GroupTest
+    tgroup=GroupTest("finleycpp", "$BINRUNNER ", (), "", "$BUILD_DIR/finley/test", ('./'+testname,))
     TestGroups.append(tgroup)
 
 # configure python unit tests
-local_env.SConscript(dirs = ['#/finley/test/python'], variant_dir='python', duplicate=0, exports=['py_wrapper_lib'])
+local_env.SConscript('python/SConscript', duplicate=0)
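
Editor's note on the SConscript change above: GroupTest now takes the group name as its first argument, and the link libraries come from env['<component>_libs'] rather than a hard-coded list. A hedged sketch of the same registration for a hypothetical component called "mycomp" (the names mycomp, mycomp_libs and mycompcpp are placeholders, not part of this patch; TestGroups is assumed to arrive via Import('*') as in the files above):

    # Hypothetical SConscript fragment following the convention used above.
    if local_env['cppunit']:
        sources = Glob('*.cpp')
        testname = 'mycomp_UnitTest'
        local_env.PrependUnique(LIBS=env['mycomp_libs'] + env['cppunit_libs'])
        program = local_env.Program(testname, sources)

        from grouptest import GroupTest
        tgroup = GroupTest("mycompcpp", "$BINRUNNER ", (), "",
                           "$BUILD_DIR/mycomp/test", ('./' + testname,))
        TestGroups.append(tgroup)
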
 
diff --git a/finley/test/finley_UnitTests.cpp b/finley/test/finley_UnitTests.cpp
index 4ea53eb..6b247e1 100644
--- a/finley/test/finley_UnitTests.cpp
+++ b/finley/test/finley_UnitTests.cpp
@@ -14,8 +14,10 @@
 *
 *****************************************************************************/
 
+#include <escript/EsysMPI.h>
+
+#include "FinleyDomainTestCase.h"
 
-#include "MeshAdapterTestCase.h"
 #include <cppunit/CompilerOutputter.h>
 #include <cppunit/TestResult.h>
 #include <cppunit/TestResultCollector.h>
@@ -24,8 +26,6 @@
 
 using namespace CppUnit;
 
-#include "esysUtils/Esys_MPI.h"
-
 int main(int argc, char* argv[])
 {
 #ifdef ESYS_MPI
@@ -39,9 +39,9 @@ int main(int argc, char* argv[])
     TestResultCollector result;
     controller.addListener(&result);
     TestRunner runner;
-    runner.addTest(MeshAdapterTestCase::suite());
+    runner.addTest(FinleyDomainTestCase::suite());
     runner.run(controller);
-    CompilerOutputter outputter( &result, std::cerr );
+    CompilerOutputter outputter(&result, std::cerr);
     outputter.write();
 #ifdef ESYS_MPI
     MPI_Finalize();
diff --git a/finley/test/python/SConscript b/finley/test/python/SConscript
index 9fa84f4..a24f2d1 100644
--- a/finley/test/python/SConscript
+++ b/finley/test/python/SConscript
@@ -20,38 +20,12 @@ Import('*')
 local_env = env.Clone()
 
 # files defining test runs (passing in a release)
-testruns = []
-testruns += ['run_escriptOnFinley.py']
-testruns += ['run_generators.py']
-testruns += ['run_inputOutput.py']
-testruns += ['run_linearPDEsOnFinley1_2D1.py']
-testruns += ['run_linearPDEsOnFinley1_2D2.py']
-testruns += ['run_linearPDEsOnFinley1_3D1.py']
-#testruns += ['run_linearPDEsOnFinley1_3D2.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part1.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part2.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part3-1.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part3-2.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part3-3.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part3-4.py']
-testruns += ['run_linearPDEsOnFinley1_3D2_part4.py']
-testruns += ['run_linearPDEsOnFinley2.py']
-testruns += ['run_linearPDEsOnFinley3.py']
-testruns += ['run_linearPDEsOnFinleyMacro.py']
-testruns += ['run_models.py']
-testruns += ['run_darcy.py']
-testruns += ['run_simplesolve.py']
-testruns += ['run_utilOnFinley.py']
-# testruns += ['run_visualization_interface.py']
-testruns += ['run_amg.py'] 
-testruns += ['run_nlpde2dOnFinley.py']
-testruns += ['run_nlpde3dOnFinley.py']
-testruns += ['run_splitworldOnFinley.py']
+testruns = Glob('run_*.py', strings=True)
 
 #  files defining a few tests for a quick test
 scalable_tests = []
 scalable_tests += ['run_inputOutput.py']
-scalable_tests += ['run_simplesolve.py']
+scalable_tests += ['run_pasoSolversOnFinley.py']
 
 # files defining tests run locally (not as part of a release)
 localtestruns = [x for x in Glob('*.py', strings=True) if not x.startswith('run_')]
@@ -68,24 +42,23 @@ env.Alias('build_py_tests', test_pyc)
 local_env.PrependENVPath('PYTHONPATH', Dir('.').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/finley/test/python').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/escriptcore/test/python').abspath)
-local_env['ENV']['FINLEY_TEST_DATA']=Dir('.').srcnode().abspath
-local_env['ENV']['FINLEY_WORKDIR']=Dir('.').abspath
+local_env['ENV']['FINLEY_TEST_DATA'] = Dir('.').srcnode().abspath
+local_env['ENV']['FINLEY_WORKDIR'] = Dir('.').abspath
 # needed for a test from the util base class in escript
-local_env['ENV']['ESCRIPT_WORKDIR']=Dir('.').abspath
+local_env['ENV']['ESCRIPT_WORKDIR'] = Dir('.').abspath
 env.Alias('local_py_tests',[splitext(x)[0]+'.passed' for x in alltestruns])
 env.Alias('py_tests', [splitext(x)[0]+'.passed' for x in testruns ])
 env.Alias('scalable_tests', [splitext(x)[0]+'.passed' for x in scalable_tests])
 
 # run all tests
 program = local_env.RunPyUnitTest(alltestruns)
-Depends(program, [py_wrapper_lib, 'install_finley_py'])
-Depends(program, 'build_py_tests')
+Requires(program, ['install_escript', 'build_py_tests'])
 if env['usempi']:
-    Depends(program, env['prefix']+"/lib/pythonMPI")
+    Requires(program, ['install_pythonMPI'])
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("FINLEY_TEST_DATA","$BATCH_ROOT/finley/test/python"),('FINLEY_WORKDIR','$BUILD_DIR/finley/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/finley/test/python","$BATCH_ROOT/finley/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("finley", "$PYTHONRUNNER ", (("FINLEY_TEST_DATA","$BATCH_ROOT/finley/test/python"),('FINLEY_WORKDIR','$BUILD_DIR/finley/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/finley/test/python", "$BATCH_ROOT/finley/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/finley/test/python")
 TestGroups.append(tgroup)
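
Editor's note: the hand-maintained testruns list is replaced by pattern-based discovery. A minimal sketch of the idea (the alltestruns combination is assumed from its use further down this SConscript and is not shown in the hunk itself):

    # Release tests: every run_*.py; local-only tests: the remaining *.py files.
    testruns = Glob('run_*.py', strings=True)
    localtestruns = [x for x in Glob('*.py', strings=True) if not x.startswith('run_')]
    alltestruns = testruns + localtestruns   # assumed combination, see note above
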
 
diff --git a/finley/test/python/data_meshes/brick_4x4x4.fly b/finley/test/python/data_meshes/brick_4x4x4.fly
index 295165c..5259c03 100644
--- a/finley/test/python/data_meshes/brick_4x4x4.fly
+++ b/finley/test/python/data_meshes/brick_4x4x4.fly
@@ -296,3 +296,4 @@ front 10
 left 1
 right 2
 top 200
+
diff --git a/finley/test/python/data_meshes/brick_8x10x12.fly b/finley/test/python/data_meshes/brick_8x10x12.fly
index b76ece8..d6082d4 100644
--- a/finley/test/python/data_meshes/brick_8x10x12.fly
+++ b/finley/test/python/data_meshes/brick_8x10x12.fly
@@ -2850,3 +2850,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_2D_macro.msh b/finley/test/python/data_meshes/hex_2D_macro.msh
index 0d1f53e..af05fc0 100644
--- a/finley/test/python/data_meshes/hex_2D_macro.msh
+++ b/finley/test/python/data_meshes/hex_2D_macro.msh
@@ -23,3 +23,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order1.msh b/finley/test/python/data_meshes/hex_2D_order1.msh
index 009fcb9..e9c92cd 100644
--- a/finley/test/python/data_meshes/hex_2D_order1.msh
+++ b/finley/test/python/data_meshes/hex_2D_order1.msh
@@ -18,3 +18,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order1_macro.msh b/finley/test/python/data_meshes/hex_2D_order1_macro.msh
index 1af91b6..3f4f5d6 100644
--- a/finley/test/python/data_meshes/hex_2D_order1_macro.msh
+++ b/finley/test/python/data_meshes/hex_2D_order1_macro.msh
@@ -23,3 +23,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order1_onFace.msh b/finley/test/python/data_meshes/hex_2D_order1_onFace.msh
index d837d0d..3591f95 100644
--- a/finley/test/python/data_meshes/hex_2D_order1_onFace.msh
+++ b/finley/test/python/data_meshes/hex_2D_order1_onFace.msh
@@ -18,3 +18,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order2.msh b/finley/test/python/data_meshes/hex_2D_order2.msh
index f90e615..03d91f7 100644
--- a/finley/test/python/data_meshes/hex_2D_order2.msh
+++ b/finley/test/python/data_meshes/hex_2D_order2.msh
@@ -22,3 +22,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order2_onFace.msh b/finley/test/python/data_meshes/hex_2D_order2_onFace.msh
index e953c79..aa9a872 100644
--- a/finley/test/python/data_meshes/hex_2D_order2_onFace.msh
+++ b/finley/test/python/data_meshes/hex_2D_order2_onFace.msh
@@ -22,3 +22,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_2D_order2p.msh b/finley/test/python/data_meshes/hex_2D_order2p.msh
index 3ec746d..894dc6f 100644
--- a/finley/test/python/data_meshes/hex_2D_order2p.msh
+++ b/finley/test/python/data_meshes/hex_2D_order2p.msh
@@ -23,3 +23,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/hex_3D_macro.msh b/finley/test/python/data_meshes/hex_3D_macro.msh
index 2015e8f..e6b2120 100644
--- a/finley/test/python/data_meshes/hex_3D_macro.msh
+++ b/finley/test/python/data_meshes/hex_3D_macro.msh
@@ -45,3 +45,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order1.msh b/finley/test/python/data_meshes/hex_3D_order1.msh
index cd66eef..91f12fe 100644
--- a/finley/test/python/data_meshes/hex_3D_order1.msh
+++ b/finley/test/python/data_meshes/hex_3D_order1.msh
@@ -26,3 +26,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order1_macro.msh b/finley/test/python/data_meshes/hex_3D_order1_macro.msh
index 31f6d30..968c445 100644
--- a/finley/test/python/data_meshes/hex_3D_order1_macro.msh
+++ b/finley/test/python/data_meshes/hex_3D_order1_macro.msh
@@ -45,3 +45,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order1_onFace.msh b/finley/test/python/data_meshes/hex_3D_order1_onFace.msh
index 1bbf771..e02875f 100644
--- a/finley/test/python/data_meshes/hex_3D_order1_onFace.msh
+++ b/finley/test/python/data_meshes/hex_3D_order1_onFace.msh
@@ -26,3 +26,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order2.msh b/finley/test/python/data_meshes/hex_3D_order2.msh
index c134f55..f875a87 100644
--- a/finley/test/python/data_meshes/hex_3D_order2.msh
+++ b/finley/test/python/data_meshes/hex_3D_order2.msh
@@ -38,3 +38,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order2_onFace.msh b/finley/test/python/data_meshes/hex_3D_order2_onFace.msh
index e0b6dd7..f241a5b 100644
--- a/finley/test/python/data_meshes/hex_3D_order2_onFace.msh
+++ b/finley/test/python/data_meshes/hex_3D_order2_onFace.msh
@@ -38,3 +38,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_3D_order2p.msh b/finley/test/python/data_meshes/hex_3D_order2p.msh
index 468bb98..15a5c9c 100644
--- a/finley/test/python/data_meshes/hex_3D_order2p.msh
+++ b/finley/test/python/data_meshes/hex_3D_order2p.msh
@@ -45,3 +45,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/finley/test/python/data_meshes/hex_contact_2D_order1.msh b/finley/test/python/data_meshes/hex_contact_2D_order1.msh
index 0521486..846bf99 100644
--- a/finley/test/python/data_meshes/hex_contact_2D_order1.msh
+++ b/finley/test/python/data_meshes/hex_contact_2D_order1.msh
@@ -21,3 +21,4 @@ Line2 6
 Line2_Contact 1
 4 10 3 2 5 4
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_2D_order1_onFace.msh b/finley/test/python/data_meshes/hex_contact_2D_order1_onFace.msh
index dddb25a..e959c05 100644
--- a/finley/test/python/data_meshes/hex_contact_2D_order1_onFace.msh
+++ b/finley/test/python/data_meshes/hex_contact_2D_order1_onFace.msh
@@ -21,3 +21,4 @@ Rec4Face 6
 Rec4Face_Contact 1
 4 10 3 2 0 1 5 4 6 7
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_2D_order2.msh b/finley/test/python/data_meshes/hex_contact_2D_order2.msh
index 827035b..aabb44f 100644
--- a/finley/test/python/data_meshes/hex_contact_2D_order2.msh
+++ b/finley/test/python/data_meshes/hex_contact_2D_order2.msh
@@ -29,3 +29,4 @@ Line3 6
 Line3_Contact 1
 4 10 8 6 7 11 9 10
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_2D_order2_onFace.msh b/finley/test/python/data_meshes/hex_contact_2D_order2_onFace.msh
index 6f39b9d..517c3e0 100644
--- a/finley/test/python/data_meshes/hex_contact_2D_order2_onFace.msh
+++ b/finley/test/python/data_meshes/hex_contact_2D_order2_onFace.msh
@@ -29,3 +29,4 @@ Rec8Face 6
 Rec8Face_Contact 1
 4 10 8 6 0 2 7 3 1 5 11 9 15 17 10 12 16 14
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_3D_order1.msh b/finley/test/python/data_meshes/hex_contact_3D_order1.msh
index 505f352..a437b56 100644
--- a/finley/test/python/data_meshes/hex_contact_3D_order1.msh
+++ b/finley/test/python/data_meshes/hex_contact_3D_order1.msh
@@ -33,3 +33,4 @@ Rec4 10
 Rec4_Contact 1
 2 100 4 5 7 6 8 9 11 10
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_3D_order1_onFace.msh b/finley/test/python/data_meshes/hex_contact_3D_order1_onFace.msh
index d6947c8..fbb3661 100644
--- a/finley/test/python/data_meshes/hex_contact_3D_order1_onFace.msh
+++ b/finley/test/python/data_meshes/hex_contact_3D_order1_onFace.msh
@@ -33,3 +33,4 @@ Hex8Face 10
 Hex8Face_Contact 1
 2 100 4 5 7 6 0 1 3 2 8 9 11 10 12 13 15 14
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_3D_order2.msh b/finley/test/python/data_meshes/hex_contact_3D_order2.msh
index a6b371b..f4666a7 100644
--- a/finley/test/python/data_meshes/hex_contact_3D_order2.msh
+++ b/finley/test/python/data_meshes/hex_contact_3D_order2.msh
@@ -57,3 +57,4 @@ Rec8 10
 Rec8_Contact 1
 2 100 18 20 26 24 19 23 25 21 27 29 35 33 28 32 34 30
 Point1 0
+
diff --git a/finley/test/python/data_meshes/hex_contact_3D_order2_onFace.msh b/finley/test/python/data_meshes/hex_contact_3D_order2_onFace.msh
index f259980..5e2c2c2 100644
--- a/finley/test/python/data_meshes/hex_contact_3D_order2_onFace.msh
+++ b/finley/test/python/data_meshes/hex_contact_3D_order2_onFace.msh
@@ -57,3 +57,4 @@ Hex20Face 10
 Hex20Face_Contact 1
 2 100 18 20 26 24 0 2 8 6 19 23 25 21 9 11 17 15 1 5 7 3 27 29 35 33 45 47 53 51 28 32 34 30 36 38 44 42 46 50 52 48
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_2Do1_Contact.fly b/finley/test/python/data_meshes/mesh_2Do1_Contact.fly
index a6b6642..24545ce 100644
--- a/finley/test/python/data_meshes/mesh_2Do1_Contact.fly
+++ b/finley/test/python/data_meshes/mesh_2Do1_Contact.fly
@@ -198,3 +198,4 @@ Line2_Contact 8
 46 1 34 39 75 80
 47 1 39 44 80 85
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_2Do1_Contact_withElementsOnFace.fly b/finley/test/python/data_meshes/mesh_2Do1_Contact_withElementsOnFace.fly
index f8dbff1..4abb0f3 100644
--- a/finley/test/python/data_meshes/mesh_2Do1_Contact_withElementsOnFace.fly
+++ b/finley/test/python/data_meshes/mesh_2Do1_Contact_withElementsOnFace.fly
@@ -198,3 +198,4 @@ Rec4Face_Contact 8
 46 1 34 39 38 33 75 80 81 76
 47 1 39 44 43 38 80 85 86 81
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_2Do2_Contact.fly b/finley/test/python/data_meshes/mesh_2Do2_Contact.fly
index 23e637f..5a35342 100644
--- a/finley/test/python/data_meshes/mesh_2Do2_Contact.fly
+++ b/finley/test/python/data_meshes/mesh_2Do2_Contact.fly
@@ -350,3 +350,4 @@ Line3_Contact 8
 46 1 116 134 125 261 279 270
 47 1 134 152 143 279 297 288
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_2Do2_Contact_withElementsOnFace.fly b/finley/test/python/data_meshes/mesh_2Do2_Contact_withElementsOnFace.fly
index 8c61bba..52cfec8 100644
--- a/finley/test/python/data_meshes/mesh_2Do2_Contact_withElementsOnFace.fly
+++ b/finley/test/python/data_meshes/mesh_2Do2_Contact_withElementsOnFace.fly
@@ -350,3 +350,4 @@ Rec8Face_Contact 8
 46 1 116 134 132 114 125 133 123 115 261 279 281 263 270 280 272 262
 47 1 134 152 150 132 143 151 141 133 279 297 299 281 288 298 290 280
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_3Do1_Contact.fly b/finley/test/python/data_meshes/mesh_3Do1_Contact.fly
index 4e20875..a9fb7fe 100644
--- a/finley/test/python/data_meshes/mesh_3Do1_Contact.fly
+++ b/finley/test/python/data_meshes/mesh_3Do1_Contact.fly
@@ -1774,3 +1774,4 @@ Rec4_Contact 64
 446 1 349 354 399 394 750 755 800 795
 447 1 354 359 404 399 755 760 805 800
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_3Do1_Contact_withElementsOnFace.fly b/finley/test/python/data_meshes/mesh_3Do1_Contact_withElementsOnFace.fly
index 2bfae5b..9650eef 100644
--- a/finley/test/python/data_meshes/mesh_3Do1_Contact_withElementsOnFace.fly
+++ b/finley/test/python/data_meshes/mesh_3Do1_Contact_withElementsOnFace.fly
@@ -1774,3 +1774,4 @@ Hex8Face_Contact 64
 446 1 349 354 399 394 348 353 398 393 750 755 800 795 751 756 801 796
 447 1 354 359 404 399 353 358 403 398 755 760 805 800 756 761 806 801
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_3Do2_Contact.fly b/finley/test/python/data_meshes/mesh_3Do2_Contact.fly
index ecdf055..7eac77b 100644
--- a/finley/test/python/data_meshes/mesh_3Do2_Contact.fly
+++ b/finley/test/python/data_meshes/mesh_3Do2_Contact.fly
@@ -3862,3 +3862,4 @@ Rec8_Contact 64
 446 1 2258 2276 2582 2564 2267 2429 2573 2411 4851 4869 5175 5157 4860 5022 5166 5004
 447 1 2276 2294 2600 2582 2285 2447 2591 2429 4869 4887 5193 5175 4878 5040 5184 5022
 Point1 0
+
diff --git a/finley/test/python/data_meshes/mesh_3Do2_Contact_withElementsOnFace.fly b/finley/test/python/data_meshes/mesh_3Do2_Contact_withElementsOnFace.fly
index 87ec27c..25dc99f 100644
--- a/finley/test/python/data_meshes/mesh_3Do2_Contact_withElementsOnFace.fly
+++ b/finley/test/python/data_meshes/mesh_3Do2_Contact_withElementsOnFace.fly
@@ -3862,3 +3862,4 @@ Hex20Face_Contact 64
 446 1 2258 2276 2582 2564 2256 2274 2580 2562 2267 2429 2573 2411 2257 2275 2581 2563 2265 2427 2571 2409 4851 4869 5175 5157 4853 4871 5177 5159 4860 5022 5166 5004 4852 4870 5176 5158 4862 5024 5168 5006
 447 1 2276 2294 2600 2582 2274 2292 2598 2580 2285 2447 2591 2429 2275 2293 2599 2581 2283 2445 2589 2427 4869 4887 5193 5175 4871 4889 5195 5177 4878 5040 5184 5022 4870 4888 5194 5176 4880 5042 5186 5024
 Point1 0
+
diff --git a/finley/test/python/data_meshes/rect_4x4.fly b/finley/test/python/data_meshes/rect_4x4.fly
index 37d5dc1..ce14f3d 100644
--- a/finley/test/python/data_meshes/rect_4x4.fly
+++ b/finley/test/python/data_meshes/rect_4x4.fly
@@ -66,3 +66,4 @@ bottom 10
 left 1
 right 2
 top 20
+
diff --git a/finley/test/python/data_meshes/rect_test.msh b/finley/test/python/data_meshes/rect_test.msh
index 289a0b9..3bf5238 100644
--- a/finley/test/python/data_meshes/rect_test.msh
+++ b/finley/test/python/data_meshes/rect_test.msh
@@ -89,3 +89,4 @@ $Elements
 67 4 2 0 26 14 12 3 10
 68 4 2 0 26 9 4 14 12
 $EndElements
+
diff --git a/finley/test/python/data_meshes/rectangle_8x10.fly b/finley/test/python/data_meshes/rectangle_8x10.fly
index 3aabbc8..54fff8b 100644
--- a/finley/test/python/data_meshes/rectangle_8x10.fly
+++ b/finley/test/python/data_meshes/rectangle_8x10.fly
@@ -224,3 +224,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/finley/test/python/data_meshes/tagtest.msh b/finley/test/python/data_meshes/tagtest.msh
index 170cae8..893057e 100644
--- a/finley/test/python/data_meshes/tagtest.msh
+++ b/finley/test/python/data_meshes/tagtest.msh
@@ -20,3 +20,4 @@ $PhysicalNames
 2 2 "tag2"
 2 3 "tag3"
 $EndPhysicalNames
+
diff --git a/finley/test/python/data_meshes/test_Add.msh b/finley/test/python/data_meshes/test_Add.msh
index 982ad67..7806751 100644
--- a/finley/test/python/data_meshes/test_Add.msh
+++ b/finley/test/python/data_meshes/test_Add.msh
@@ -7242,3 +7242,4 @@ $Elements
 5803 4 2 1 52 763 7 115 930
 5804 4 2 1 52 102 6 764 599
 $EndElements
+
diff --git a/finley/test/python/data_meshes/tet10.fly b/finley/test/python/data_meshes/tet10.fly
index fcb52cd..fbf2669 100644
--- a/finley/test/python/data_meshes/tet10.fly
+++ b/finley/test/python/data_meshes/tet10.fly
@@ -63,3 +63,4 @@ Tri6 12
 15 24 7 6 8 32 24 25
 Tri6_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet10_gmsh.msh b/finley/test/python/data_meshes/tet10_gmsh.msh
index bb5ce72..d71afbd 100644
--- a/finley/test/python/data_meshes/tet10_gmsh.msh
+++ b/finley/test/python/data_meshes/tet10_gmsh.msh
@@ -75,3 +75,4 @@ $ELM
 35 11 34 34 10 18 2 4 6 43 20 41 39 36 27
 36 11 34 34 10 4 18 6 8 41 39 36 29 24 37
 $ENDELM
+
diff --git a/finley/test/python/data_meshes/tet10_macro.fly b/finley/test/python/data_meshes/tet10_macro.fly
index 5edd6b4..6c4886d 100644
--- a/finley/test/python/data_meshes/tet10_macro.fly
+++ b/finley/test/python/data_meshes/tet10_macro.fly
@@ -63,3 +63,4 @@ Tri6Macro 12
 15 24 7 6 8 32 24 25
 Tri6_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet4.fly b/finley/test/python/data_meshes/tet4.fly
index 1d66b23..d8ecf5b 100644
--- a/finley/test/python/data_meshes/tet4.fly
+++ b/finley/test/python/data_meshes/tet4.fly
@@ -37,3 +37,4 @@ Tri3 12
 15 24 7 6 8
 Tri3_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet4_gmsh.msh b/finley/test/python/data_meshes/tet4_gmsh.msh
index 7c6ead8..6cd45a6 100644
--- a/finley/test/python/data_meshes/tet4_gmsh.msh
+++ b/finley/test/python/data_meshes/tet4_gmsh.msh
@@ -49,3 +49,4 @@ $ELM
 35 4 34 34 4 18 2 4 6
 36 4 34 34 4 4 18 6 8
 $ENDELM
+
diff --git a/finley/test/python/data_meshes/tet_2D_macro.fly b/finley/test/python/data_meshes/tet_2D_macro.fly
index 4c48736..38c4756 100644
--- a/finley/test/python/data_meshes/tet_2D_macro.fly
+++ b/finley/test/python/data_meshes/tet_2D_macro.fly
@@ -89,3 +89,4 @@ Line3Macro 12
 31 20 0 21 23 
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet_2D_order1.fly b/finley/test/python/data_meshes/tet_2D_order1.fly
index 33e66d5..25c901d 100644
--- a/finley/test/python/data_meshes/tet_2D_order1.fly
+++ b/finley/test/python/data_meshes/tet_2D_order1.fly
@@ -53,3 +53,4 @@ Line2 12
 31 20 0 11
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet_2D_order2.fly b/finley/test/python/data_meshes/tet_2D_order2.fly
index 0e71694..0568007 100644
--- a/finley/test/python/data_meshes/tet_2D_order2.fly
+++ b/finley/test/python/data_meshes/tet_2D_order2.fly
@@ -89,3 +89,4 @@ Line3 12
 31 20 0 21 23 
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet_3D_macro.fly b/finley/test/python/data_meshes/tet_3D_macro.fly
index 523ebe6..78aa10a 100644
--- a/finley/test/python/data_meshes/tet_3D_macro.fly
+++ b/finley/test/python/data_meshes/tet_3D_macro.fly
@@ -145,3 +145,4 @@ Tri6Macro 36
 53 10 61 53 56 63 57 68
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet_3D_order1.fly b/finley/test/python/data_meshes/tet_3D_order1.fly
index 17a4983..fd1bd14 100644
--- a/finley/test/python/data_meshes/tet_3D_order1.fly
+++ b/finley/test/python/data_meshes/tet_3D_order1.fly
@@ -84,3 +84,4 @@ Tri3 36
 53 10 17 18 19 
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tet_3D_order2.fly b/finley/test/python/data_meshes/tet_3D_order2.fly
index a062a92..c883404 100644
--- a/finley/test/python/data_meshes/tet_3D_order2.fly
+++ b/finley/test/python/data_meshes/tet_3D_order2.fly
@@ -145,3 +145,4 @@ Tri6 36
 53 10 61 53 56 63 57 68
 Point1_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tri3.fly b/finley/test/python/data_meshes/tri3.fly
index 34973f1..997966a 100644
--- a/finley/test/python/data_meshes/tri3.fly
+++ b/finley/test/python/data_meshes/tri3.fly
@@ -14,3 +14,4 @@ Line2 4
 3 7 3 4
 Line2_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tri3_gmsh.msh b/finley/test/python/data_meshes/tri3_gmsh.msh
index 3a52f7a..92f9f14 100644
--- a/finley/test/python/data_meshes/tri3_gmsh.msh
+++ b/finley/test/python/data_meshes/tri3_gmsh.msh
@@ -14,3 +14,4 @@ $ELM
 5 2 10 10 3 2 3 4
 6 2 10 10 3 1 2 4
 $ENDELM
+
diff --git a/finley/test/python/data_meshes/tri6.fly b/finley/test/python/data_meshes/tri6.fly
index 34a3be8..cc536cc 100644
--- a/finley/test/python/data_meshes/tri6.fly
+++ b/finley/test/python/data_meshes/tri6.fly
@@ -19,3 +19,4 @@ Line3 4
 3 7 3 4 7
 Line3_Contact 0
 Point1 0
+
diff --git a/finley/test/python/data_meshes/tri6_gmsh.msh b/finley/test/python/data_meshes/tri6_gmsh.msh
index 63377d0..851a805 100644
--- a/finley/test/python/data_meshes/tri6_gmsh.msh
+++ b/finley/test/python/data_meshes/tri6_gmsh.msh
@@ -19,3 +19,4 @@ $ELM
 5 9 10 10 6 2 3 4 6 7 9
 6 9 10 10 6 1 2 4 5 9 8
 $ENDELM
+
diff --git a/finley/test/python/data_meshes/tri6_macro.fly b/finley/test/python/data_meshes/tri6_macro.fly
index 10ec730..bb9be06 100644
--- a/finley/test/python/data_meshes/tri6_macro.fly
+++ b/finley/test/python/data_meshes/tri6_macro.fly
@@ -19,3 +19,4 @@ Line3Macro 4
 3 7 3 4 7
 Line3_Contact 0
 Point1 0
+
diff --git a/finley/test/python/run_amg.py b/finley/test/python/run_amg.py
index a621c22..ebcb889 100644
--- a/finley/test/python/run_amg.py
+++ b/finley/test/python/run_amg.py
@@ -41,18 +41,19 @@ __author__="Lutz Gross, l.gross at uq.edu.au"
 import esys.escriptcore.utestselect as unittest, sys
 from esys.escriptcore.testing import *
 from esys.escript import *
-from esys.finley import Rectangle,Brick
+from esys.finley import Rectangle, Brick
 from esys.escript.linearPDEs import LinearPDE, SolverOptions
 import numpy
-OPTIMIZE=True # and False
-SOLVER_VERBOSE=True or False
+
+OPTIMIZE = True
+SOLVER_VERBOSE = False
 
 MIN_MATRIX_SIZE=1
 MIN_SPARSITY=1.
 MIN_MATRIX_SIZE=None
 MIN_SPARSITY=None
 MAX_LEVEL=None
-USE_AMG=True or False
+HAVE_PASO_AMG = hasFeature('paso') and (getMPISizeWorld() == 1)
 
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
@@ -64,10 +65,10 @@ FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
 NE_TOTAL=4096
 #NE_TOTAL=4
 
+ at unittest.skipUnless(HAVE_PASO_AMG, "PASO AMG tests require PASO and #ranks == 1")
 class AMG(unittest.TestCase): #subclassing required
 
    def test_Poisson(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Scalar(1,Solution(self.domain))
@@ -85,10 +86,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -100,7 +98,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_PoissonWithDirectInterpolation(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Scalar(1,Solution(self.domain))
@@ -118,10 +115,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setNumPreSweeps(3)
         pde.getSolverOptions().setNumPostSweeps(3)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
@@ -136,7 +130,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_PoissonClassic(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Scalar(1,Solution(self.domain))
@@ -154,10 +147,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         pde.getSolverOptions().setNumPreSweeps(3)
         pde.getSolverOptions().setNumPostSweeps(3)
@@ -172,7 +162,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_PoissonClassicWithFFCoupling(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Scalar(1,Solution(self.domain))
@@ -190,10 +179,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         pde.getSolverOptions().setNumPreSweeps(3)
         pde.getSolverOptions().setNumPostSweeps(3)
@@ -208,7 +194,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_PoissonSqueezedX(self):
-        global USE_AMG
         x=self.domain.getX().copy()
         x[0]*=0.5
         self.domain.setX(x)
@@ -229,10 +214,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -245,7 +227,6 @@ class AMG(unittest.TestCase): #subclassing required
 
 
    def test_Poisson2(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(2,),Solution(self.domain))
@@ -268,10 +249,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -283,7 +261,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_Poisson3(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(3,),Solution(self.domain))
@@ -309,10 +286,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -324,7 +298,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_Poisson4(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(4,),Solution(self.domain))
@@ -353,10 +326,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -367,7 +337,6 @@ class AMG(unittest.TestCase): #subclassing required
         error=Lsup(u-u_ex)/Lsup(u_ex)
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
    def test_Poisson2Classic(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(2,),Solution(self.domain))
@@ -390,10 +359,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -406,7 +372,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_Poisson3Classic(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(3,),Solution(self.domain))
@@ -432,10 +397,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -448,7 +410,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_Poisson4Classic(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(4,),Solution(self.domain))
@@ -477,10 +438,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -493,7 +451,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_WeakCoupled2(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(2,),Solution(self.domain))
@@ -523,10 +480,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -538,7 +492,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_WeakCoupled3(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(3,),Solution(self.domain))
@@ -572,10 +525,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -586,7 +536,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_WeakCoupled4(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(4,),Solution(self.domain))
@@ -625,10 +574,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -639,7 +585,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_StrongCoupled2(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(2,),Solution(self.domain))
@@ -669,10 +614,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -684,7 +626,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_StrongCoupled3(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(3,),Solution(self.domain))
@@ -718,10 +659,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -732,7 +670,6 @@ class AMG(unittest.TestCase): #subclassing required
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
 
    def test_StrongCoupled4(self):
-        global USE_AMG
         x=Solution(self.domain).getX()
         # --- set exact solution ----
         u_ex=Data(1.,(4,),Solution(self.domain))
@@ -771,10 +708,7 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
@@ -786,7 +720,6 @@ class AMG(unittest.TestCase): #subclassing required
 
 
    def test_Square(self):
-        global USE_AMG
         # PDE constants
         h1 = 0.5
         h2 = 0.5
@@ -796,7 +729,7 @@ class AMG(unittest.TestCase): #subclassing required
 
         # domain masks and domain-specific constants
         x = Solution(self.domain).getX(); x0 = x[0]; x1 = x[1]
-        omega2 = wherePositive(x0-h1)*wherePositive(x1-h2) 
+        omega2 = wherePositive(x0-h1)*wherePositive(x1-h2)
         omega1 = 1-omega2
         ratio = alpha1/alpha2
         alpha = alpha1*omega1 + alpha2*omega2
@@ -811,7 +744,7 @@ class AMG(unittest.TestCase): #subclassing required
         b2 = ratio*b1
         c2 = ratio*c1
         d2 = ratio*d1
-        
+
         u_ex = omega1*(a1 + b1*x0 + c1*x1 + d1*x0*x1) + \
                 omega2*(a2 + b2*x0 + c2*x1 + d2*x0*x1)
 
@@ -822,18 +755,18 @@ class AMG(unittest.TestCase): #subclassing required
         q = whereZero(x0) + whereZero(x1) + \
             whereZero(sup(x0)-x0) + whereZero(sup(x1)-x1)
         pde.setValue(q=q,r=u_ex)
-              
+
         # create X points in the centre of the grid elements
         xe = Function(self.domain).getX()
         x0 = xe[0]
         x1 = xe[1]
 
         # redefine omega so that apha is more precise on the diagonal (?)
-        omega2 = wherePositive(x0-h1)*wherePositive(x1-h2) 
+        omega2 = wherePositive(x0-h1)*wherePositive(x1-h2)
         omega1 = 1-omega2
         ratio = alpha1/alpha2
         alpha = alpha1*omega1 + alpha2*omega2
-        
+
         # set up PDE coefficients
         pde.setValue(A=alpha*kronecker(self.domain), D=beta, Y=beta*u_ex)
         pde.setSymmetryOn()
@@ -841,20 +774,17 @@ class AMG(unittest.TestCase): #subclassing required
         # -------- get the solution ---------------------------
         pde.getSolverOptions().setTolerance(self.SOLVER_TOL)
         pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        if USE_AMG and getEscriptParamInt('DISABLE_AMG',0):
-             print("AMG is disabled for MPI builds")
-             USE_AMG=0
-        if (USE_AMG): pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
+        pde.getSolverOptions().setPreconditioner(SolverOptions.AMG)
         pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
         if MIN_MATRIX_SIZE!= None: pde.getSolverOptions().setMinCoarseMatrixSize(MIN_MATRIX_SIZE)
         if MIN_SPARSITY!=None: pde.getSolverOptions().setMinCoarseMatrixSparsity(MIN_SPARSITY)
         if MAX_LEVEL!=None: pde.getSolverOptions().setLevelMax(MAX_LEVEL)
         u = pde.getSolution()
-        
+
         # -------- test the solution ---------------------------
         error=Lsup(u-u_ex)/Lsup(u_ex)
         self.assertTrue(error<self.RES_TOL, "solution error %s is too big."%error)
-        
+
 
 class Test_AMGOnFinleyHex2DOrder1(AMG):
    RES_TOL=5.e-7
@@ -867,4 +797,4 @@ class Test_AMGOnFinleyHex2DOrder1(AMG):
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
-    
+
diff --git a/finley/test/python/run_darcy.py b/finley/test/python/run_darcy.py
index e250b1a..29e82c4 100644
--- a/finley/test/python/run_darcy.py
+++ b/finley/test/python/run_darcy.py
@@ -26,21 +26,15 @@ __url__="https://launchpad.net/escript-finley"
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-import tempfile
-      
-
-
-VERBOSE=False  and True
 
 from esys.escript import *
 from esys.escript.models import DarcyFlow
 from esys.finley import Rectangle, Brick
 
-from math import pi
-import numpy
-import sys
 import os
-#====================================================================================================================
+
+VERBOSE=False
+
 try:
      FINLEY_WORKDIR=os.environ['FINLEY_WORKDIR']
 except KeyError:
@@ -312,7 +306,7 @@ class Darcy2D(Darcy): #subclassing required
     WIDTH=1.
     SOLVER=DarcyFlow.POST
     def setUp(self):
-        NE=40  # wrning smaller NE may case a failure for VarioF tests due to discretization errors.
+        NE=40  # warning: smaller NE may cause a failure for VarioF tests due to discretization errors.
         self.dom = Rectangle(NE,NE)
         self.rescaleDomain()
     def tearDown(self):
@@ -334,7 +328,7 @@ class Darcy3D(Darcy): #subclassing required
     WIDTH=1.
     SOLVER=DarcyFlow.POST
     def setUp(self):
-        NE=29  # wrning smaller NE may case a failure for VarioF tests due to discretization errors.
+        NE=29  # warning: smaller NE may cause a failure for VarioF tests due to discretization errors.
         self.dom = Brick(NE,NE,NE)
         self.rescaleDomain()
     def tearDown(self):
diff --git a/finley/test/python/run_escriptOnFinley.py b/finley/test/python/run_escriptOnFinley.py
index 0622b41..f60905b 100644
--- a/finley/test/python/run_escriptOnFinley.py
+++ b/finley/test/python/run_escriptOnFinley.py
@@ -30,15 +30,14 @@ from esys.escriptcore.testing import *
 from esys.escript import *
 from esys.finley import Rectangle, Brick, ReadMesh, ReadGmsh
 from test_objects import Test_Dump, Test_SetDataPointValue, Test_saveCSV, \
-        Test_TableInterpolation, Test_Domain, Test_GlobalMinMax, Test_Lazy
+        Test_TableInterpolation, Test_Domain, Test_Lazy
 from test_shared import Test_Shared
 
 try:
      FINLEY_WORKDIR=os.environ['FINLEY_WORKDIR']
 except KeyError:
      FINLEY_WORKDIR='.'
-     
-     
+
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
 except KeyError:
@@ -47,7 +46,7 @@ except KeyError:
 FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
 
 mpisize=getMPISizeWorld()
-NE=4 # number elements, must be even
+NE=4 # number of elements, must be even
 
 class Test_SharedOnFinley(Test_Shared):
   def setUp(self):
@@ -73,7 +72,7 @@ class Test_DomainOnFinley(Test_Domain):
        domain=Rectangle(NE,NE)
        x=domain.getX()
        z=interpolate(x, Function(domain))
-       self.assertRaises(RuntimeError, domain.setX, z)
+       self.assertRaises(ValueError, domain.setX, z)
        del x
        del z
        del domain
@@ -127,25 +126,38 @@ class Test_DomainOnFinley(Test_Domain):
        self.assertTrue(len(tags)==len(ref_tags), "tags list has wrong length.")
        for i in ref_tags: self.assertTrue(i in tags,"tag %s is missing."%i)
 
-class Test_DataOpsOnFinley(Test_Dump, Test_SetDataPointValue, Test_GlobalMinMax, Test_Lazy):
+class Test_DumpOnFinley(Test_Dump):
    def setUp(self):
-       self.domain =Rectangle(NE,NE+1,2)
+       self.domain = Rectangle(NE, NE+1, 2)
        self.domain_with_different_number_of_samples =Rectangle(2*NE,NE+1,2)
        self.domain_with_different_number_of_data_points_per_sample =Rectangle(2*NE,NE+1,2,integrationOrder=2)
        self.domain_with_different_sample_ordering =Rectangle(NE,NE+1,2, optimize=True)
        self.filename_base=FINLEY_WORKDIR
-       self.mainfs=Function(self.domain)
-       self.otherfs=Solution(self.domain)
 
    def tearDown(self):
        del self.domain
        del self.domain_with_different_number_of_samples
        del self.domain_with_different_number_of_data_points_per_sample
        del self.domain_with_different_sample_ordering
+
+class Test_SetDataPointValueOnFinley(Test_SetDataPointValue):
+   def setUp(self):
+       self.domain = Rectangle(NE, NE+1, 2)
+
+   def tearDown(self):
+       del self.domain
+
+class Test_LazyOnFinley(Test_Lazy):
+   def setUp(self):
+       self.domain = Rectangle(NE,NE+1,2)
+       self.mainfs = Function(self.domain)
+       self.otherfs = Solution(self.domain)
+
+   def tearDown(self):
+       del self.domain
        del self.mainfs
        del self.otherfs
 
-
 class Test_TableInterpolationOnFinley(Test_TableInterpolation):
     def setUp(self):
         self.domain=Brick(4,4,4)
@@ -163,17 +175,18 @@ class Test_TableInterpolationOnFinley(Test_TableInterpolation):
         del self.functionspaces
 
 
-#This functionality is only testes on Finley.
-#It is not finley specific but it does need a known set of input points so I've chosen to put it here
+# This functionality is only tested on Finley.
+# It is not Finley-specific but it does need a known set of input points
+# so I've chosen to put it here
 class Test_OtherInterpolationOnFinley(unittest.TestCase):
     def setUp(self):
         self.r=Rectangle(4,1).getX()[0]+2
         self.z=Data(2)
-        
+
     def tearDown(self):
         del self.z
         del self.r
-       
+
     def test_nonuniformint(self):
         self.assertRaises(RuntimeError, self.z.nonuniformInterpolate, [0,1], [5,6], True)
         self.assertRaises(RuntimeError, self.z.nonuniformInterpolate, [3,4], [5,6], True)
@@ -184,7 +197,7 @@ class Test_OtherInterpolationOnFinley(unittest.TestCase):
         self.assertTrue(Lsup(inf(tmp)-0.090909)<0.00001, "Internal interpolate failed")
         tmp=self.r.nonuniformInterpolate([2.125, 2.4, 2.5, 3.2], [1, -1, 2, 4], False)
         self.assertTrue(Lsup(sup(tmp)-3.42857)<0.00001, "Internal interpolate failed")
-        
+
     def test_nonuniformSlope(self):
         self.assertRaises(RuntimeError, self.z.nonuniformSlope, [0,1], [5,6], True)
         self.assertRaises(RuntimeError, self.z.nonuniformSlope, [3,4], [5,6], True)
@@ -193,7 +206,7 @@ class Test_OtherInterpolationOnFinley(unittest.TestCase):
         tmp=self.r.nonuniformSlope([2.125, 2.4, 2.5, 3.2], [1, -1, 2, 4], False)
         self.assertTrue(Lsup(sup(tmp)-30)<0.00001, "Internal interpolate failed")
         self.assertTrue(Lsup(inf(tmp)+7.27273)<0.00001, "Internal interpolate failed")
-        
+
 class Test_CSVOnFinley(Test_saveCSV):
     def setUp(self):
         try:
@@ -228,7 +241,7 @@ class Test_CSVOnFinley(Test_saveCSV):
     def tearDown(self):
         del self.domain
 
-    @unittest.skipIf(mpisize>1, "more than 1 MPI rank")
+    @unittest.skipIf(mpisize > 1, "more than 1 MPI rank")
     def test_csv_multiFS(self):
         fname=os.path.join(self.workdir, "test_multifs.csv")
         sol=Data(8,Solution(self.domain))
@@ -252,164 +265,161 @@ class Test_CSVOnFinley(Test_saveCSV):
         self.assertRaises(RuntimeError, saveDataCSV, fname, A=dirac, B=rfun)
         self.assertRaises(RuntimeError, saveDataCSV, fname, A=bound, B=conzz)
 
-        
 class Test_DiracOnFinley(unittest.TestCase):
   def test_rectconstr(self):
-    self.assertRaises(RuntimeError, Rectangle, 4,4, diracPoints=[(0,0)])
-    self.assertRaises(RuntimeError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
-    self.assertRaises(RuntimeError, Rectangle, 4,4, diracPoints=[(0,)], diracTags=["test"])
+    self.assertRaises(ValueError, Rectangle, 4,4, diracPoints=[(0,0)])
+    self.assertRaises(ValueError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=[40])
+    self.assertRaises(ValueError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=[40])
+    self.assertRaises(ValueError, Rectangle, 4,4, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
+    self.assertRaises(ValueError, Rectangle, 4,4, diracPoints=[(0,)], diracTags=["test"])
     z=Rectangle(4,4, diracPoints=[(0,0), (0.25,0.25)], diracTags=[40,51])
-    z=Rectangle(4,4, diracPoints=[(0.125,0.625), (0.5,1), (0.75, 0.25), (0.89, 0.875)], diracTags=["A", "B", "A", "C"]) 
+    z=Rectangle(4,4, diracPoints=[(0.125,0.625), (0.5,1), (0.75, 0.25), (0.89, 0.875)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0.0, 0.5), (0.5, 1.0), (0.75, 0.25), (1.0, 0.75)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
-      self.assertEquals(inf(v[0]), 0)
-      self.assertEquals(inf(v[1]), 0.25)
-      self.assertEquals(Lsup(v[0]), 1)
-      self.assertEquals(Lsup(v[1]), 1)
+      self.assertEqual(v.toListOfTuples(),[(0.0, 0.5), (0.5, 1.0), (0.75, 0.25), (1.0, 0.75)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(inf(v[0]), 0)
+      self.assertEqual(inf(v[1]), 0.25)
+      self.assertEqual(Lsup(v[0]), 1)
+      self.assertEqual(Lsup(v[1]), 1)
     v.setTaggedValue("A",(-10,0.5))
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
     v.setTaggedValue(500,(-100,-100))   # non-existant tag
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-    self.assertEquals(z.showTagNames(), 'A, B, C, bottom, left, right, top')
-    self.assertEquals(z.getTag("C"), 42)
-   
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+    self.assertEqual(z.showTagNames(), 'A, B, C, bottom, left, right, top')
+    self.assertEqual(z.getTag("C"), 42)
+
   def test_brickconstr(self):
-    self.assertRaises(RuntimeError, Brick, 4,4, diracPoints=[(0,0,0)])
-    self.assertRaises(RuntimeError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
-    self.assertRaises(RuntimeError, Brick, 4,4, diracPoints=[(0,0)], diracTags=["test"])
+    self.assertRaises(ValueError, Brick, 4,4, diracPoints=[(0,0,0)])
+    self.assertRaises(ValueError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
+    self.assertRaises(ValueError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
+    self.assertRaises(ValueError, Brick, 4,4, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
+    self.assertRaises(ValueError, Brick, 4,4, diracPoints=[(0,0)], diracTags=["test"])
     z=Brick(4,4, diracPoints=[(0,0,0), (0.25,0.25, 0.25)], diracTags=[40,51])
-    z=Brick(4,4, diracPoints=[(0.125,0.625,0), (0.5,1,0), (0.75, 0.25, 0.51), (0.89, 0.875,1)], diracTags=["A", "B", "A", "C"]) 
+    z=Brick(4,4, diracPoints=[(0.125,0.625,0), (0.5,1,0), (0.75, 0.25, 0.51), (0.89, 0.875,1)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0.0, 0.5, 0.0), (0.5, 1.0, 0.0), (0.75, 0.25, 1), (1.0, 0.75, 1.0)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
-      self.assertEquals(inf(v[0]), 0)
-      self.assertEquals(inf(v[1]), 0.25)
-      self.assertEquals(Lsup(v[0]), 1)
-      self.assertEquals(Lsup(v[1]), 1)
+      self.assertEqual(v.toListOfTuples(),[(0.0, 0.5, 0.0), (0.5, 1.0, 0.0), (0.75, 0.25, 1), (1.0, 0.75, 1.0)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(inf(v[0]), 0)
+      self.assertEqual(inf(v[1]), 0.25)
+      self.assertEqual(Lsup(v[0]), 1)
+      self.assertEqual(Lsup(v[1]), 1)
     v.setTaggedValue("A",(-10,0.5,-500))
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-      self.assertEquals(inf(v[2]),-500)
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+      self.assertEqual(inf(v[2]),-500)
     v.setTaggedValue(500,(-100,-100, -100))     # non-existant tag
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-      self.assertEquals(z.showTagNames(), 'A, B, C, back, bottom, front, left, right, top')
-    self.assertEquals(z.getTag("C"), 42)
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+      self.assertEqual(z.showTagNames(), 'A, B, C, back, bottom, front, left, right, top')
+    self.assertEqual(z.getTag("C"), 42)
 
 
   def test_rectReadMesh(self):
     fname=os.path.join(FINLEY_TEST_MESH_PATH,'rect_4x4.fly')
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,)])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0)])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0), (1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
-    z=ReadMesh(fname, diracPoints=[(0,0), (0.25,0.25)], diracTags=[40,51])   
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,)])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0)])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0), (1,1)], diracTags=[40])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
+    z=ReadMesh(fname, diracPoints=[(0,0), (0.25,0.25)], diracTags=[40,51])
     z=ReadMesh(fname, diracPoints=[(0.125,0.625), (0.5,1), (0.75, 0.25), (0.89, 0.875)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0.0, 0.5), (0.5, 1.0), (0.75, 0.25), (1.0, 0.75)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
-      self.assertEquals(inf(v[0]), 0)
-      self.assertEquals(inf(v[1]), 0.25)
-      self.assertEquals(Lsup(v[0]), 1)
-      self.assertEquals(Lsup(v[1]), 1)
+      self.assertEqual(v.toListOfTuples(),[(0.0, 0.5), (0.5, 1.0), (0.75, 0.25), (1.0, 0.75)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(inf(v[0]), 0)
+      self.assertEqual(inf(v[1]), 0.25)
+      self.assertEqual(Lsup(v[0]), 1)
+      self.assertEqual(Lsup(v[1]), 1)
     v.setTaggedValue("A",(-10,0.5))
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
     v.setTaggedValue(500,(-100,-100))   # non-existant tag
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-    self.assertEquals(z.showTagNames(), 'A, B, C, bottom, left, right, top')
-    self.assertEquals(z.getTag("C"), 42)    
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+    self.assertEqual(z.showTagNames(), 'A, B, C, bottom, left, right, top')
+    self.assertEqual(z.getTag("C"), 42)
 
 
   def test_brickReadMesh(self):
     fname=os.path.join(FINLEY_TEST_MESH_PATH,'brick_4x4x4.fly')
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0)])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0,0)])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, ReadMesh, fname, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
-    z=ReadMesh(fname, diracPoints=[(0,0,1), (0.25,0.25, 0.25)], diracTags=[40,51])   
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0)])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0,0)])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
+    self.assertRaises(ValueError, ReadMesh, fname, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
+    z=ReadMesh(fname, diracPoints=[(0,0,1), (0.25,0.25, 0.25)], diracTags=[40,51])
     z=ReadMesh(fname, diracPoints=[(0.125,0.625,0), (0.5,1,1), (0.75, 0.25,0), (0.89, 0.875, 0.5)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0.0, 0.5, 0.0), (0.5, 1.0, 1.0), (0.75, 0.25, 0.0), (1.0, 0.75, 0.5)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
-      self.assertEquals(inf(v[0]), 0)
-      self.assertEquals(inf(v[1]), 0.25)
-      self.assertEquals(Lsup(v[0]), 1)
-      self.assertEquals(Lsup(v[1]), 1)
+      self.assertEqual(v.toListOfTuples(),[(0.0, 0.5, 0.0), (0.5, 1.0, 1.0), (0.75, 0.25, 0.0), (1.0, 0.75, 0.5)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(inf(v[0]), 0)
+      self.assertEqual(inf(v[1]), 0.25)
+      self.assertEqual(Lsup(v[0]), 1)
+      self.assertEqual(Lsup(v[1]), 1)
     v.setTaggedValue("A",(-10,0.5,-0.5))
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-      self.assertEquals(inf(v[2]), -0.5)
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+      self.assertEqual(inf(v[2]), -0.5)
     v.setTaggedValue(500,(-100,-100, -100))     # non-existant tag
     if mpisize==1:
-      self.assertEquals(inf(v[0]), -10)
-      self.assertEquals(inf(v[1]), 0.5)
-      self.assertEquals(inf(v[2]), -0.5)
-    self.assertEquals(z.showTagNames(), 'A, B, C, back, bottom, front, left, right, top')
-    self.assertEquals(z.getTag("C"), 203)    
-
-
+      self.assertEqual(inf(v[0]), -10)
+      self.assertEqual(inf(v[1]), 0.5)
+      self.assertEqual(inf(v[2]), -0.5)
+    self.assertEqual(z.showTagNames(), 'A, B, C, back, bottom, front, left, right, top')
+    self.assertEqual(z.getTag("C"), 203)
 
   def test_rectReadGmsh(self):
     fname=os.path.join(FINLEY_TEST_MESH_PATH, 'rect_test.msh')
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 2, diracPoints=[(0,0)])
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 2, diracPoints=[(0,0), (1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 2, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
+    self.assertRaises(ValueError, ReadGmsh, fname, 2, diracPoints=[(0,0)])
+    self.assertRaises(ValueError, ReadGmsh, fname, 2, diracPoints=[(0,0), (1,1)], diracTags=[40])
+    self.assertRaises(ValueError, ReadGmsh, fname, 2, diracPoints=[(0,0), (1,1)], diracTags=["cows"])
     z=ReadGmsh(fname, 2, diracPoints=[(0,0), (1,1)], diracTags=[40,51])
     z=ReadGmsh(fname, 2, diracPoints=[(0,0),(0,1),(1,0),(1,1)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0,0), (0,1), (1,0), (1,1)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(v.toListOfTuples(),[(0,0), (0,1), (1,0), (1,1)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
     v.setTaggedValue("A",(-10,99))
-    self.assertEquals(inf(v[0]), -10)
-    self.assertEquals(Lsup(v[1]), 99)
+    self.assertEqual(inf(v[0]), -10)
+    self.assertEqual(Lsup(v[1]), 99)
     v.setTaggedValue(500,(-100,-100))   # non-existant tag
-    self.assertEquals(inf(v[0]), -10)
-    self.assertEquals(Lsup(v[1]), 99)
-    self.assertEquals(z.showTagNames(), 'A, B, C')
-    self.assertEquals(z.getTag("C"), 42)
+    self.assertEqual(inf(v[0]), -10)
+    self.assertEqual(Lsup(v[1]), 99)
+    self.assertEqual(z.showTagNames(), 'A, B, C')
+    self.assertEqual(z.getTag("C"), 42)
 
   def test_brickReadGmsh(self):
     fname=os.path.join(FINLEY_TEST_MESH_PATH, 'brick_test.msh')
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 3, diracPoints=[(0,0)])
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 3, diracPoints=[(0,0,0)])
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 3, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
-    self.assertRaises(RuntimeError, ReadGmsh, fname, 3, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
+    self.assertRaises(ValueError, ReadGmsh, fname, 3, diracPoints=[(0,0)])
+    self.assertRaises(ValueError, ReadGmsh, fname, 3, diracPoints=[(0,0,0)])
+    self.assertRaises(ValueError, ReadGmsh, fname, 3, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40])
+    self.assertRaises(ValueError, ReadGmsh, fname, 3, diracPoints=[(0,0,0), (1,1,1)], diracTags=["cows"])
     z=ReadGmsh(fname, 3, diracPoints=[(0,0,0), (1,1,1)], diracTags=[40,51])
     z=ReadGmsh(fname, 3, diracPoints=[(0,0,0),(0,1,0),(1,0,1),(1,1,1)], diracTags=["A", "B", "A", "C"])
     v=interpolate(z.getX(), DiracDeltaFunctions(z))
     if mpisize==1:
-      self.assertEquals(v.toListOfTuples(),[(0,0,0), (0,1,0), (1,0,1), (1,1,1)])
-      self.assertEquals(v.getNumberOfDataPoints(), 4)
+      self.assertEqual(v.toListOfTuples(),[(0,0,0), (0,1,0), (1,0,1), (1,1,1)])
+      self.assertEqual(v.getNumberOfDataPoints(), 4)
     v.setTaggedValue("A",(-10,99,-98))
-    self.assertEquals(inf(v[0]), -10)
-    self.assertEquals(Lsup(v[1]), 99)
-    self.assertEquals(inf(v[2]), -98)
+    self.assertEqual(inf(v[0]), -10)
+    self.assertEqual(Lsup(v[1]), 99)
+    self.assertEqual(inf(v[2]), -98)
     v.setTaggedValue(500,(-100,-100,-100))   # non-existant tag
-    self.assertEquals(inf(v[0]), -10)
-    self.assertEquals(Lsup(v[1]), 99)
-    self.assertEquals(z.showTagNames(), 'A, B, C')
-    self.assertEquals(z.getTag("C"), 42)
+    self.assertEqual(inf(v[0]), -10)
+    self.assertEqual(Lsup(v[1]), 99)
+    self.assertEqual(z.showTagNames(), 'A, B, C')
+    self.assertEqual(z.getTag("C"), 42)
 
 
 if __name__ == '__main__':
diff --git a/finley/test/python/run_generators.py b/finley/test/python/run_generators.py
index bc72684..c89cc86 100644
--- a/finley/test/python/run_generators.py
+++ b/finley/test/python/run_generators.py
@@ -27,12 +27,11 @@ __url__="https://launchpad.net/escript-finley"
 checks the mesh generators against the reference meshes in test_meshes and test the finley integration schemes.
 """
 
-import sys
 import os
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadGmsh, ReadMesh
+from esys.finley import Rectangle, Brick, JoinFaces, ReadGmsh, ReadMesh
 
 mpisize = getMPISizeWorld()
 
@@ -54,7 +53,7 @@ FINLEY_WORKDIR_PATH=FINLEY_WORKDIR
 TEST_FILE_PRE="test_"
 
 @unittest.skipIf(mpisize>1, "multiple processes not supported for mesh writes")
-class Test_Generators(unittest.TestCase):
+class Test_GeneratorsOnFinley(unittest.TestCase):
 
    def checker(self, dom, reference):
       dom_file=os.path.join(FINLEY_WORKDIR_PATH, TEST_FILE_PRE+reference)
@@ -62,7 +61,7 @@ class Test_Generators(unittest.TestCase):
 # Uncomment this section to dump the files for regression testing
       #if True:
       #   dom.write(os.path.join(FINLEY_TEST_MESH_PATH,reference))
-      dom_string=open(dom_file).read().splitlines() 
+      dom_string=open(dom_file).read().splitlines()
       ref_string=open(os.path.join(FINLEY_TEST_MESH_PATH,reference)).read().splitlines()
       self.assertEqual(len(dom_string),len(ref_string),"number of lines in mesh files does not match reference")
       taglist=[]
@@ -199,7 +198,7 @@ class Test_Generators(unittest.TestCase):
       self.checker(my_dom,file)
 
 @unittest.skipIf(mpisize>1, "multiple processes not supported for mesh writes")
-class Test_GMSHReader(unittest.TestCase):
+class Test_GmshReaderOnFinley(unittest.TestCase):
    def compare(self, test_file, reference_file):
       dom_string=open(test_file).read().splitlines()
       ref_string=open(reference_file).read().splitlines()
@@ -259,7 +258,7 @@ class Test_GMSHReader(unittest.TestCase):
          self.compare(test, os.path.join(FINLEY_TEST_MESH_PATH,ref))
 
 @unittest.skipIf(mpisize>1, "multiple processes not supported for mesh writes")
-class Test_Reader(unittest.TestCase):
+class Test_ReaderOnFinley(unittest.TestCase):
    def test_ReadWriteTagNames(self):
        file="hex_2D_order2.msh"
        test = os.path.join(FINLEY_WORKDIR,"test.fly")
@@ -276,7 +275,7 @@ class Test_Reader(unittest.TestCase):
        self.assertTrue(dom2.isValidTagName("A"))
        self.assertTrue(dom2.isValidTagName("B"))
 
-class Test_Integration(unittest.TestCase):
+class Test_IntegrationOnFinley(unittest.TestCase):
    TOL=EPSILON*500.
    def __test_2DQ(self,dom,order):
        x=Function(dom).getX()
@@ -300,7 +299,7 @@ class Test_Integration(unittest.TestCase):
                 ref += 1./(i+1)
              error=abs(res-ref)/abs(ref)
              self.assertTrue(error<=self.TOL,"surface integration for order (%s,%s) failed. True value = %s, calculated = %s"%(i,j,ref,res))
-            
+
    def __test_2DT(self,dom,order,raise_tol=1.):
        x=Function(dom).getX()
        x_bound=FunctionOnBoundary(dom).getX()
@@ -384,7 +383,7 @@ class Test_Integration(unittest.TestCase):
                 error=abs(res-ref)/abs(ref)
                 self.assertTrue(error<=self.TOL*raise_tol,"surface integration for order (%s,%s,%s) failed. True value = %s, calculated = %s (error=%e)"%(i,j,k,ref,res,error))
 
-   #===================================================================================================
+   #==========================================================================
    def test_hex2D_order1_integorder1(self):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,integrationOrder=1)
@@ -425,7 +424,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,integrationOrder=10)
       self.__test_2DQ(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_hex2D_order2_integorder1(self):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,order=2,integrationOrder=1)
@@ -466,7 +465,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,order=2,integrationOrder=10)
       self.__test_2DQ(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_hex2D_macro_integorder1(self):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,order=-1,useElementsOnFace=0,integrationOrder=1)
@@ -507,7 +506,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Rectangle(NE,NE,order=-1,useElementsOnFace=0,integrationOrder=10)
       self.__test_2DQ(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_Tet2D_order1_integorder1(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri3.fly"),optimize=False,integrationOrder=1)
       self.__test_2DT(my_dom,1)
@@ -538,7 +537,7 @@ class Test_Integration(unittest.TestCase):
    def test_Tet2D_order1_integorder10(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri3.fly"),optimize=False,integrationOrder=10)
       self.__test_2DT(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_Tet2D_order2_integorder2(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri6.fly"),optimize=False,integrationOrder=1)
       self.__test_2DT(my_dom,1)
@@ -569,7 +568,7 @@ class Test_Integration(unittest.TestCase):
    def test_Tet2D_order2_integorder10(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri6.fly"),optimize=False,integrationOrder=10)
       self.__test_2DT(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_Tet2D_macro_integmacro(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri6_macro.fly"),optimize=False,integrationOrder=1)
       self.__test_2DT(my_dom,1)
@@ -600,7 +599,7 @@ class Test_Integration(unittest.TestCase):
    def test_Tet2D_macro_integorder10(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tri6_macro.fly"),optimize=False,integrationOrder=10)
       self.__test_2DT(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_hex3D_order1_integorder1(self):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,integrationOrder=1)
@@ -641,7 +640,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,integrationOrder=10)
       self.__test_3DQ(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_hex3D_order2_integorder2(self):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,order=2,integrationOrder=1)
@@ -682,7 +681,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,order=2,integrationOrder=10)
       self.__test_3DQ(my_dom,10)
-   #===================================================================================================
+   #==========================================================================
    def test_hex3D_macro_integmacro(self):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,useElementsOnFace=0,order=-1,integrationOrder=1)
@@ -723,7 +722,7 @@ class Test_Integration(unittest.TestCase):
       NE=getMPIRankWorld()
       my_dom=Brick(NE,NE,NE,order=-1,useElementsOnFace=0,integrationOrder=10)
       self.__test_3DQ(my_dom,10)
-   #==========================================================================================
+   #==========================================================================
    def test_Tet3D_order1_integorder1(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet4.fly"),optimize=False,integrationOrder=1)
       self.__test_3DT(my_dom,1)
@@ -754,7 +753,7 @@ class Test_Integration(unittest.TestCase):
    def test_Tet3D_order1_integorder10(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet4.fly"),optimize=False,integrationOrder=10)
       self.__test_3DT(my_dom,10,1./sqrt(EPSILON))
-   #==========================================================================================
+   #==========================================================================
    def test_Tet3D_order2_integorder2(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet10.fly"),optimize=False,integrationOrder=1)
       self.__test_3DT(my_dom,1)
@@ -785,7 +784,7 @@ class Test_Integration(unittest.TestCase):
    def test_Tet3D_order2_integorder10(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet10.fly"),optimize=False,integrationOrder=10)
       self.__test_3DT(my_dom,10,1./sqrt(EPSILON))
-   #==========================================================================================
+   #==========================================================================
    def test_Tet3D_macro_integmacro(self):
       my_dom = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet10_macro.fly"),optimize=False,integrationOrder=1)
       self.__test_3DT(my_dom,1)
@@ -819,3 +818,4 @@ class Test_Integration(unittest.TestCase):
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_inputOutput.py b/finley/test/python/run_inputOutput.py
index 816374c..bfc569c 100644
--- a/finley/test/python/run_inputOutput.py
+++ b/finley/test/python/run_inputOutput.py
@@ -60,8 +60,7 @@ NE0 = 7 * mpisize
 NE1 = 11
 NE2 = 5
 
-class Test_InputOutput(unittest.TestCase):
-
+class Test_InputOutputOnFinley(unittest.TestCase):
      # Check that two domains are equal using Fourier integrals
      # We cannot compare the X coordinates since they are on different domains
      def domainsEqual(self, m1, m2, nft=100):
@@ -154,7 +153,7 @@ class Test_InputOutput(unittest.TestCase):
         self.assertEqual(dom.getTag('tag1'),1,'error with tag1')
         self.assertEqual(dom.getTag('tag2'),2,'error with tag2')
         self.assertEqual(dom.getTag('tag3'),3,'error with tag3')
-        self.assertRaises(RuntimeError, dom.getTag, 'tag4')
+        self.assertRaises(ValueError, dom.getTag, 'tag4')
      
      @unittest.skipIf(mpisize>1, "more than 1 MPI rank")
      def test_gmshNamedTags(self):
@@ -172,7 +171,7 @@ class Test_InputOutput(unittest.TestCase):
         self.assertEqual(dom.getTag('bottom'),10,'error with bottom,')
         self.assertEqual(dom.getTag('left'),1,'error with left')
         self.assertEqual(dom.getTag('right'),2,'error with reight')
-        self.assertRaises(RuntimeError, dom.getTag, 'tag4')
+        self.assertRaises(ValueError, dom.getTag, 'tag4')
 
      @unittest.skipIf(not loadIsConfigured(), "load not configured")
      def test_mesh_dump_to_NetCDF_brick(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_2D1.py b/finley/test/python/run_linearPDEsOnFinley1_2D1.py
index 0edf900..5653a6e 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_2D1.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_2D1.py
@@ -35,29 +35,43 @@ Test suite for the linearPDE  and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do1
+from test_pdetools import Test_pdetools
+from esys.finley import Rectangle
 
+NE=10 # number of elements in each spatial direction (must be even)
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
+class Test_AssemblageOnFinleyHex2DOrder1(Test_assemblage_2Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
+class Test_LinearPDEOnFinleyHex2DOrder1(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-NE=10 # number of element in each spatial direction (must be even)
+class Test_PDEToolsOnFinleyHex2DOrder1(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnFinleyHex2DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_TransportPDEOnFinleyHex2DOrder1(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -68,3 +82,4 @@ class Test_LinearPDEOnFinleyHex2DOrder1(Test_LinearPDE,Test_pdetools,Test_assemb
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_linearPDEsOnFinley1_2D2.py b/finley/test/python/run_linearPDEsOnFinley1_2D2.py
index f618b1f..1a3d3f8 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_2D2.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_2D2.py
@@ -35,29 +35,43 @@ Test suite for the linearPDE  and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do2
+from test_pdetools import Test_pdetools
+from esys.finley import Rectangle
 
+NE=10 # number of elements in each spatial direction (must be even)
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
+class Test_AssemblageOnFinleyHex2DOrder2(Test_assemblage_2Do2):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,2)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
 
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
+class Test_LinearPDEOnFinleyHex2DOrder2(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,2)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
 
-NE=10 # number of element in each spatial direction (must be even)
+class Test_PDEToolsOnFinleyHex2DOrder2(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,2)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnFinleyHex2DOrder2(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do2, Test_TransportPDE):
+class Test_TransportPDEOnFinleyHex2DOrder2(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -68,3 +82,4 @@ class Test_LinearPDEOnFinleyHex2DOrder2(Test_LinearPDE,Test_pdetools,Test_assemb
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D1.py b/finley/test/python/run_linearPDEsOnFinley1_3D1.py
index 3f41ad3..fca10c9 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D1.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D1.py
@@ -35,29 +35,43 @@ Test suite for the linearPDE  and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_3Do1
+from test_pdetools import Test_pdetools
+from esys.finley import Brick
 
+NE=10 # number of elements in each spatial direction (must be even)
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
+class Test_AssemblageOnFinleyHex3DOrder1(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
+class Test_LinearPDEOnFinleyHex3DOrder1(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-NE=10 # number of element in each spatial direction (must be even)
+class Test_PDEToolsOnFinleyHex3DOrder1(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,1)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnFinleyHex3DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_TransportPDEOnFinleyHex3DOrder1(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part1.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part1.py
index 13cd7fc..beb5cbd 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part1.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part1.py
@@ -31,21 +31,21 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_LinearPDE
-from esys.escript import *
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
 from esys.finley import Brick
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
+class Test_LinearPDEOnFinleyHex3DOrder2(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,2)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnFinleyHex3DOrder2_part1(Test_LinearPDE):
+class Test_TransportPDEOnFinleyHex3DOrder2(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part2.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part2.py
index 0e52a0a..4d0472a 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part2.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part2.py
@@ -27,27 +27,19 @@ Test suite for the linearPDE and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from test_pdetools import Test_pdetools
-from esys.escript import *
+from esys.escript import hasFeature
 from esys.finley import Brick
 
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
+# trilinos and paso solvers differ slightly in accuracy, so relax the tolerance when paso is unavailable
+TOL = 1.e-7 if hasFeature('paso') else 5.e-7
 
-class Test_LinearPDEOnFinleyHex3DOrder2_part2(Test_pdetools):
-   RES_TOL=1.e-7
+class Test_PDEToolsOnFinleyHex3DOrder2(Test_pdetools):
+   RES_TOL=TOL
    ABS_TOL=1.e-8
    def setUp(self):
         self.domain = Brick(NE,NE,NE,2)
@@ -57,4 +49,4 @@ class Test_LinearPDEOnFinleyHex3DOrder2_part2(Test_pdetools):
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
-    
+
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-1.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-1.py
index 13d49cb..714e4d9 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-1.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-1.py
@@ -27,24 +27,14 @@ Test suite for the linearPDE and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from test_assemblage_3Do2 import Test_assemblage_3Do2
 from esys.finley import Brick
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
-
-class Test_LinearPDEOnFinleyHex3DOrder2_part3_1(Test_assemblage_3Do2):
+class Test_AssemblageOnFinleyHex3DOrder2_part3_1(Test_assemblage_3Do2):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-2.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-2.py
index ebf2196..1a348f4 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-2.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-2.py
@@ -27,25 +27,14 @@ Test suite for the linearPDE and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from test_assemblage_3Do2 import Test_assemblage_3Do2_cont
 from esys.finley import Brick
 
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
-
-class Test_LinearPDEOnFinleyHex3DOrder2_part3_2(Test_assemblage_3Do2_cont):
+class Test_AssemblageOnFinleyHex3DOrder2_part3_2(Test_assemblage_3Do2_cont):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-3.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-3.py
index 0617aec..55ad7b5 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-3.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-3.py
@@ -27,24 +27,14 @@ Test suite for the linearPDE and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from test_assemblage_3Do2 import Test_assemblage_3Do2_cont2
 from esys.finley import Brick
 
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
-
-class Test_LinearPDEOnFinleyHex3DOrder2_part3_3(Test_assemblage_3Do2_cont2):
+class Test_AssemblageOnFinleyHex3DOrder2_part3_3(Test_assemblage_3Do2_cont2):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-4.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-4.py
index 9621698..e6443c3 100644
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-4.py
+++ b/finley/test/python/run_linearPDEsOnFinley1_3D2_part3-4.py
@@ -27,25 +27,14 @@ Test suite for the linearPDE and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from test_assemblage_3Do2 import Test_assemblage_3Do2_cont3 
 from esys.finley import Brick
 
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
 NE=10 # number of element in each spatial direction (must be even)
 
-
-class Test_LinearPDEOnFinleyHex3DOrder2_part3_4(Test_assemblage_3Do2_cont3):
+class Test_AssemblageOnFinleyHex3DOrder2_part3_4(Test_assemblage_3Do2_cont3):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_linearPDEsOnFinley1_3D2_part4.py b/finley/test/python/run_linearPDEsOnFinley1_3D2_part4.py
deleted file mode 100644
index 1f8bc87..0000000
--- a/finley/test/python/run_linearPDEsOnFinley1_3D2_part4.py
+++ /dev/null
@@ -1,60 +0,0 @@
-
-########################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# Earth Systems Science Computational Center (ESSCC)
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-########################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-Earth Systems Science Computational Center (ESSCC)
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE and pdetools test on finley
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import os
-
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from test_linearPDEs import Test_TransportPDE
-from esys.escript import *
-from esys.finley import Brick
-
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
-NE=10 # number of element in each spatial direction (must be even)
-
-
-class Test_LinearPDEOnFinleyHex3DOrder2_part4(Test_TransportPDE):
-   RES_TOL=1.e-7
-   ABS_TOL=1.e-8
-   def setUp(self):
-        self.domain = Brick(NE,NE,NE,2)
-        self.order = 2
-   def tearDown(self):
-        del self.domain
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
-
diff --git a/finley/test/python/run_linearPDEsOnFinley2.py b/finley/test/python/run_linearPDEsOnFinley2.py
index bd4a550..abb3db1 100644
--- a/finley/test/python/run_linearPDEsOnFinley2.py
+++ b/finley/test/python/run_linearPDEsOnFinley2.py
@@ -39,14 +39,11 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
+from test_linearPDEs import Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2
 from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
-
+from esys.escript import hasFeature
+from esys.finley import ReadMesh
 
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
@@ -55,9 +52,19 @@ except KeyError:
 
 FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
 
-NE=6 # number of element in each spatial direction (must be even)
+# paso and trilinos solvers differ slightly in accuracy, so relax the tolerance when paso is unavailable
+TOL = 1.e-7 if hasFeature('paso') else 5.e-7
+
+class Test_AssemblageOnFinleyTet2DOrder1(Test_assemblage_2Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
 
-class Test_LinearPDEOnFinleyTet2DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_LinearPDEOnFinleyTet2DOrder1(Test_LinearPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -66,7 +73,52 @@ class Test_LinearPDEOnFinleyTet2DOrder1(Test_LinearPDE,Test_pdetools,Test_assemb
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyTet2DOrder2(Test_LinearPDE_noLumping,Test_pdetools_noLumping,Test_assemblage_2Do2, Test_TransportPDE):
+class Test_PDEToolsOnFinleyTet2DOrder1(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet2DOrder1(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order1.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_AssemblageOnFinleyTet2DOrder2(Test_assemblage_2Do2):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order2.fly"),optimize=False)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyTet2DOrder2(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order2.fly"),optimize=False)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyTet2DOrder2(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_order2.fly"),optimize=False)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet2DOrder2(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -75,20 +127,74 @@ class Test_LinearPDEOnFinleyTet2DOrder2(Test_LinearPDE_noLumping,Test_pdetools_n
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyTet3DOrder1(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_AssemblageOnFinleyTet3DOrder1(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order1.fly"), optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyTet3DOrder1(Test_LinearPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=False)
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
         self.order = 1
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyTet3DOrder2(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do2, Test_TransportPDE):
+class Test_PDEToolsOnFinleyTet3DOrder1(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet3DOrder1(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order1.fly"),optimize=True)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_AssemblageOnFinleyTet3DOrder2(Test_assemblage_3Do2):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order2.fly"),optimize=True)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyTet3DOrder2(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order2.fly"),optimize=True)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyTet3DOrder2(Test_pdetools):
+   RES_TOL=TOL
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order2.fly"),optimize=True)
+        self.order = 2
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet3DOrder2(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order2.fly"),optimize=False)
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_order2.fly"),optimize=True)
         self.order = 2
    def tearDown(self):
         del self.domain
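
The hunks above split each combined finley tet-mesh test class into one class per suite (assemblage, linear PDE, PDE tools, transport PDE), each repeating the same setUp/tearDown. A condensed, hypothetical sketch of that fixture pattern with the shared domain construction factored into a mixin; TetDomainMixin is an illustrative name only, and Test_assemblage_2Do1 is the suite mixin imported from test_assemblage at the top of this test module:

    import os
    from esys.finley import ReadMesh
    # Test_assemblage_2Do1 is assumed to be imported from test_assemblage,
    # as at the top of the test module patched above.

    FINLEY_TEST_DATA = os.environ.get('FINLEY_TEST_DATA', '.')
    FINLEY_TEST_MESH_PATH = os.path.join(FINLEY_TEST_DATA, "data_meshes")

    class TetDomainMixin(object):
        MESH = "tet_2D_order1.fly"   # overridden per mesh/order
        ORDER = 1
        RES_TOL = 1.e-7
        ABS_TOL = 1.e-8

        def setUp(self):
            # same fixture as the classes above, parameterised by MESH/ORDER
            self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH, self.MESH),
                                   optimize=False)
            self.order = self.ORDER

        def tearDown(self):
            del self.domain

    class Test_AssemblageOnTet2DOrder1(TetDomainMixin, Test_assemblage_2Do1):
        pass
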
diff --git a/finley/test/python/run_linearPDEsOnFinley3.py b/finley/test/python/run_linearPDEsOnFinley3.py
index 8cf26c9..b19359a 100644
--- a/finley/test/python/run_linearPDEsOnFinley3.py
+++ b/finley/test/python/run_linearPDEsOnFinley3.py
@@ -39,14 +39,9 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE, Test_Helmholtz, Test_LameEquation
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
-
+from test_linearPDEs import Test_Helmholtz, Test_LameEquation, Test_Poisson
+from test_assemblage import Test_assemblage_2Do1_Contact, Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
+from esys.finley import Rectangle, ReadMesh
 
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
@@ -61,14 +56,15 @@ class Test_LameOnFinley(Test_LameEquation):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = Rectangle(NE,NE,2,useElementsOnFace=0, useFullElementOrder=True)
+        self.domain = Rectangle(NE, NE, 2, useElementsOnFace=0, useFullElementOrder=True)
    def tearDown(self):
         del self.domain
+
 class Test_PoissonOnFinley(Test_Poisson):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = Rectangle(NE,NE,2,useElementsOnFace=0,useFullElementOrder=True)
+        self.domain = Rectangle(NE, NE, 2, useElementsOnFace=0, useFullElementOrder=True)
    def tearDown(self):
         del self.domain
 
@@ -76,23 +72,15 @@ class Test_HelmholtzOnFinley(Test_Helmholtz):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-        self.domain = Rectangle(NE,NE,2,useElementsOnFace=0,useFullElementOrder=True)
+        self.domain = Rectangle(NE, NE, 2, useElementsOnFace=0, useFullElementOrder=True)
    def tearDown(self):
         del self.domain
 
-
 class Test_AssemblePDEwithFinley_2Do1_Contact(Test_assemblage_2Do1_Contact):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=1)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=1)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do1_Contact.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do1_Contact.fly"))
    def tearDown(self):
         del self.domain
 
@@ -100,14 +88,7 @@ class Test_AssemblePDEwithFinley_2Do2_Contact(Test_assemblage_2Do2_Contact):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=2)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=2)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do2_Contact.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do2_Contact.fly"))
    def tearDown(self):
         del self.domain
 
@@ -115,14 +96,7 @@ class Test_AssemblePDEwithFinley_3Do1_Contact(Test_assemblage_3Do1_Contact):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=1)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=1)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do1_Contact.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do1_Contact.fly"))
    def tearDown(self):
         del self.domain
 
@@ -130,14 +104,7 @@ class Test_AssemblePDEwithFinley_3Do2_Contact(Test_assemblage_3Do2_Contact):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=2)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=2)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do2_Contact.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do2_Contact.fly"))
    def tearDown(self):
         del self.domain
 
@@ -146,14 +113,7 @@ class Test_AssemblePDEwithFinley_2Do1_Contact_withElementsOnFace(Test_assemblage
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=1,useElementsOnFace=True)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=1,useElementsOnFace=True)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do1_Contact_withElementsOnFace.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do1_Contact_withElementsOnFace.fly"))
    def tearDown(self):
         del self.domain
 
@@ -161,14 +121,7 @@ class Test_AssemblePDEwithFinley_2Do2_Contact_withElementsOnFace(Test_assemblage
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=2,useElementsOnFace=True)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Rectangle(n0=int(NE/2),n1=NE,l0=0.5,order=2,useElementsOnFace=True)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do2_Contact_withElementsOnFace.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_2Do2_Contact_withElementsOnFace.fly"))
    def tearDown(self):
         del self.domain
 
@@ -176,14 +129,7 @@ class Test_AssemblePDEwithFinley_3Do1_Contact_withElementsOnFace(Test_assemblage
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=1,useElementsOnFace=True)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=1,useElementsOnFace=True)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do1_Contact_withElementsOnFace.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do1_Contact_withElementsOnFace.fly"))
    def tearDown(self):
         del self.domain
 
@@ -191,14 +137,7 @@ class Test_AssemblePDEwithFinley_3Do2_Contact_withElementsOnFace(Test_assemblage
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
-       # d1 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=2,useElementsOnFace=True)
-       # x1 = ContinuousFunction(d1).getX()
-       # ContinuousFunction(d1).setTags(1,Scalar(1,ContinuousFunction(d1)))
-       # d2 = Brick(n0=int(NE/2),n1=NE,n2=NE,l0=0.5,order=2,useElementsOnFace=True)
-       # ContinuousFunction(d2).setTags(2,Scalar(1,ContinuousFunction(d2)))
-       # d2.setX(d2.getX()+[0.5,0.,0.])
-       # self.domain = JoinFaces([d1,d2],optimize=False)
-       self.domain=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do2_Contact_withElementsOnFace.fly"))
+       self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"mesh_3Do2_Contact_withElementsOnFace.fly"))
    def tearDown(self):
         del self.domain
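
The commented-out construction removed in the contact-mesh hunks above used to build the two-block contact domain in code with JoinFaces instead of reading a pre-generated .fly mesh. For reference, a minimal sketch of that older approach for the 2D order-1 case, reconstructed from the deleted comments (NE is an illustrative element count):

    from esys.escript import ContinuousFunction, Scalar
    from esys.finley import Rectangle, JoinFaces

    NE = 6  # elements per direction (illustrative value)
    d1 = Rectangle(n0=int(NE/2), n1=NE, l0=0.5, order=1)
    ContinuousFunction(d1).setTags(1, Scalar(1, ContinuousFunction(d1)))
    d2 = Rectangle(n0=int(NE/2), n1=NE, l0=0.5, order=1)
    ContinuousFunction(d2).setTags(2, Scalar(1, ContinuousFunction(d2)))
    d2.setX(d2.getX() + [0.5, 0.])  # shift the second block so the faces meet at x0=0.5
    domain = JoinFaces([d1, d2], optimize=False)
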
 
diff --git a/finley/test/python/run_linearPDEsOnFinleyMacro.py b/finley/test/python/run_linearPDEsOnFinleyMacro.py
index ecd6b80..7e673e7 100644
--- a/finley/test/python/run_linearPDEsOnFinleyMacro.py
+++ b/finley/test/python/run_linearPDEsOnFinleyMacro.py
@@ -39,14 +39,10 @@ import os
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson,Test_LinearPDE, Test_LinearPDE_noLumping, Test_TransportPDE
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_2Do2, Test_assemblage_3Do1, Test_assemblage_3Do2, \
-                            Test_assemblage_2Do1_Contact,Test_assemblage_2Do2_Contact, Test_assemblage_3Do1_Contact, Test_assemblage_3Do2_Contact
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces, ReadMesh
-import sys
-
+from test_linearPDEs import Test_LinearPDE, Test_TransportPDE
+from test_assemblage import Test_assemblage_2Do1, Test_assemblage_3Do1
+from test_pdetools import Test_pdetools
+from esys.finley import Rectangle, Brick, ReadMesh
 
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
@@ -57,7 +53,34 @@ FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
 
 NE=3
 
-class Test_LinearPDEOnFinleyHex2DMacro(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_AssemblageOnFinleyHex2DMacro(Test_assemblage_2Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyHex2DMacro(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyHex2DMacro(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Rectangle(NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyHex2DMacro(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -66,7 +89,34 @@ class Test_LinearPDEOnFinleyHex2DMacro(Test_LinearPDE,Test_pdetools,Test_assembl
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyHex3DMacro(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_AssemblageOnFinleyHex3DMacro(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyHex3DMacro(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyHex3DMacro(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = Brick(NE,NE,NE,-1,useElementsOnFace=0)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyHex3DMacro(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
@@ -75,17 +125,70 @@ class Test_LinearPDEOnFinleyHex3DMacro(Test_LinearPDE,Test_pdetools,Test_assembl
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyTet2DMacro(Test_LinearPDE,Test_pdetools,Test_assemblage_2Do1, Test_TransportPDE):
+class Test_AssemblageOnFinleyTet2DMacro(Test_assemblage_2Do1):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
         self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_macro.fly"),optimize=False)
-        # self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"../rec.fly"),optimize=False)
         self.order = 1
    def tearDown(self):
         del self.domain
 
-class Test_LinearPDEOnFinleyTet3DMacro(Test_LinearPDE,Test_pdetools,Test_assemblage_3Do1, Test_TransportPDE):
+class Test_LinearPDEOnFinleyTet2DMacro(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyTet2DMacro(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet2DMacro(Test_TransportPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_2D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_AssemblageOnFinleyTet3DMacro(Test_assemblage_3Do1):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnFinleyTet3DMacro(Test_LinearPDE):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_PDEToolsOnFinleyTet3DMacro(Test_pdetools):
+   RES_TOL=1.e-7
+   ABS_TOL=1.e-8
+   def setUp(self):
+        self.domain = ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"tet_3D_macro.fly"),optimize=False)
+        self.order = 1
+   def tearDown(self):
+        del self.domain
+
+class Test_TransportPDEOnFinleyTet3DMacro(Test_TransportPDE):
    RES_TOL=1.e-7
    ABS_TOL=1.e-8
    def setUp(self):
diff --git a/finley/test/python/run_models.py b/finley/test/python/run_models.py
index 1b25456..65cad11 100644
--- a/finley/test/python/run_models.py
+++ b/finley/test/python/run_models.py
@@ -25,24 +25,17 @@ __url__="https://launchpad.net/escript-finley"
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-      
+
 VERBOSE = False
 
 from esys.escript import *
-from esys.escript.models import StokesProblemCartesian, PowerLaw, IncompressibleIsotropicFlowCartesian, FaultSystem
-from esys.escript.models import Mountains
+from esys.escript.models import StokesProblemCartesian, PowerLaw, IncompressibleIsotropicFlowCartesian, FaultSystem, Mountains
 from esys.finley import Rectangle, Brick
 
 from math import pi
-import numpy, os, sys, tempfile
-#======================================================================
-try:
-     FINLEY_WORKDIR=os.environ['FINLEY_WORKDIR']
-except KeyError:
-     FINLEY_WORKDIR='.'
-
-#======================================================================
-class Test_StokesProblemCartesian2D(unittest.TestCase):
+import numpy
+
+class Test_StokesProblemCartesianOnFinley2D(unittest.TestCase):
    def setUp(self):
        NE=6
        self.TOL=1e-3
@@ -59,15 +52,15 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0*mask,p0,verbose=VERBOSE,max_iter=100,usePCG=True)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])/0.25
        error_p=Lsup(p+P1*x[0]*x[1])
@@ -85,9 +78,9 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
@@ -110,16 +103,16 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=100,usePCG=True)
        # u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=100,usePCG=True)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])/0.25
        error_p=Lsup(P1*x[0]*x[1]+p)/P1
@@ -137,15 +130,15 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=50,usePCG=False,iter_restart=18)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])/0.25
        error_p=Lsup(P1*x[0]*x[1]+p)
@@ -163,15 +156,15 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=20,usePCG=False)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])/0.25
        error_p=Lsup(P1*x[0]*x[1]+p)
@@ -190,23 +183,23 @@ class Test_StokesProblemCartesian2D(unittest.TestCase):
               +whereZero(x[0]-1)  * [1.,1.] \
               +whereZero(x[1])    * [1.,0.] \
               +whereZero(x[1]-1)  * [1.,1.]
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*[0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=100,usePCG=False)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])/0.25
        error_p=Lsup(P1*x[0]*x[1]+p)/P1
        self.assertTrue(error_p<10*self.TOL, "pressure error too large.")
        self.assertTrue(error_v0<10*self.TOL, "0-velocity error too large.")
        self.assertTrue(error_v1<10*self.TOL, "1-velocity error too large.")
-#======================================================================
-class Test_StokesProblemCartesian3D(unittest.TestCase):
+
+class Test_StokesProblemCartesianOnFinley3D(unittest.TestCase):
    def setUp(self):
        NE=6
        self.TOL=1e-4
@@ -226,16 +219,15 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE ,max_iter=100,usePCG=True)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])
        error_v2=Lsup(u[2]-u0[2])/0.25**2
@@ -257,10 +249,9 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
@@ -287,20 +278,19 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE ,max_iter=100,usePCG=True)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])
        error_v2=Lsup(u[2]-u0[2])/0.25**2
-       error_p=Lsup(P1*x[0]*x[1]*x[2]+p)/P1 
+       error_p=Lsup(P1*x[0]*x[1]*x[2]+p)/P1
        self.assertTrue(error_p<10*self.TOL, "pressure error too large.")
        self.assertTrue(error_v0<10*self.TOL, "0-velocity error too large.")
        self.assertTrue(error_v1<10*self.TOL, "1-velocity error too large.")
@@ -319,16 +309,15 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=100,usePCG=False,iter_restart=20)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])
        error_v2=Lsup(u[2]-u0[2])/0.25**2
@@ -349,16 +338,15 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL/10)
        u,p=sp.solve(u0,p0, verbose=VERBOSE,max_iter=100,usePCG=False)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])
        error_v2=Lsup(u[2]-u0[2])/0.25**2
@@ -379,16 +367,15 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
               +whereZero(x[1]-1)  * [1.,1.,1.] \
               +whereZero(x[2])    * [1.,1.,0.] \
               +whereZero(x[2]-1)  * [1.,1.,1.]
-       
-       
+
        sp=StokesProblemCartesian(self.domain)
-       
+
        sp.initialize(f=F,fixed_u_mask=mask,eta=ETA)
        u0=(1-x[0])*x[0]*(1-x[1])*x[1]*[0.,0.,1.]
        p0=Scalar(-P1,ReducedSolution(self.domain))
        sp.setTolerance(self.TOL)
        u,p=sp.solve(u0,p0, verbose=VERBOSE ,max_iter=100,usePCG=False)
-       
+
        error_v0=Lsup(u[0]-u0[0])
        error_v1=Lsup(u[1]-u0[1])
        error_v2=Lsup(u[2]-u0[2])/0.25**2
@@ -397,9 +384,8 @@ class Test_StokesProblemCartesian3D(unittest.TestCase):
        self.assertTrue(error_v0<10*self.TOL, "0-velocity error too large.")
        self.assertTrue(error_v1<10*self.TOL, "1-velocity error too large.")
        self.assertTrue(error_v2<10*self.TOL, "2-velocity error too large.")
-#======================================================================
 
-class Test_Mountains3D(unittest.TestCase):
+class Test_MountainsOnFinley3D(unittest.TestCase):
    def setUp(self):
        NE=16
        self.TOL=1e-4
@@ -424,11 +410,11 @@ class Test_Mountains3D(unittest.TestCase):
        mts=Mountains(self.domain,eps=EPS)
        mts.setVelocity(v)
        Z=mts.update()
-       
+
        error_int=abs(integrate(Z*whereZero(FunctionOnBoundary(self.domain).getX()[2]-1.)))
       self.assertTrue(error_int<self.TOL, "Boundary integral is too large.")
 
-class Test_Mountains2D(unittest.TestCase):
+class Test_MountainsOnFinley2D(unittest.TestCase):
    def setUp(self):
        NE=16
        self.TOL=1e-4
@@ -446,18 +432,16 @@ class Test_Mountains2D(unittest.TestCase):
        a1=-(a0*n0)/n1
        v[0]=a0*sin(pi*n0*x[0])* cos(pi*n1*x[1])
        v[1]=a1*cos(pi*n0*x[0])* sin(pi*n1*x[1])
-       
+
        H_t=Scalar(0.0, Solution(self.domain))
        mts=Mountains(self.domain,eps=EPS)
        mts.setVelocity(v)
        Z=mts.update()
-       
+
        error_int=abs(integrate(Z*whereZero(FunctionOnBoundary(self.domain).getX()[1]-1.)))
       self.assertTrue(error_int<self.TOL, "Boundary integral is too large.")
-       
-
 
-class Test_Rheologies(unittest.TestCase):
+class Test_RheologiesOnFinley(unittest.TestCase):
      """
      this is the program used to generate the powerlaw tests:
 
@@ -541,7 +525,7 @@ class Test_Rheologies(unittest.TestCase):
          self.assertTrue(eta>=0,"eta needs to be positive (test %s)"%id)
          error=abs(gamma_dot_*eta-tau_ref)
          self.assertTrue(error<=self.TOL*tau_ref,"eta is wrong: error = gamma_dot_*eta-tau_ref = %s * %s - %s = %s (test %s)"%(gamma_dot_,eta,tau_ref,error,id))
-        
+
      def test_PowerLaw_Linear(self):
          taus= [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
          gamma_dot_s=[0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, 75.0]
@@ -550,7 +534,7 @@ class Test_Rheologies(unittest.TestCase):
          pl.setPowerLaw(eta_N=2.)
          pl.setEtaTolerance(self.TOL)
          for i in range(len(taus)): self.checkResult(i,gamma_dot_s[i], pl.getEtaEff(gamma_dot_s[i]),taus[i])
-        
+
      def test_PowerLaw_QuadLarge(self):
          taus=[0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
          gamma_dot_s=[0.0, 405.0, 1610.0, 3615.0, 6420.0, 10025.0, 14430.0, 19635.0, 25640.0, 32445.0, 40050.0, 44055.0, 48060.0, 52065.0, 56070.0, 60075.0]
@@ -637,12 +621,12 @@ class Test_Rheologies(unittest.TestCase):
          self.assertRaises(ValueError, pl.getEtaEff,gamma_dot_s[0])
          for i in range(len(taus)): self.checkResult(i,gamma_dot_s[i], pl.getEtaEff(gamma_dot_s[i],dt=dt),taus[i])
 
-class Test_IncompressibleIsotropicFlowCartesian(unittest.TestCase):
+class Test_IncompressibleIsotropicFlowCartesianOnFinley(unittest.TestCase):
    TOL=1.e-5
    VERBOSE=False # or True
    A=1.
    P_max=100
-   NE=2*getMPISizeWorld()
+   NE=min(24, 2*getMPISizeWorld())
    tau_Y=10.
    N_dt=10
 
@@ -654,8 +638,8 @@ class Test_IncompressibleIsotropicFlowCartesian(unittest.TestCase):
    eta_2=400.
    N_1=2.
    N_2=3.
-   def getReference(self, t):
 
+   def getReference(self, t):
       B=self.tau_Y/sqrt((self.dom.getDim()-1)*self.dom.getDim()*0.5)
       x=self.dom.getX()
 
@@ -708,7 +692,7 @@ class Test_IncompressibleIsotropicFlowCartesian(unittest.TestCase):
             else:
                v_mask[d]=whereZero(x[d])
       mod.setExternals(F=BF,fixed_v_mask=v_mask)
-       
+
       n=self.dom.getNormal()
       N_t=0
       errors=[]
@@ -734,6 +718,7 @@ class Test_IncompressibleIsotropicFlowCartesian(unittest.TestCase):
          self.assertTrue( error_v <= 10*self.TOL, "time step %s: velocity error %s too high."%(N_t,error_v) )
          self.assertTrue( error_t <= 10*self.TOL, "time step %s: time marker error %s too high."%(N_t,error_t) )
          self.assertTrue( error_s <= 10*self.TOL, "time step %s: stress error %s too high."%(N_t,error_s) )
+
    def tearDown(self):
         del self.dom
 
@@ -819,8 +804,7 @@ class Test_IncompressibleIsotropicFlowCartesian(unittest.TestCase):
        self.latestart=False
        self.runIt(free=0)
 
-
-class Test_FaultSystem(unittest.TestCase):
+class Test_FaultSystemOnFinley(unittest.TestCase):
    EPS=1.e-8
    NE=10
    def test_Fault_MaxValue(self):
@@ -912,7 +896,6 @@ class Test_FaultSystem(unittest.TestCase):
       self.assertTrue(  t == 1, "wrong min tag")
       self.assertTrue(  abs(l-0.70710678118654) <= self.EPS,  "wrong min location")
 
-      
    def test_Fault2D(self):
       f=FaultSystem(dim=2)
       top1=[ [1.,0.], [1.,1.], [0.,1.] ]
@@ -1251,7 +1234,7 @@ class Test_FaultSystem(unittest.TestCase):
       self.assertTrue( isinstance(sn[1], numpy.ndarray), "wrong class of bottom vertex 0")
       self.assertTrue( numpy.linalg.norm(sn[1]-[0.,0.70710678118654746,0.70710678118654746]) < self.EPS, "wrong bottom vertex 1 ")
       dv=f.getDepthVectors(1)
-      self.assertTrue( len(dv) == 3, "wrong number of depth vectors.") 
+      self.assertTrue( len(dv) == 3, "wrong number of depth vectors.")
       self.assertTrue( isinstance(dv[0], numpy.ndarray), "wrong class of depth vector 0")
       self.assertTrue( numpy.linalg.norm(dv[0]-[14.142135623730951, 0., -14.142135623730951]) < self.EPS, "wrong depth vector 0 ")
       self.assertTrue( isinstance(dv[1], numpy.ndarray), "wrong class of depth vector 1")
@@ -1423,7 +1406,6 @@ class Test_FaultSystem(unittest.TestCase):
       self.assertTrue( s<0, "wrong side.")
       self.assertTrue( abs(d-1.)<self.EPS, "wrong distance.")
 
-    
       s,d=f.getSideAndDistance([0.,0.,0.], tag=2)
       self.assertTrue( s<0, "wrong side.")
       self.assertTrue( abs(d-10.)<self.EPS, "wrong distance.")
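
The rheology tests above drive the PowerLaw model entirely through setPowerLaw, setEtaTolerance and getEtaEff. A minimal sketch of that call pattern for the purely Newtonian case, where tau = eta_N*gamma_dot and the effective viscosity returned should equal eta_N; the bare PowerLaw() constructor is an assumption here, while the other calls mirror test_PowerLaw_Linear:

    from esys.escript.models import PowerLaw

    TOL = 1.e-7
    pl = PowerLaw()               # assumed default constructor
    pl.setPowerLaw(eta_N=2.)      # Newtonian viscosity eta_N = 2
    pl.setEtaTolerance(TOL)
    # analogous check to Test_RheologiesOnFinley.checkResult: gamma_dot*eta ~ tau_ref
    for gamma_dot, tau_ref in [(0.0, 0.0), (5.0, 10.0), (10.0, 20.0), (15.0, 30.0)]:
        eta = pl.getEtaEff(gamma_dot)
        assert eta >= 0, "eta needs to be positive"
        assert abs(gamma_dot*eta - tau_ref) <= TOL*max(tau_ref, 1.), "eta is wrong"
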
diff --git a/finley/test/python/run_nlpde3dOnFinley.py b/finley/test/python/run_nonlinearPDEsOnFinley.py
similarity index 79%
rename from finley/test/python/run_nlpde3dOnFinley.py
rename to finley/test/python/run_nonlinearPDEsOnFinley.py
index 8cac15e..ddd5cb6 100644
--- a/finley/test/python/run_nlpde3dOnFinley.py
+++ b/finley/test/python/run_nonlinearPDEsOnFinley.py
@@ -35,17 +35,18 @@ Test suite for the linearPDE  and pdetools test on finley
 
 __author__="Lutz Gross, l.gross at uq.edu.au"
 
-import os
-
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_nonLinearPDE import Test_nonLinearPDEs, Test_nlpde
-from esys.escript import *
-from esys.finley import Rectangle,Brick
-import sys
+from test_nonLinearPDE import Test_nlpde
+from esys.finley import Rectangle, Brick
 
+class Test_nonLinearPDEOnFinley2D(Test_nlpde):
+   def setUp(self):
+        self.domain = Rectangle(l0=1.,l1=1.,n0=10, n1=10) 
+   def tearDown(self):
+        del self.domain
 
-class Test_nonLinearPDE(Test_nlpde):
+class Test_nonLinearPDEOnFinley3D(Test_nlpde):
    def setUp(self):
         self.domain = Brick(l0=1.,l1=1.,l2=1.,n0=10, n1=10,n2=10) 
    def tearDown(self):
@@ -53,3 +54,4 @@ class Test_nonLinearPDE(Test_nlpde):
 
 if __name__ == '__main__':
     run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_pasoSolversOnFinley.py b/finley/test/python/run_pasoSolversOnFinley.py
new file mode 100644
index 0000000..af6d2cc
--- /dev/null
+++ b/finley/test/python/run_pasoSolversOnFinley.py
@@ -0,0 +1,638 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on finley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import Data, Solution, Vector, hasFeature
+from esys.finley import Rectangle, Brick
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_PASO = hasFeature('paso')
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+OPTIMIZE=True
+
+@unittest.skipIf(not HAVE_PASO, "PASO not available")
+class SimpleSolveOnPaso(SimpleSolveTestCase):
+    pass
+
+class SimpleSolveOrder2(SimpleSolveOnPaso):
+    def _getGrad(self, system):
+        """returns exact gradient"""
+        dim = self.domain.getDim()
+        x = Solution(self.domain).getX()
+        if system:
+            g_ex = Data(0., (dim,dim), Solution(self.domain))
+            if dim == 2:
+                g_ex[0,0] = 2.+8.*x[0]+ 5.*x[1]
+                g_ex[0,1] = 3.+5.*x[0]+12.*x[1]
+                g_ex[1,0] = 4.+2.*x[0]+ 6.*x[1]
+                g_ex[1,1] = 2.+6.*x[0]+ 8.*x[1]
+            else:
+                g_ex[0,0] =  2.+6.*x[1]+8.*x[2]+18.*x[0]
+                g_ex[0,1] =  3.+6.*x[0]+7.*x[2]+20.*x[1]
+                g_ex[0,2] =  4.+7.*x[1]+8.*x[0]+22.*x[2]
+                g_ex[1,0] =  4.+3.*x[1]-8.*x[2]- 4.*x[0]
+                g_ex[1,1] =  1.+3.*x[0]+2.*x[2]+14.*x[1]
+                g_ex[1,2] = -6.+2.*x[1]-8.*x[0]+10.*x[2]
+                g_ex[2,0] =  7.-6.*x[1]+2.*x[2]+ 4.*x[0]
+                g_ex[2,1] =  9.-6.*x[0]+8.*x[2]+16.*x[1]
+                g_ex[2,2] =  2.+8.*x[1]+2.*x[0]+ 2.*x[2]
+        else:
+            g_ex = Data(0., (dim,), Solution(self.domain))
+            if dim == 2:
+                g_ex[0] = 2.+8.*x[0]+5.*x[1]
+                g_ex[1] = 3.+5.*x[0]+12.*x[1]
+            else:
+                g_ex[0] = 2.+6.*x[1]+8.*x[2]+18.*x[0]
+                g_ex[1] = 3.+6.*x[0]+7.*x[2]+20.*x[1]
+                g_ex[2] = 4.+7.*x[1]+8.*x[0]+22.*x[2]
+        return g_ex
+
+    def _getSolution(self, system):
+        """returns exact solution"""
+        dim = self.domain.getDim()
+        x = Solution(self.domain).getX()
+        if system:
+            u_ex = Vector(0., Solution(self.domain))
+            if dim == 2:
+                u_ex[0] =  1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
+                u_ex[1] = -1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
+            else:
+                u_ex[0] = 1.+2.*x[0]+3.*x[1]+4.*x[2]+\
+                          6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+\
+                          9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
+                u_ex[1] = 2.+4.*x[0]+1.*x[1]-6.*x[2]+\
+                          3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-\
+                          2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
+                u_ex[2] = -2.+7.*x[0]+9.*x[1]+2*x[2]-\
+                          6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+\
+                          2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
+        else:
+            if dim == 2:
+                u_ex = 1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
+            else:
+                u_ex = 1.+2.*x[0]+3.*x[1]+4.*x[2]+\
+                       6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+\
+                       9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
+        return u_ex
+
+    def _setCoefficients(self, pde, system):
+        """sets PDE coefficients"""
+        super(SimpleSolveOrder2, self)._setCoefficients(pde, system)
+        dim = self.domain.getDim()
+        if system:
+            Y = pde.getCoefficient("Y")
+            if dim == 2:
+                Y[0] = Y[0]-20.
+                Y[1] = Y[1]-10.
+            else:
+                Y[0] = Y[0]-60.
+                Y[1] = Y[1]-20.
+                Y[2] = Y[2]-22.
+            pde.setValue(Y=Y)
+        else:
+            if dim == 2:
+                pde.setValue(Y=-20.)
+            else:
+                pde.setValue(Y=-60.)
+
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_BICGSTAB_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_BICGSTAB_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_PCG_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_PCG_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_TFQMR_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_TFQMR_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_TFQMR_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_TFQMR_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_MINRES_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_MINRES_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_BICGSTAB_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_BICGSTAB_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_BICGSTAB_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_BICGSTAB_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_PCG_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_PCG_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_PCG_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_PCG_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_TFQMR_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_TFQMR_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_TFQMR_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_TFQMR_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_MINRES_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_MINRES_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_MINRES_GaussSeidel(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_MINRES_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_BICGSTAB_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_BICGSTAB_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_BICGSTAB_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_BICGSTAB_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_PCG_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_PCG_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_PCG_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_PCG_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_TFQMR_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_TFQMR_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Paso_MINRES_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Paso_MINRES_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Paso_MINRES_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Paso_MINRES_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_simplesolve.py b/finley/test/python/run_simplesolve.py
deleted file mode 100644
index 4638771..0000000
--- a/finley/test/python/run_simplesolve.py
+++ /dev/null
@@ -1,2501 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE and pdetools tests on finley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import esys.escriptcore.utestselect as unittest, sys
-from esys.escriptcore.testing import *
-
-from esys.escript import *
-from esys.finley import Rectangle,Brick
-from esys.escript.linearPDEs import LinearPDE, SolverOptions
-import numpy
-OPTIMIZE=True
-SOLVER_VERBOSE=False 
-# setNumberOfThreads(2)
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-
-# number of elements in the spatial directions
-NE0=12
-NE1=12
-NE2=8
-
-SOLVER_TOL=1.e-8
-REL_TOL=1.e-6
-
-FAC_DIAG=1.
-FAC_OFFDIAG=-0.4
-
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_TFQMR_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        # Tell about how many MPI CPUs and OpenMP threads
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_PCG_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_TFQMR_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.TFQMR)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_MINRES_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order1_SinglePDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1, optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SinglePDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,l0=1.,l1=1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.+8.*x[0]+5.*x[1]
-        g_ex[1]=3.+5.*x[0]+12.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()),Y=-20.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Rectangle_Order1_SystemPDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Rectangle_Order2_SystemPDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(NE0,NE1,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
-        u_ex[1]=-1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.+8.*x[0]+5.*x[1]
-        g_ex[0,1]=3.+5.*x[0]+12.*x[1]
-        g_ex[1,0]=4.+2.*x[0]+6.*x[1]
-        g_ex[1,1]=2.+6.*x[0]+8.*x[1]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y-[20.,10.],
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolve_Brick_Order1_SinglePDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order1_SystemPDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,1,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SinglePDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()),Y=-60.)
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-class Test_SimpleSolve_Brick_Order2_SystemPDE_Paso_BICGSTAB_GaussSeidel(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(NE0,NE1,NE2,2,optimize=OPTIMIZE)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]+6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
-        u_ex[1]=2.+4.*x[0]+1.*x[1]-6.*x[2]+3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
-        u_ex[2]=-2.+7.*x[0]+9.*x[1]+2*x[2]-6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.+6.*x[1]+8.*x[2]+18.*x[0]
-        g_ex[0,1]=3.+6.*x[0]+7.*x[2]+20.*x[1]
-        g_ex[0,2]=4.+7.*x[1]+8.*x[0]+22.*x[2]
-        g_ex[1,0]=4.+3.*x[1]-8.*x[2]-4.*x[0]
-        g_ex[1,1]=1+3.*x[0]+2.*x[2]+14.*x[1]
-        g_ex[1,2]=-6.+2.*x[1]-8.*x[0]+10.*x[2]
-        g_ex[2,0]=7.-6.*x[1]+2.*x[2]+4.*x[0]
-        g_ex[2,1]=9.-6.*x[0]+8.*x[2]+16.*x[1]
-        g_ex[2,2]=2+8.*x[1]+2.*x[0]+2.*x[2]
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y-numpy.array([60.,20.,22.]),
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
diff --git a/finley/test/python/run_splitworldOnFinley.py b/finley/test/python/run_splitworldOnFinley.py
index 17851ff..698419a 100644
--- a/finley/test/python/run_splitworldOnFinley.py
+++ b/finley/test/python/run_splitworldOnFinley.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -16,7 +16,7 @@
 
 from __future__ import print_function, division
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
diff --git a/finley/test/python/run_trilinosSolversOnFinley.py b/finley/test/python/run_trilinosSolversOnFinley.py
new file mode 100644
index 0000000..0a34eae
--- /dev/null
+++ b/finley/test/python/run_trilinosSolversOnFinley.py
@@ -0,0 +1,690 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Open Software License version 3.0
+http://www.opensource.org/licenses/osl-3.0.php"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on finley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import Data, Solution, Vector, hasFeature
+from esys.finley import Rectangle, Brick
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_TRILINOS = hasFeature('trilinos')
+skip_muelu_long = False #hasFeature("longindex")
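+# skip_muelu_long gates the MueLu AMG cases below (skipped with "MueLu AMG
+# incompatible with index type long"); restoring the commented-out
+# hasFeature("longindex") check would presumably re-enable that skip on
+# builds using a long index type.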
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+OPTIMIZE=True
+
+@unittest.skipIf(not HAVE_TRILINOS, "Trilinos not available")
+class SimpleSolveOnTrilinos(SimpleSolveTestCase):
+    pass
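+# The Test_* cases below only supply setUp()/tearDown(); the domain, solver
+# package, method and preconditioner they set are presumably consumed by
+# SimpleSolveTestCase (imported from test_simplesolve), which performs the
+# actual solve and comparison against the exact solution, while
+# SimpleSolveOrder2 overrides solution, gradient and coefficients for
+# second-order elements.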
+
+class SimpleSolveOrder2(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def _getGrad(self, system):
+        """returns exact gradient"""
+        dim = self.domain.getDim()
+        x = Solution(self.domain).getX()
+        if system:
+            g_ex = Data(0., (dim,dim), Solution(self.domain))
+            if dim == 2:
+                g_ex[0,0] = 2.+8.*x[0]+ 5.*x[1]
+                g_ex[0,1] = 3.+5.*x[0]+12.*x[1]
+                g_ex[1,0] = 4.+2.*x[0]+ 6.*x[1]
+                g_ex[1,1] = 2.+6.*x[0]+ 8.*x[1]
+            else:
+                g_ex[0,0] =  2.+6.*x[1]+8.*x[2]+18.*x[0]
+                g_ex[0,1] =  3.+6.*x[0]+7.*x[2]+20.*x[1]
+                g_ex[0,2] =  4.+7.*x[1]+8.*x[0]+22.*x[2]
+                g_ex[1,0] =  4.+3.*x[1]-8.*x[2]- 4.*x[0]
+                g_ex[1,1] =  1.+3.*x[0]+2.*x[2]+14.*x[1]
+                g_ex[1,2] = -6.+2.*x[1]-8.*x[0]+10.*x[2]
+                g_ex[2,0] =  7.-6.*x[1]+2.*x[2]+ 4.*x[0]
+                g_ex[2,1] =  9.-6.*x[0]+8.*x[2]+16.*x[1]
+                g_ex[2,2] =  2.+8.*x[1]+2.*x[0]+ 2.*x[2]
+        else:
+            g_ex = Data(0., (dim,), Solution(self.domain))
+            if dim == 2:
+                g_ex[0] = 2.+8.*x[0]+5.*x[1]
+                g_ex[1] = 3.+5.*x[0]+12.*x[1]
+            else:
+                g_ex[0] = 2.+6.*x[1]+8.*x[2]+18.*x[0]
+                g_ex[1] = 3.+6.*x[0]+7.*x[2]+20.*x[1]
+                g_ex[2] = 4.+7.*x[1]+8.*x[0]+22.*x[2]
+        return g_ex
+
+    def _getSolution(self, system):
+        """returns exact solution"""
+        dim = self.domain.getDim()
+        x = Solution(self.domain).getX()
+        if system:
+            u_ex = Vector(0., Solution(self.domain))
+            if dim == 2:
+                u_ex[0] =  1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
+                u_ex[1] = -1.+4.*x[0]+2.*x[1]+1.*x[0]**2+6.*x[1]*x[0]+4.*x[1]**2
+            else:
+                u_ex[0] = 1.+2.*x[0]+3.*x[1]+4.*x[2]+\
+                          6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+\
+                          9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
+                u_ex[1] = 2.+4.*x[0]+1.*x[1]-6.*x[2]+\
+                          3.*x[0]*x[1]+2.*x[1]*x[2]-8.*x[2]*x[0]-\
+                          2.*x[0]**2+7.*x[1]**2+5.*x[2]**2
+                u_ex[2] = -2.+7.*x[0]+9.*x[1]+2*x[2]-\
+                          6.*x[0]*x[1]+8.*x[1]*x[2]+2.*x[2]*x[0]+\
+                          2.*x[0]**2+8.*x[1]**2+1.*x[2]**2
+        else:
+            if dim == 2:
+                u_ex = 1.+2.*x[0]+3.*x[1]+4.*x[0]**2+5.*x[1]*x[0]+6.*x[1]**2
+            else:
+                u_ex = 1.+2.*x[0]+3.*x[1]+4.*x[2]+\
+                       6.*x[0]*x[1]+7.*x[1]*x[2]+8.*x[2]*x[0]+\
+                       9.*x[0]**2+10.*x[1]**2+11.*x[2]**2
+        return u_ex
+
+    def _setCoefficients(self, pde, system):
+        """sets PDE coefficients"""
+        super(SimpleSolveOrder2, self)._setCoefficients(pde, system)
+        dim = self.domain.getDim()
+        if system:
+            Y = pde.getCoefficient("Y")
+            if dim == 2:
+                Y[0] = Y[0]-20.
+                Y[1] = Y[1]-10.
+            else:
+                Y[0] = Y[0]-60.
+                Y[1] = Y[1]-20.
+                Y[2] = Y[2]-22.
+            pde.setValue(Y=Y)
+        else:
+            if dim == 2:
+                pde.setValue(Y=-20.)
+            else:
+                pde.setValue(Y=-60.)
+
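+# The constant shifts applied to Y in _setCoefficients above equal the
+# Laplacians of the quadratic exact solutions: presumably mirroring the Paso
+# tests removed above, the problem solved is -div(A*grad(u)) + D*u = Y with
+# A the identity, so e.g. u = 1+2*x0+3*x1+4*x0**2+5*x0*x1+6*x1**2 requires
+# an extra -(8+12) = -20 in Y; the system and 3D cases give [-20,-10], -60
+# and [-60,-20,-22] in the same way.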
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_BICGSTAB_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_BICGSTAB_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_PCG_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_PCG_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_TFQMR_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_TFQMR_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_TFQMR_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_TFQMR_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_MINRES_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_MINRES_Jacobi(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_BICGSTAB_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_PCG_AMG(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_PCG_AMG(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + Gauss-Seidel
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_TFQMR_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_TFQMR_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_TFQMR_GaussSeidel(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_TFQMR_GaussSeidel(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.GAUSS_SEIDEL
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_MINRES_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_MINRES_AMG(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_MINRES_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_MINRES_AMG(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### BiCGStab + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_BICGSTAB_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_BICGSTAB_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_BICGSTAB_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_BICGSTAB_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_PCG_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_PCG_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_PCG_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_PCG_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_TFQMR_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_TFQMR_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + RILU
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_MINRES_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_MINRES_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_MINRES_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_MINRES_RILU(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + ILUT
+
+class Test_SimpleSolveFinleyRect_Order1_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyRect_Order2_Trilinos_PCG_ILUT(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Rectangle(NE0, NE1, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order1_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 1, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveFinleyBrick_Order2_Trilinos_PCG_ILUT(SimpleSolveOrder2):
+    def setUp(self):
+        self.domain = Brick(NE0, NE1, NE2, 2, optimize=OPTIMIZE)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
+
diff --git a/finley/test/python/run_utilOnFinley.py b/finley/test/python/run_utilOnFinley.py
index eabaa64..0e76a48 100644
--- a/finley/test/python/run_utilOnFinley.py
+++ b/finley/test/python/run_utilOnFinley.py
@@ -25,22 +25,21 @@ __url__="https://launchpad.net/escript-finley"
 
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
-from test_util import Test_util as Test_util
-from test_util import Test_Util_SpatialFunctions, Test_Util_SpatialFunctions_noGradOnBoundary, Test_Util_SpatialFunctions_noGradOnBoundary_noContact
+from test_util import Test_util, Test_Util_SpatialFunctions, \
+        Test_Util_SpatialFunctions_noGradOnBoundary, \
+        Test_Util_SpatialFunctions_noGradOnBoundary_noContact
+from test_util_NaN_funcs import Test_util_NaN_funcs
 
-from esys.escript import *
-from esys.finley import Rectangle,Brick,JoinFaces,ReadMesh
-import sys
+from esys.escript import FunctionOnBoundary, getMPISizeWorld, HAVE_SYMBOLS
+from esys.finley import Rectangle, Brick, JoinFaces, ReadMesh
 import os
 
 if HAVE_SYMBOLS:
     from test_symfuncs import Test_symfuncs
 else:
-    print("Skipping symbolic tests since sympy is not available")
+    @unittest.skip("Skipping symbolic tests since sympy is not available")
     class Test_symfuncs:
         pass
-from test_util_NaN_funcs import Test_util_NaN_funcs
-
 
 try:
      FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
@@ -49,17 +48,41 @@ except KeyError:
 
 FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
 
-FINLEY_MERGE_ERROR = "FinleyAdapterException: Mesh_merge: more than 1 processor is not supported yet."
+FINLEY_MERGE_ERROR = "merge: more than 1 processor is not supported yet."
 
 NE=4 # number elements, must be even
 
-class Test_UtilOnFinley(Test_util,Test_symfuncs,Test_util_NaN_funcs):
+class Test_UtilOnFinley(Test_util):
+   def setUp(self):
+       try:
+           self.workdir=os.environ['FINLEY_WORKDIR']
+       except KeyError:
+           self.workdir='.'
+       self.domain = Rectangle(NE, NE+1, 2)
+       self.functionspace = FunctionOnBoundary(self.domain) # due to a bug in escript python needs to hold a reference to the domain
+   def tearDown(self):
+       del self.functionspace
+       del self.domain
+
+class Test_SymFuncsOnFinley(Test_symfuncs,Test_util_NaN_funcs):
+   def setUp(self):
+       try:
+           self.workdir=os.environ['FINLEY_WORKDIR']
+       except KeyError:
+           self.workdir='.'
+       self.domain = Rectangle(NE, NE+1, 2)
+       self.functionspace = FunctionOnBoundary(self.domain) # due to a bug in escript python needs to hold a reference to the domain
+   def tearDown(self):
+       del self.functionspace
+       del self.domain
+
+class Test_NaNFuncsOnFinley(Test_util_NaN_funcs):
    def setUp(self):
        try:
            self.workdir=os.environ['FINLEY_WORKDIR']
        except KeyError:
            self.workdir='.'
-       self.domain =Rectangle(NE,NE+1,2)
+       self.domain = Rectangle(NE, NE+1, 2)
        self.functionspace = FunctionOnBoundary(self.domain) # due to a bug in escript python needs to hold a reference to the domain
    def tearDown(self):
        del self.functionspace
@@ -168,7 +191,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex2DOrder1withContact(Test_Util_Spatial
         d2 = Rectangle(n0=NE//2,n1=NE,l0=0.5,order=1,useElementsOnFace=0)
         d2.setX(d2.getX()+[0.5,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -187,7 +210,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex2DOrder2withContact(Test_Util_Spatial
         d2 = Rectangle(n0=NE//2,n1=NE,l0=0.5,order=2,useElementsOnFace=0)
         d2.setX(d2.getX()+[0.5,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -206,7 +229,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex3DOrder1withContact(Test_Util_Spatial
         d2 = Brick(n0=NE//2,n1=NE,n2=NE,l0=0.5,order=1,useElementsOnFace=0)
         d2.setX(d2.getX()+[0.5,0.,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -225,7 +248,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex3DOrder2withContact(Test_Util_Spatial
         d2 = Brick(n0=NE//2,n1=NE,n2=NE,l0=0.5,order=2,useElementsOnFace=0)
         d2.setX(d2.getX()+[0.5,0.,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -244,7 +267,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex2DOrder1useElementsOnFacewithContact(
         d2 = Rectangle(n0=NE//2,n1=NE,l0=0.5,order=1,useElementsOnFace=True)
         d2.setX(d2.getX()+[0.5,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -263,7 +286,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex2DOrder2useElementsOnFacewithContact(
         d2 = Rectangle(n0=NE//2,n1=NE,l0=0.5,order=2,useElementsOnFace=True)
         d2.setX(d2.getX()+[0.5,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -282,7 +305,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex3DOrder1useElementsOnFacewithContact(
         d2 = Brick(n0=NE//2+1,n1=NE,n2=NE,l0=0.5,order=1,useElementsOnFace=True)
         d2.setX(d2.getX()+[0.5,0.,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
@@ -301,7 +324,7 @@ class Test_Util_SpatialFunctionsOnFinleyHex3DOrder2useElementsOnFacewithContact(
         d2 = Brick(n0=NE//2+1,n1=NE,n2=NE,l0=0.5,order=2,useElementsOnFace=True)
         d2.setX(d2.getX()+[0.5,0.,0.])
         if getMPISizeWorld() > 1:
-            with self.assertRaises(RuntimeError) as pkg:
+            with self.assertRaises(NotImplementedError) as pkg:
                 self.domain = JoinFaces([d1,d2],optimize=False)
             e = pkg.exception
             if FINLEY_MERGE_ERROR not in str(e):
diff --git a/finley/test/python/run_visualization_interface.py b/finley/test/python/run_visualization_interface.py
deleted file mode 100644
index 323683e..0000000
--- a/finley/test/python/run_visualization_interface.py
+++ /dev/null
@@ -1,638 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-import esys.escriptcore.utestselect as unittest
-from esys.escript import *
-from esys.finley import ReadMesh
-
-try:
-     FINLEY_TEST_DATA=os.environ['FINLEY_TEST_DATA']
-except KeyError:
-     FINLEY_TEST_DATA='.'
-
-try:
-     FINLEY_WORKDIR=os.environ['FINLEY_WORKDIR']
-except KeyError:
-     FINLEY_WORKDIR='.'
-
-FINLEY_TEST_MESH_PATH=os.path.join(FINLEY_TEST_DATA,"data_meshes")
-# if os.name == "nt":
-#    FINLEY_TEST_MESH_PATH = os.path.join(FINLEY_TEST_MESH_PATH,"win32")
-FINLEY_WORKDIR_PATH=FINLEY_WORKDIR
-
-class Test_VisualizationInterface(unittest.TestCase):
-   def check_dx(self,f,reference_f):
-      out_string=open(os.path.join(FINLEY_WORKDIR_PATH,f)).read().splitlines()
-      ref_string=open(os.path.join(FINLEY_TEST_MESH_PATH,reference_f)).read().splitlines()
-      c=0
-      for l in range(0,len(ref_string)):
-         if not ref_string[l].strip()[0]=="#":
-           line=out_string[c].strip()
-           if os.name == "nt":
-               line=line.replace("e+00","e+0").replace("e-00","e-0")
-           line=line.replace("e-00","e+00").replace("-0.000000e+00","0.000000e+00")
-           self.assertEqual(line,ref_string[l].strip(),"line %d (%s) in dx file does not match reference (%s)"%(c,line,ref_string[l].strip()))
-           c+=1
-
-class Test_DXFiles(Test_VisualizationInterface):
-  # ===========================================================================
-  def test_hex_2D_order2_dx(self):
-     reference="hex_2D_o1.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2.dx"),domain=dom)
-     self.check_dx("hex_2D_order2.dx",reference)
-
-  def test_hex_2D_order2_AllPoints_Scalar_dx(self):
-     reference="hex_2D_o1_node_3xs.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     x_r=ReducedSolution(dom).getX()
-     x_n=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_AllPoints_Scalar.dx"),data_r=x_r[0],data_n=x_n[0],data=x[0])
-     self.check_dx("hex_2D_order2_AllPoints_Scalar.dx",reference)
-  def test_hex_2D_order2_02Points_Scalar_dx(self):
-     reference="hex_2D_o1_node_2xs.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     x_n=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_O2Points_Scalar.dx"),data_n=x_n[0],data=x[0])
-     self.check_dx("hex_2D_order2_O2Points_Scalar.dx",reference)
-  def test_hex_2D_order2_2Cells_Scalar_dx(self):
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     x_b=FunctionOnBoundary(dom).getX()
-     try: 
-        saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_2Cells_Scalar.dx"),data=x[0],data_b=x_b[0])
-        self.fail("non-matching data not detected.")
-     except Exception:
-        pass
-  def test_hex_2D_order2_BoundaryPoint_Scalar_dx(self):
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     x_b=FunctionOnBoundary(dom).getX()
-     try: 
-        saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_BoundaryPoint_Scalar.dx"),data=x[0],data_b=x_b[0])
-        self.fail("non-matching data not detected.")
-     except Exception:
-        pass
-  def test_hex_2D_order2_Cells_AllData_dx(self):
-     reference="hex_2D_o1_cell_all.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_Cells_AllData.dx"),data_s=x[0],data_v=x[0]*[1.,2.],data_t=x[0]*[[11.,12.],[21.,22.]],data_t2=x[0]*[[-11.,-12.],[-21.,-22.]])
-     self.check_dx("hex_2D_order2_Cells_AllData.dx",reference)
-
-  def test_hex_2D_order2_CellsPoints_AllData_dx(self):
-     reference="hex_2D_o1_cellnode_all.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_2D_order2.msh"),optimize=False)
-     x_c=Function(dom).getX()
-     x_p=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_2D_order2_CellsPoints_AllData.dx"),data_sp=x_p[0],
-                                                     data_vp=x_p[0]*[1.,2.],
-                                                     data_tp=x_p[0]*[[11.,12.],[21.,22.]],
-                                                     data_sc=x_c[0],
-                                                     data_vc=x_c[0]*[1.,2.],
-                                                     data_tc=x_c[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_2D_order2_CellsPoints_AllData.dx",reference)
-  # ======================================================================================================================
-  def test_hex_contact_2D_order1_ContinuousFunction_Scalar_dx(self):
-     reference="hex_2D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ContinuousFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_ContinuousFunction_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_ContinuousFunction_Vector_dx(self):
-     reference="hex_2D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ContinuousFunction_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_ContinuousFunction_Vector.dx",reference)
-  def test_hex_contact_2D_order1_ContinuousFunction_Tensor_dx(self):
-     reference="hex_2D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ContinuousFunction_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_ContinuousFunction_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_Solution_Scalar_dx(self):
-     reference="hex_2D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Solution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_Solution_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_Solution_Vector_dx(self):
-     reference="hex_2D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Solution_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_Solution_Vector.dx",reference)
-  def test_hex_contact_2D_order1_Solution_Tensor_dx(self):
-     reference="hex_2D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Solution_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_Solution_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_ReducedSolution_Scalar_dx(self):
-     reference="hex_2D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedSolution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_ReducedSolution_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_ReducedSolution_Vector_dx(self):
-     reference="hex_2D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedSolution_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_ReducedSolution_Vector.dx",reference)
-  def test_hex_contact_2D_order1_ReducedSolution_Tensor_dx(self):
-     reference="hex_2D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedSolution_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_ReducedSolution_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_Function_Scalar_dx(self):
-     reference="hex_2D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Function_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_Function_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_Function_Vector_dx(self):
-     reference="hex_2D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Function_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_Function_Vector.dx",reference)
-  def test_hex_contact_2D_order1_Function_Tensor_dx(self):
-     reference="hex_2D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_Function_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_Function_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunction_Scalar_dx(self):
-     reference="hex_2D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_ReducedFunction_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunction_Vector_dx(self):
-     reference="hex_2D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunction_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_ReducedFunction_Vector.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunction_Tensor_dx(self):
-     reference="hex_2D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunction_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_ReducedFunction_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_FunctionOnBoundary_Scalar_dx(self):
-     reference="hex_2D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_FunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_FunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_FunctionOnBoundary_Vector_dx(self):
-     reference="hex_2D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_FunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_FunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_2D_order1_FunctionOnBoundary_Tensor_dx(self):
-     reference="hex_2D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_FunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_FunctionOnBoundary_Tensor.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunctionOnBoundary_Scalar_dx(self):
-     reference="hex_2D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order1_ReducedFunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunctionOnBoundary_Vector_dx(self):
-     reference="hex_2D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order1_ReducedFunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_2D_order1_ReducedFunctionOnBoundary_Tensor_dx(self):
-     reference="hex_2D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order1_ReducedFunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order1_ReducedFunctionOnBoundary_Tensor.dx",reference)
-  # ======================================================================================================================
-  def test_hex_contact_2D_order2_ContinuousFunction_Scalar_dx(self):
-     reference="hex_2D_o2_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ContinuousFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_ContinuousFunction_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_ContinuousFunction_Vector_dx(self):
-     reference="hex_2D_o2_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ContinuousFunction_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_ContinuousFunction_Vector.dx",reference)
-  def test_hex_contact_2D_order2_ContinuousFunction_Tensor_dx(self):
-     reference="hex_2D_o2_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ContinuousFunction_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_ContinuousFunction_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_Solution_Scalar_dx(self):
-     reference="hex_2D_o2_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Solution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_Solution_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_Solution_Vector_dx(self):
-     reference="hex_2D_o2_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Solution_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_Solution_Vector.dx",reference)
-  def test_hex_contact_2D_order2_Solution_Tensor_dx(self):
-     reference="hex_2D_o2_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Solution_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_Solution_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_ReducedSolution_Scalar_dx(self):
-     reference="hex_2D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedSolution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_ReducedSolution_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_ReducedSolution_Vector_dx(self):
-     reference="hex_2D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedSolution_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_ReducedSolution_Vector.dx",reference)
-  def test_hex_contact_2D_order2_ReducedSolution_Tensor_dx(self):
-     reference="hex_2D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedSolution_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_ReducedSolution_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_Function_Scalar_dx(self):
-     reference="hex_2D_o2_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Function_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_Function_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_Function_Vector_dx(self):
-     reference="hex_2D_o2_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Function_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_Function_Vector.dx",reference)
-  def test_hex_contact_2D_order2_Function_Tensor_dx(self):
-     reference="hex_2D_o2_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_Function_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_Function_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunction_Scalar_dx(self):
-     reference="hex_2D_o2_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_ReducedFunction_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunction_Vector_dx(self):
-     reference="hex_2D_o2_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunction_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_ReducedFunction_Vector.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunction_Tensor_dx(self):
-     reference="hex_2D_o2_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunction_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_ReducedFunction_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_FunctionOnBoundary_Scalar_dx(self):
-     reference="hex_2D_o2_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_FunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_FunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_FunctionOnBoundary_Vector_dx(self):
-     reference="hex_2D_o2_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_FunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_FunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_2D_order2_FunctionOnBoundary_Tensor_dx(self):
-     reference="hex_2D_o2_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_FunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_FunctionOnBoundary_Tensor.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunctionOnBoundary_Scalar_dx(self):
-     reference="hex_2D_o2_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_2D_order2_ReducedFunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunctionOnBoundary_Vector_dx(self):
-     reference="hex_2D_o2_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.])
-     self.check_dx("hex_contact_2D_order2_ReducedFunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_2D_order2_ReducedFunctionOnBoundary_Tensor_dx(self):
-     reference="hex_2D_o2_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_2D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_2D_order2_ReducedFunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.],[21.,22.]])
-     self.check_dx("hex_contact_2D_order2_ReducedFunctionOnBoundary_Tensor.dx",reference)
-  # ======================================================================================================================
-  def test_hex_contact_3D_order1_ContinuousFunction_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ContinuousFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_ContinuousFunction_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_ContinuousFunction_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ContinuousFunction_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_ContinuousFunction_Vector.dx",reference)
-  def test_hex_contact_3D_order1_ContinuousFunction_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ContinuousFunction_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_ContinuousFunction_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_Solution_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Solution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_Solution_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_Solution_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Solution_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_Solution_Vector.dx",reference)
-  def test_hex_contact_3D_order1_Solution_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Solution_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_Solution_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_ReducedSolution_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedSolution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_ReducedSolution_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_ReducedSolution_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedSolution_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_ReducedSolution_Vector.dx",reference)
-  def test_hex_contact_3D_order1_ReducedSolution_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedSolution_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_ReducedSolution_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_Function_Scalar_dx(self):
-     reference="hex_3D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Function_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_Function_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_Function_Vector_dx(self):
-     reference="hex_3D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Function_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_Function_Vector.dx",reference)
-  def test_hex_contact_3D_order1_Function_Tensor_dx(self):
-     reference="hex_3D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_Function_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_Function_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunction_Scalar_dx(self):
-     reference="hex_3D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_ReducedFunction_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunction_Vector_dx(self):
-     reference="hex_3D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunction_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_ReducedFunction_Vector.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunction_Tensor_dx(self):
-     reference="hex_3D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunction_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_ReducedFunction_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_FunctionOnBoundary_Scalar_dx(self):
-     reference="hex_3D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_FunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_FunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_FunctionOnBoundary_Vector_dx(self):
-     reference="hex_3D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_FunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_FunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_3D_order1_FunctionOnBoundary_Tensor_dx(self):
-     reference="hex_3D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_FunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_FunctionOnBoundary_Tensor.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunctionOnBoundary_Scalar_dx(self):
-     reference="hex_3D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order1_ReducedFunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunctionOnBoundary_Vector_dx(self):
-     reference="hex_3D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order1_ReducedFunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_3D_order1_ReducedFunctionOnBoundary_Tensor_dx(self):
-     reference="hex_3D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order1.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order1_ReducedFunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order1_ReducedFunctionOnBoundary_Tensor.dx",reference)
-  # ======================================================================================================================
-  def test_hex_contact_3D_order2_ContinuousFunction_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ContinuousFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_ContinuousFunction_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_ContinuousFunction_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ContinuousFunction_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_ContinuousFunction_Vector.dx",reference)
-  def test_hex_contact_3D_order2_ContinuousFunction_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ContinuousFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ContinuousFunction_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_ContinuousFunction_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_Solution_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Solution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_Solution_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_Solution_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Solution_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_Solution_Vector.dx",reference)
-  def test_hex_contact_3D_order2_Solution_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Solution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Solution_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_Solution_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_ReducedSolution_Scalar_dx(self):
-     reference="hex_3D_o1_node_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedSolution_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_ReducedSolution_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_ReducedSolution_Vector_dx(self):
-     reference="hex_3D_o1_node_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedSolution_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_ReducedSolution_Vector.dx",reference)
-  def test_hex_contact_3D_order2_ReducedSolution_Tensor_dx(self):
-     reference="hex_3D_o1_node_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedSolution(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedSolution_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_ReducedSolution_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_Function_Scalar_dx(self):
-     reference="hex_3D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Function_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_Function_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_Function_Vector_dx(self):
-     reference="hex_3D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Function_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_Function_Vector.dx",reference)
-  def test_hex_contact_3D_order2_Function_Tensor_dx(self):
-     reference="hex_3D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=Function(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_Function_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_Function_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunction_Scalar_dx(self):
-     reference="hex_3D_o1_cell_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunction_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_ReducedFunction_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunction_Vector_dx(self):
-     reference="hex_3D_o1_cell_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunction_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_ReducedFunction_Vector.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunction_Tensor_dx(self):
-     reference="hex_3D_o1_cell_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunction(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunction_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_ReducedFunction_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_FunctionOnBoundary_Scalar_dx(self):
-     reference="hex_3D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_FunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_FunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_FunctionOnBoundary_Vector_dx(self):
-     reference="hex_3D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_FunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_FunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_3D_order2_FunctionOnBoundary_Tensor_dx(self):
-     reference="hex_3D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=FunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_FunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_FunctionOnBoundary_Tensor.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunctionOnBoundary_Scalar_dx(self):
-     reference="hex_3D_o1_boundary_s.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunctionOnBoundary_Scalar.dx"),data=x[0])
-     self.check_dx("hex_contact_3D_order2_ReducedFunctionOnBoundary_Scalar.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunctionOnBoundary_Vector_dx(self):
-     reference="hex_3D_o1_boundary_v.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunctionOnBoundary_Vector.dx"),data=x[0]*[1.,2.,3.])
-     self.check_dx("hex_contact_3D_order2_ReducedFunctionOnBoundary_Vector.dx",reference)
-  def test_hex_contact_3D_order2_ReducedFunctionOnBoundary_Tensor_dx(self):
-     reference="hex_3D_o1_boundary_t.dx"
-     dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH,"hex_contact_3D_order2.msh"),optimize=False)
-     x=ReducedFunctionOnBoundary(dom).getX()
-     saveDX(os.path.join(FINLEY_WORKDIR_PATH,"hex_contact_3D_order2_ReducedFunctionOnBoundary_Tensor.dx"),data=x[0]*[[11.,12.,13.],[21.,22.,23],[31.,32.,33.]])
-     self.check_dx("hex_contact_3D_order2_ReducedFunctionOnBoundary_Tensor.dx",reference)
-
-if __name__ == '__main__':
-   import sys
-   suite = unittest.TestSuite()
-   # saveDX is not MPI parallel
-   if getMPISizeWorld() == 1: 
-       suite.addTest(unittest.makeSuite(Test_DXFiles))
-       pass
-   else:
-       print("Test_DXFiles is dropped as number of processors >1")
-   s=unittest.TextTestRunner(verbosity=2).run(suite)
-   if not s.wasSuccessful(): sys.exit(1)
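Every test removed above follows the same four steps: read a reference mesh,
sample the coordinates of one function space, scale them into scalar, vector
or tensor data, write a DX file and compare it against a stored reference.
A condensed sketch of that pattern (the helper run_dx_case and its parameter
list are illustrative only; ReadMesh, saveDX, check_dx and the path constants
are taken from the removed code):

   def run_dx_case(self, mesh, make_fs, make_data, outname, reference):
      dom=ReadMesh(os.path.join(FINLEY_TEST_MESH_PATH, mesh), optimize=False)
      x=make_fs(dom).getX()
      saveDX(os.path.join(FINLEY_WORKDIR_PATH, outname), data=make_data(x))
      self.check_dx(outname, reference)

   # e.g. the order-1 ContinuousFunction vector case:
   # self.run_dx_case("hex_contact_2D_order1.msh", ContinuousFunction,
   #                  lambda x: x[0]*[1.,2.],
   #                  "hex_contact_2D_order1_ContinuousFunction_Vector.dx",
   #                  "hex_2D_o1_node_v.dx")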
diff --git a/finley/test/python/runcoalgas.py b/finley/test/python/runcoalgas.py
index 59142ef..067cf85 100755
--- a/finley/test/python/runcoalgas.py
+++ b/finley/test/python/runcoalgas.py
@@ -33,18 +33,17 @@ from coalgas import *
 import time
 from esys.finley import ReadMesh, Rectangle, Brick
 from esys.escript.pdetools import Locator
+
 SAVE_VTK=True and False
-CONST_G = 9.81  * U.m/U.sec**2
+CONST_G = 9.81 * U.m/U.sec**2
 P_0=1.*U.atm
 
-
 CELL_X=2640*U.ft
 CELL_Y=2640*U.ft
 CELL_Z=33*U.ft
 
 TOP=2310*U.ft
 
-
 L_Z=CELL_Z
 
 L_X=CELL_X*210
@@ -56,32 +55,29 @@ N_X=int(L_X/CELL_X)
 N_Y=int(L_Y/CELL_Y)
 N_Z=int(L_Z/CELL_Z)
 
-
 OUTPUT_DIR="results"
 
-
 PERM_F_X = 100 * U.mDarcy
 PERM_F_Y = 100 * U.mDarcy
 PERM_F_Z = 1e-4 * U.mDarcy
 
 EQUIL = {
-  
-    "DATUM_DEPTH" : 2310. * U.ft ,
-    "DATUM_PRESS" : 1000. * U.psi ,
-    "GWC_DEPTH" : -1000. * U.ft ,
+    "DATUM_DEPTH" : 2310. * U.ft,
+    "DATUM_PRESS" : 1000. * U.psi,
+    "GWC_DEPTH" : -1000. * U.ft,
     "GWC_PCOW" : 0. * U.psi
 }
 
 TOPS = 2310 * U.ft
 PHI_F_0=0.01
-SIGMA = 1. /U.ft**2 
+SIGMA = 1. /U.ft**2
 
 #DT=[0.1* U.day]*9+[1 * U.day,3* U.day,9* U.day, 17.5*U.day] + [ 30.5*U.day ] *20
 DT=[1 * U.day,3* U.day,9* U.day, 17.5*U.day] + [ 30.5*U.day ] *20
 DT=[.5 * U.day, .5 * U.day, 0.5*3* U.day, 0.5*3* U.day, 0.5*9* U.day, 0.5*9* U.day, 17.5 *.5 *U.day, 17.5*0.5*U.day] + [ 15.25*U.day ] *40
-DT=[.25 * U.day, .25 * U.day, .25 * U.day, .25 * U.day, 
-     0.25*3* U.day, 0.25*3* U.day, 0.25*3* U.day, 0.25*3* U.day, 
-     0.25*9* U.day, 0.25*9* U.day, 0.25*9* U.day, 0.25*9* U.day, 
+DT=[.25 * U.day, .25 * U.day, .25 * U.day, .25 * U.day,
+     0.25*3* U.day, 0.25*3* U.day, 0.25*3* U.day, 0.25*3* U.day,
+     0.25*9* U.day, 0.25*9* U.day, 0.25*9* U.day, 0.25*9* U.day,
      17.5 *.25 *U.day, 17.5*0.25*U.day, 17.5 *.25 *U.day, 17.5*0.25*U.day] + \
     [ 15.25*U.day * 0.5] *80
 DT=[.125 * U.day, .125 * U.day, .125 * U.day, .125 * U.day, .125 * U.day, .125 * U.day, .125 * U.day, .125 * U.day,
@@ -97,21 +93,21 @@ DT=[1./10. * U.day]*10 + [3./10. * U.day]*10 + [ 9./10.* U.day ] *10 + [ 17.5/10
 
 #[0.1 * U.day ] *20
 
-PVTW={ "p_ref" :   1000 * U.psi ,  
-       "B_ref" :  0.997  ,
+PVTW={ "p_ref" :   1000 * U.psi,
+       "B_ref" :  0.997,
        "C" :  3.084E-06/U.psi,
        "mu_ref" : 0.68673 * U.cPoise,
        "C_v" : 0/U.psi
-
      }
-GRAVITY = { "water" : 1.0, 
+
+GRAVITY = { "water" : 1.0,
             "gas" : .553 }
-            
-ROCK = { "p_ref" :   1000 * U.psi ,
+
+ROCK = { "p_ref" :   1000 * U.psi,
          "C" : 3.3E-4 * 1./U.psi }
-         
-DIFFCOAL  = { "D" : 0.005 * U.ft**2/U.day, 
-              "f_r": 1.}
+
+DIFFCOAL = { "D" : 0.005 * U.ft**2/U.day,
+             "f_r": 1.}
 
 LANGMUIR = [
 [ 0     * U.psi , 0.00000000 * U.Mscf/U.ft**3],
@@ -129,7 +125,7 @@ LANGMUIR = [
 
 
 PVDG = [
-[ 14.70 * U.psi ,200.3800 * U.Barrel/U.Mscf , 0.012025 * U.cPoise ] , # psi, rb/Mscf,  
+[ 14.70 * U.psi ,200.3800 * U.Barrel/U.Mscf , 0.012025 * U.cPoise ] , # psi, rb/Mscf,
 [ 20.00 * U.psi ,146.0600 * U.Barrel/U.Mscf , 0.012030 * U.cPoise ] ,
 [ 25.00 * U.psi ,116.1461 * U.Barrel/U.Mscf , 0.012034 * U.cPoise ] ,
 [ 30.00 * U.psi ,96.3132 * U.Barrel/U.Mscf , 0.012038 * U.cPoise ] ,
@@ -183,10 +179,10 @@ PVDG = [
 [ 1261.50 * U.psi ,2.0705 * U.Barrel/U.Mscf , 0.013917 * U.cPoise ] ,
 [ 1296.10 * U.psi ,2.0138 * U.Barrel/U.Mscf , 0.013994 * U.cPoise ] ,
 [ 1330.70 * U.psi ,1.9600 * U.Barrel/U.Mscf , 0.014072 * U.cPoise ] ,
-[ 1365.40 * U.psi ,1.9089 * U.Barrel/U.Mscf , 0.014151  * U.cPoise ] ]  
+[ 1365.40 * U.psi ,1.9089 * U.Barrel/U.Mscf , 0.014151  * U.cPoise ] ]
 
 
-SGFN   = [  
+SGFN = [
 [ 0  , 0  , 0 * U.psi],
 [ 0.05   , 0  , 0  * U.psi ],
 [ 0.1333  , 0.00610   , 0  * U.psi ],
@@ -205,7 +201,7 @@ SWFN = [
 [ 0.45000  , 0.17030, 0  * U.psi],
 [ 0.53330  , 0.26220, 0  * U.psi],
 [ 0.61670  , 0.36650, 0  * U.psi],
-[ 0.70000  , 0.48170, 0  * U.psi], 
+[ 0.70000  , 0.48170, 0  * U.psi],
 [ 0.78330  , 0.60710, 0  * U.psi],
 [ 0.86670  , 0.74170, 0  * U.psi],
 [ 0.95000  , 0.88500, 0  * U.psi],
@@ -213,7 +209,7 @@ SWFN = [
 
 
 wellspecs = {
-  'P1' : { "X0" :[ (N_X/2+0.5)*CELL_X,  (N_Y/2+0.5)*CELL_Y, 0.5*CELL_Z], 
+  'P1' : { "X0" :[ (N_X/2+0.5)*CELL_X,  (N_Y/2+0.5)*CELL_Y, 0.5*CELL_Z],
            "r"  : 0.8333 * U.ft,
            "s"  : 0,
            "Q"  : [0., 2000*U.Barrel/U.day ],
@@ -223,7 +219,6 @@ wellspecs = {
 }
 
 
-
 # print input
 print(("<%s> Execution started."%time.asctime()))
 DIM=2
@@ -235,7 +230,6 @@ for I in wellspecs:
      N+=1
      domain.addDiracPoint(wellspecs[I]["X0"][:DIM], I)
      print(("<%s> Well %s introduced to domain."%(time.asctime(), I)))
-     
 
 #domain=Brick(N_X, N_Y,N_Z,l0=L_X, l1=L_Y,l2=L_Z)
 
@@ -252,35 +246,33 @@ print("fracture permeability in z direction= %f mD"%(PERM_F_Z/(U.mDarcy)))
 
 mkDir(OUTPUT_DIR)
 
-
-
 print(("<%s> Mesh set up completed."%time.asctime()))
-well_P1=VerticalPeacemanWell('P1', domain, BHP_limit=wellspecs['P1' ]["BHP"], 
-                                Q=wellspecs['P1']["Q"], 
-                                r=wellspecs['P1']["r"], 
+well_P1=VerticalPeacemanWell('P1', domain, BHP_limit=wellspecs['P1' ]["BHP"],
+                                Q=wellspecs['P1']["Q"],
+                                r=wellspecs['P1']["r"],
                                 X0=[ wellspecs['P1' ]["X0"][0], wellspecs['P1']["X0"][1], wellspecs['P1']["X0"][2]] ,
-                                D=[CELL_X, CELL_Y, CELL_Z], 
+                                D=[CELL_X, CELL_Y, CELL_Z],
                                 perm=[PERM_F_X, PERM_F_Y, PERM_F_Z],
-                                schedule=wellspecs['P1']["schedule"], 
-                                s=wellspecs['P1']["s"]) 
+                                schedule=wellspecs['P1']["schedule"],
+                                s=wellspecs['P1']["s"])
 rho_w = WaterDensity(B_ref=PVTW["B_ref"], p_ref = PVTW["p_ref"], C=PVTW["C"], gravity=GRAVITY["water"])
 p_top = EQUIL["DATUM_PRESS"] + P_0
 p_bottom=p_top + CONST_G * CELL_Z * rho_w(p_top)
 
-model = PorosityOneHalfModel(domain, 
+model = PorosityOneHalfModel(domain,
                              phi_f=Porosity(phi_0=PHI_F_0, p_0=(p_bottom +p_top)/2., p_ref=ROCK["p_ref"], C = ROCK["C"]),
                              L_g=InterpolationTable([ l[0] for l in LANGMUIR ], [ l[1] for l in LANGMUIR ] ),
-                 perm_f_0=PERM_F_X, 
-                 perm_f_1=PERM_F_Y, 
+                 perm_f_0=PERM_F_X,
+                 perm_f_1=PERM_F_Y,
                  perm_f_2=PERM_F_Z,
-                 k_w =InterpolationTable([ l[0] for l in SWFN ], [ l[1] for l in SWFN ], obeyBounds=False ),  
-                 k_g= InterpolationTable([ l[0] for l in SGFN ], [ l[1] for l in SGFN ], obeyBounds=False ),  
-                 mu_w = WaterViscosity(mu_ref = PVTW["mu_ref"], p_ref=PVTW["p_ref"], C=PVTW["C_v"]),      
+                 k_w =InterpolationTable([ l[0] for l in SWFN ], [ l[1] for l in SWFN ], obeyBounds=False ),
+                 k_g= InterpolationTable([ l[0] for l in SGFN ], [ l[1] for l in SGFN ], obeyBounds=False ),
+                 mu_w = WaterViscosity(mu_ref = PVTW["mu_ref"], p_ref=PVTW["p_ref"], C=PVTW["C_v"]),
                  mu_g = InterpolationTable([ l[0] for l in PVDG ], [ l[2] for l in PVDG ] ),
-                 rho_w = rho_w, 
-                 rho_g=GasDensity( p = [ l[0] for l in PVDG ], B = [ l[1] for l in PVDG ], gravity=GRAVITY["gas"]), 
+                 rho_w = rho_w,
+                 rho_g=GasDensity( p = [ l[0] for l in PVDG ], B = [ l[1] for l in PVDG ], gravity=GRAVITY["gas"]),
                  sigma=SIGMA,
-                 A_mg=DIFFCOAL["D"], 
+                 A_mg=DIFFCOAL["D"],
                        f_rg=DIFFCOAL["f_r"],
                  wells=[ well_P1, ], g= CONST_G)
 # this needs to be revised:.
@@ -295,27 +287,27 @@ n_t = 0
 p, S_fg, c_mg, BHP, q_gas,q_water =model.getState()
 
 if SAVE_VTK:
-   FN=os.path.join(OUTPUT_DIR, "state.%d.vtu"%n_t)
-   saveVTK(FN,p=p, S_fg=S_fg, c_mg=c_mg)
-   print("<%s> Initial state saved to file %s."%(time.asctime(),FN))
-print("EEE", t/U.day, well_P1.locator(p)/U.psi, well_P1.locator(S_fg),  well_P1.locator(c_mg)/U.Mscf*U.ft**3)
-print("DDD", t/U.day, well_P1.locator(BHP)/U.psi, well_P1.locator(q_gas)/U.Mcf*U.day,  well_P1.locator(q_water)/U.Barrel*U.day)
-
+    FN=os.path.join(OUTPUT_DIR, "state.%d.vtu"%n_t)
+    saveVTK(FN,p=p, S_fg=S_fg, c_mg=c_mg)
+    print("<%s> Initial state saved to file %s."%(time.asctime(),FN))
+    print(t/U.day, well_P1.locator(p)/U.psi, well_P1.locator(S_fg), well_P1.locator(c_mg)/U.Mscf*U.ft**3)
+    print(t/U.day, well_P1.locator(BHP)/U.psi, well_P1.locator(q_gas)/U.Mcf*U.day, well_P1.locator(q_water)/U.Barrel*U.day)
 
 
 for dt in DT:
-  print("<%s>Time step %d, time = %e days started:"%(time.asctime(), n_t+1, (t+dt)/U.day))
-  
-  model.update(dt)
-
-  p, S_fg, c_mg, BHP, q_gas,q_water =model.getState()
-  
-  if SAVE_VTK:
-     FN=os.path.join(OUTPUT_DIR, "state.%d.vtu"%(n_t+1))
-     saveVTK(FN,p=p, S_fg=S_fg, c_mg=c_mg)
-     print("<%s>State %s saved to file %s."%(time.asctime(),n_t+1,FN ))
-  print("EEE", (t+dt)/U.day, well_P1.locator(p)/U.psi, well_P1.locator(S_fg),  well_P1.locator(c_mg)/U.Mscf*U.ft**3)
-  print("DDD", (t+dt)/U.day, well_P1.locator(BHP)/U.psi, well_P1.locator(q_gas)/U.Mcf*U.day,  well_P1.locator(q_water)/U.Barrel*U.day)
-
-  n_t+=1
-  t+=dt
+    print("<%s>Time step %d, time = %e days started:"%(time.asctime(), n_t+1, (t+dt)/U.day))
+
+    model.update(dt)
+
+    p, S_fg, c_mg, BHP, q_gas,q_water = model.getState()
+
+    if SAVE_VTK:
+        FN=os.path.join(OUTPUT_DIR, "state.%d.vtu"%(n_t+1))
+        saveVTK(FN,p=p, S_fg=S_fg, c_mg=c_mg)
+        print("<%s>State %s saved to file %s."%(time.asctime(),n_t+1,FN))
+    print((t+dt)/U.day, well_P1.locator(p)/U.psi, well_P1.locator(S_fg), well_P1.locator(c_mg)/U.Mscf*U.ft**3)
+    print((t+dt)/U.day, well_P1.locator(BHP)/U.psi, well_P1.locator(q_gas)/U.Mcf*U.day, well_P1.locator(q_water)/U.Barrel*U.day)
+
+    n_t += 1
+    t += dt
+
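The constants tidied up above use escript's convention of carrying units as
multiplicative conversion factors: a quantity is stored as value*unit and is
converted back for reporting by dividing by the target unit, as the print
statements later in the script do with U.day, U.psi and U.Barrel. A minimal
sketch, assuming U is the SI-based units module that coalgas re-exports (an
assumption; only unit names appearing in this patch are used):

   depth = 2310. * U.ft     # held internally as a plain float in SI units
   print(depth / U.ft)      # divide by the unit to report it: 2310.0
   dt = 17.5 * U.day
   print(dt / U.day)        # 17.5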
diff --git a/modellib/test/python/SConscript b/modellib/test/python/SConscript
index 4b98df4..c1f4273 100644
--- a/modellib/test/python/SConscript
+++ b/modellib/test/python/SConscript
@@ -46,9 +46,8 @@ program = local_env.RunPyUnitTest(alltestruns)
 Depends(program, 'build_py_tests')
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("MODELLIB_TEST_DATA_ROOT","$BATCH_ROOT/modellib/test/python"),('MODELLIB_WORKDIR','$BUILD_DIR/modellib/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/modellib/test/python","$BATCH_ROOT/modellib/test/python",testruns)
-tgroup.makeDir("$BUILD_DIR/modellib/test")
+from grouptest import GroupTest
+tgroup=GroupTest("modellib", "$PYTHONRUNNER ", (("MODELLIB_TEST_DATA_ROOT","$BATCH_ROOT/modellib/test/python"),('MODELLIB_WORKDIR','$BUILD_DIR/modellib/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/modellib/test/python", "$BATCH_ROOT/modellib/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/modellib/test/python")
 TestGroups.append(tgroup)
 
diff --git a/modellib/test/python/run_convection.py b/modellib/test/python/run_convection.py
index b86603c..4ced7fd 100644
--- a/modellib/test/python/run_convection.py
+++ b/modellib/test/python/run_convection.py
@@ -35,6 +35,16 @@ else:
     #being passed to a python3 style StringIO that expects unicode
     from StringIO import StringIO
 
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+from esys.escript import hasFeature
+from esys.escript.modelframe import Link,Simulation
+from esys.modellib.input import Sequencer,InterpolateOverBox,GaussianProfile,LinearCombination
+from esys.modellib.flow import SteadyIncompressibleFlow
+from esys.modellib.temperature import TemperatureAdvection
+from esys.modellib.materials import SimpleEarthModel,GravityForce
+from esys.modellib.visualization import WriteVTK
+
 try:
     import esys.dudley
     HAVE_DUDLEY = True
@@ -48,17 +58,8 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from esys.escript import getEscriptParamInt
-from esys.escript.modelframe import Link,Simulation
-from esys.modellib.input import Sequencer,InterpolateOverBox,GaussianProfile,LinearCombination
-from esys.modellib.flow import SteadyIncompressibleFlow
-from esys.modellib.temperature import TemperatureAdvection
-from esys.modellib.materials import SimpleEarthModel,GravityForce
-from esys.modellib.visualization import WriteVTK
-
+# TODO: once Amesos2 can deal with block matrices uncomment
+HAVE_DIRECT = hasFeature("PASO_DIRECT") #or hasFeature('trilinos')
 
 try:
    WORKDIR=os.environ['MODELLIB_WORKDIR']
@@ -149,7 +150,7 @@ def run(dom, stream):
     s.writeXML(stream)
     s.run()
 
-@unittest.skipIf(not getEscriptParamInt("PASO_DIRECT"), "Direct solver not available")
+@unittest.skipUnless(HAVE_DIRECT, "Direct solver not available")
 class Test_Convection(unittest.TestCase):
     def setUp(self):
         import sys
@@ -160,13 +161,13 @@ class Test_Convection(unittest.TestCase):
         import sys
         sys.stdout = self.old
 
-    @unittest.skipIf(not HAVE_FINLEY, "Finley module not available")
+    @unittest.skipUnless(HAVE_FINLEY, "Finley module not available")
     def test_order2(self):
         dom=RectangularDomain()
         dom.order=2
         run(dom, sys.stdout)
 
-    @unittest.skipIf(not HAVE_DUDLEY or not HAVE_FINLEY, "Dudley module not available")
+    @unittest.skipUnless(HAVE_DUDLEY and HAVE_FINLEY, "Dudley module not available")
     def test_order1(self):
         dom=RectangularDomain(esys.dudley)
         dom.order=1
diff --git a/modellib/test/python/run_domainreaders.py b/modellib/test/python/run_domainreaders.py
index 2b8d8cb..042d658 100644
--- a/modellib/test/python/run_domainreaders.py
+++ b/modellib/test/python/run_domainreaders.py
@@ -45,7 +45,7 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-from esys.escript import getMPISizeWorld, getEscriptParamInt
+from esys.escript import getMPISizeWorld, hasFeature
 from esys.escript.modelframe import DataSource
 from esys.pycad.gmsh import *
 from esys.pycad import *
@@ -56,20 +56,11 @@ try:
 except KeyError:
      MODELLIB_WORKDIR='.'
 
-GMSH = None
-try:
-    p=Popen(['gmsh', '-info'], stderr=PIPE)
-    _,e=p.communicate()
-    if e.split().count("MPI"):
-        GMSH = 'm'
-    else:
-        GMSH = 's'
-except OSError:
-    pass
-    
-@unittest.skipIf(GMSH is None, "gmsh not available")
-@unittest.skipIf(getEscriptParamInt("MPIBUILD",0)>0,
-        "not tested with MPI builds")
+GMSH = hasFeature('gmsh')
+mpiSize = getMPISizeWorld()
+
+@unittest.skipIf(not GMSH, "gmsh not available")
+@unittest.skipIf(mpiSize > 1, "not tested with more than 1 MPI rank")
 class Test_domainReaders(unittest.TestCase):
     def domain_family(self, dommodule, f):
         dom=RectangularDomain(dommodule, parameters=["fish","dummy"], debug=True)
diff --git a/modellib/test/python/run_flow.py b/modellib/test/python/run_flow.py
index 69c1a7d..1710f62 100644
--- a/modellib/test/python/run_flow.py
+++ b/modellib/test/python/run_flow.py
@@ -39,7 +39,7 @@ else:
 import esys.escriptcore.utestselect as unittest
 from esys.escriptcore.testing import *
 from esys.escript.modelframe import Link,Simulation
-from esys.escript import getEscriptParamInt
+from esys.escript import hasFeature
 from esys.modellib.input import Sequencer
 from esys.modellib.probe import Probe,EvaluateExpression
 from esys.modellib.flow import SteadyIncompressibleFlow
@@ -51,7 +51,8 @@ try:
 except ImportError:
     HAVE_FINLEY = False
 
-have_direct=getEscriptParamInt("PASO_DIRECT")
+# TODO: once Amesos2 can deal with block matrices uncomment
+HAVE_DIRECT = hasFeature("PASO_DIRECT") #or hasFeature('trilinos')
 
 #Link() behaves badly inside a TestCase class
 def run(dom, stream):
@@ -99,7 +100,7 @@ class Test_RunFlow(unittest.TestCase):
         sys.stdout = self.old
 
     @unittest.skipIf(not HAVE_FINLEY, "Finley module not available")
-    @unittest.skipIf(not have_direct, "Direct solver not available")
+    @unittest.skipIf(not HAVE_DIRECT, "Direct solver not available")
     def test_order2(self):
         dom=RectangularDomain()
         dom.order=2
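The test scripts above replace getEscriptParamInt() capability probes with
hasFeature() (or, for the MPI check, getMPISizeWorld()), and run_convection.py
additionally turns skipIf(not X, ...) into the equivalent skipUnless(X, ...).
A minimal sketch of the resulting pattern, using only calls visible in this
patch (the class and test names are placeholders):

   import esys.escriptcore.utestselect as unittest
   from esys.escript import hasFeature

   HAVE_DIRECT = hasFeature("PASO_DIRECT")

   @unittest.skipUnless(HAVE_DIRECT, "Direct solver not available")
   class Test_NeedsDirectSolver(unittest.TestCase):
       def test_placeholder(self):
           pass   # a real test would set up and solve a PDE here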
diff --git a/scons/templates/jessie_mpi_options.py b/paso/SConscript
similarity index 84%
copy from scons/templates/jessie_mpi_options.py
copy to paso/SConscript
index cae5ae6..410abdf 100644
--- a/scons/templates/jessie_mpi_options.py
+++ b/paso/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,6 +13,8 @@
 #
 ##############################################################################
 
-from .jessie_options import *
+Import('env')
+if env['paso']:
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-mpi='OPENMPI'
diff --git a/paso/profiling/Paso_tests.cpp b/paso/profiling/Paso_tests.cpp
deleted file mode 100644
index 0354c51..0000000
--- a/paso/profiling/Paso_tests.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/************************************************************************************/
-
-/* Paso: interface to the direct solvers                    */
-
-/************************************************************************************/
-
-/* Author: artak at uq.edu.au */
-
-/************************************************************************************/
-
-#include "paso/Paso.h"
-#include "paso/Solver.h"
-#include "paso/SystemMatrix.h"
-#include "paso/Options.h"
-#include "Paso_tests.h"
-
-
-/************************************************************************************/
-
-void Paso_test_run(Paso_SystemMatrix* A,double* b,dim_t level)
-{
-    paso::Options options;
-   
- if(level==1) /* Solvers only*/
-    {
-      options.setDefaults();
-      options.method=PASO_PCG;
-      options.verbose=TRUE;
-      options.preconditioner=PASO_JACOBI;
-      fprintf(stdout,"Test solver: PCG with JACOBI\n");
-      Paso_test_matrix(A,b,&options);
-      
-      fprintf(stdout,"Test solver: BICGSTAB with JACOBI\n");
-      options.setDefaults();
-      A->solver=NULL;
-      options.verbose=TRUE;
-      options.method=PASO_BICGSTAB;
-      Paso_test_matrix(A,b,&options);
-      
-      fprintf(stdout,"Test solver: GMRES with JACOBI\n");
-      options.setDefaults();
-      A->solver=NULL;
-      options.verbose=TRUE;
-      options.method=PASO_GMRES;
-      Paso_test_matrix(A,b,&options);
-      
-      fprintf(stdout,"Test solver: PRES20 with JACOBI\n");
-      options.setDefaults();
-      A->solver=NULL;
-      options.verbose=TRUE;
-      options.method=PASO_PRES20;
-      Paso_test_matrix(A,b,&options); 
-      
-      fprintf(stdout,"Test solver: MINRES with JACOBI\n");
-      options.setDefaults();
-      A->solver=NULL;
-      options.verbose=TRUE;
-      options.method=PASO_MINRES;
-      Paso_test_matrix(A,b,&options); 
-      
-      fprintf(stdout,"Test solver: TFQMR with JACOBI\n");
-      options.setDefaults();
-      A->solver=NULL;
-      options.verbose=TRUE;
-      options.method=PASO_TFQMR;
-      Paso_test_matrix(A,b,&options); 
-    }
- else if (level==2) /* Preconditioners only with default solver*/
-    {
-      options.setDefaults();
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      options.preconditioner=PASO_JACOBI;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with JACOBI\n");
-      Paso_test_matrix(A,b,&options);
-
-      options.setDefaults();
-      A->solver=NULL;
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with ILU\n");
-      options.preconditioner=PASO_ILU0;
-      Paso_test_matrix(A,b,&options);
-
-      options.setDefaults();
-      A->solver=NULL;
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with RILU\n");
-      options.preconditioner=PASO_RILU;
-      Paso_test_matrix(A,b,&options); 
-
-      options.setDefaults();
-      A->solver=NULL;
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with GS\n");
-      options.preconditioner=PASO_GS;
-      Paso_test_matrix(A,b,&options); 
-
-      options.setDefaults();
-      A->solver=NULL;
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with AMG\n");
-      options.preconditioner=PASO_AMG;
-      Paso_test_matrix(A,b,&options);
-
-      options.setDefaults();
-      A->solver=NULL;
-      options.method=PASO_DEFAULT;
-      options.verbose=TRUE;
-      fprintf(stdout,"Test preconditioner: PASO_DEFAULT with AMLI\n");
-      options.preconditioner=PASO_AMLI;
-      Paso_test_matrix(A,b,&options);  
-    }
-}
-
-void Paso_test_matrix(Paso_SystemMatrix* A, double* b, Paso_Options* options ) {
-   
-   dim_t n=Paso_SystemMatrix_getTotalNumRows(A);
-   double *out=new double[n];
-   
-   Paso_solve(A,out,b,options);
-   
-   delete[] out;
-}
-
-void Paso_test_data(char *fileName_p, double* b, Paso_Options* options ) {
-   
-   Paso_SystemMatrix* A=NULL;
-   dim_t n=Paso_SystemMatrix_getTotalNumRows(A);
-   double *out = new double[n];
-   A=Paso_SystemMatrix_loadMM_toCSR(fileName_p);
-   Paso_solve(A,out,b,options);
-   Paso_SystemMatrix_free(A);
-   delete[] out;
-}
-
diff --git a/paso/profiling/Paso_tests.h b/paso/profiling/Paso_tests.h
deleted file mode 100644
index c7bc8de..0000000
--- a/paso/profiling/Paso_tests.h
+++ /dev/null
@@ -1,39 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#ifndef INC_PASO_TESTS
-#define INC_PASO_TESTS
-
-/************************************************************************************/
-
-/*   Some testing routines: */
-
-/************************************************************************************/
-
-/*   Copyrights by ACcESS Australia, 2003,2004,2005 */
-/*   author: artak at uq.edu.au */
-
-/************************************************************************************/
-
-#include "paso/Common.h"
-
-/************************************************************************************/
-
-void Paso_test_run(Paso_SystemMatrix* A,double* b,dim_t level) ;
-void Paso_test_matrix(Paso_SystemMatrix* A, double* b, Paso_Options* options );
-void Paso_test_data(char *fileName_p, double* b, Paso_Options* options );
-#endif /* #ifndef INC_PASO_TESTS */
diff --git a/paso/profiling/Test.cpp b/paso/profiling/Test.cpp
deleted file mode 100644
index c9195f6..0000000
--- a/paso/profiling/Test.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-#include <stdio.h>
-#include <unistd.h>
-#include "paso/Common.h"
-#include "paso/Solver.h"
-#include "paso/SystemMatrix.h"
-#include "Paso_tests.h"
-#include "getopt.h"
-#include <cmath>
-
-#define PI (3.141592653589793)
-
-/*
- Usage: PasoTests -f filename [-s solver] [-p preconditioner] [-l level] [-r rhs matrix] [-c coarsening method] [-t threshold parameter for AMG coarsening] 
-        filename - matrix to be loaded in CSR Matrix-Market format 
-        solver   - PCG, GMRES, PRES20, TFQMR and MINRES
-        preconditioner - ILU0, RILU, JACOBI, GS and AMG
-        level    - options are 1,2 and 3
-                   0 - default option just solves with default of specified parameters 
-                   1 - test all solvers with default preconditioner
-                   2 - test all preconditioners with default solver
-                   3 - compare solution obtained by using AMG and Jacobi preconditioners
-        coarsening method - YS, RS, AGG and STD.
-        threshold parameter for AMG - this is the threshold value used in AMG in the coarsening process. Default is 0.25. For YS and AGG, please, use 0.05.            
-*/
-
-double Lsup(double* x, int n) {
-    double max=0;
-    int i;
-    
-    for (i=0;i<n;i++) {
-        max=MAX(ABS(x[i]),max);
-    }
-    
-    return max;
-}
-
-int main (int argc, char *argv[]) {
-    Paso_SystemMatrix *A = NULL;
-    double *b,*x,*x_ref;
-    dim_t i,n,level=0;
-    double *error;
-    double Lsuperror;
-   
-    int c;
-    char *filename,*solver,*prec,*rhs;
-    extern char *optarg;
-    extern int optopt;
-
-    Paso_Options options;
-    Paso_Options_setDefaults(&options);
-
-    
-    
-    options.verbose=TRUE;
-    options.level_max=2;
-    options.tolerance=1.E-6;
-    options.smoother=PASO_GAUSS_SEIDEL;
-    options.pre_sweeps=5;
-    options.post_sweeps=5;
-
-    while ((c = getopt(argc, argv, "s:p:f:r:l:t:c:h")) != -1) {
-      switch(c) {
-        case 's':
-            solver=optarg;
-            if (strcmp(solver,"PCG")==0)
-                options.method=PASO_PCG;
-            else if (strcmp(solver,"GMRES")==0)
-                options.method=PASO_GMRES;
-            else if (strcmp(solver,"PRES20")==0)    
-                options.method=PASO_PRES20;
-            else if (strcmp(solver,"BICGSTAB")==0)
-                options.method=PASO_BICGSTAB;
-            else if (strcmp(solver,"TFQMR")==0)
-                options.method=PASO_TFQMR;
-            else if (strcmp(solver,"MINRES")==0)
-                options.method=PASO_MINRES;
-            else if (strcmp(solver,"DIRECT")==0) {
-                options.package=PASO_MKL;
-                options.verbose=1;
-            }
-        break;
-        case 'p':
-            prec=optarg;
-            if (strcmp(prec,"JACOBI")==0) 
-                options.preconditioner=PASO_JACOBI;
-            else if (strcmp(prec,"RILU")==0) 
-                options.preconditioner=PASO_RILU;
-            else if (strcmp(prec,"ILU0")==0)
-                options.preconditioner=PASO_ILU0;
-            else if (strcmp(prec,"GS")==0)
-                options.preconditioner=PASO_GS;
-            else if (strcmp(prec,"AMG")==0) {
-                options.preconditioner=PASO_AMG;
-            }
-            else if (strcmp(prec,"AMLI")==0) {
-                options.preconditioner=PASO_AMG;
-            }
-        break;
-        case 'f':
-            filename = optarg;
-            A=MEMALLOC(1,Paso_SystemMatrix);
-            A=Paso_SystemMatrix_loadMM_toCSR(filename);
-            n=Paso_SystemMatrix_getTotalNumRows(A);
-            b=MEMALLOC(n,double);
-            x=MEMALLOC(n,double);
-            x_ref=MEMALLOC(n,double);
-            error=MEMALLOC(n,double);
-            for(i=0;i<n;i++) {
-             x_ref[i]=cos(i*PI/n);
-            }
-            Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(DBLE(1),A,x_ref,DBLE(0),b);
-            break;
-        case 'r':
-            rhs=optarg;
-            if (A==NULL) {
-             fprintf(stderr,"System matrix is not loaded yet.\n");
-             break;
-            }
-            n=Paso_SystemMatrix_getTotalNumRows(A);
-            Paso_RHS_loadMM_toCSR(rhs,b,n);
-            break;
-        case 'l':
-            level=atoi(optarg);
-            break;
-        case 't':
-            options.coarsening_threshold=atof(optarg);
-            break;
-        case 'c':
-            prec=optarg;
-            if (strcmp(prec,"RS")==0) 
-                options.coarsening_method=PASO_RUGE_STUEBEN_COARSENING;
-            else if (strcmp(prec,"YS")==0) 
-                options.coarsening_method=PASO_YAIR_SHAPIRA_COARSENING;
-            else if (strcmp(prec,"AGG")==0)
-                options.coarsening_method=PASO_AGGREGATION_COARSENING;
-            else if (strcmp(prec,"STD")==0)
-                options.coarsening_method=PASO_STANDARD_COARSENING;
-            break;
-        case '?':
-            printf("unknown arg %c\n", optopt);
-            break;
-        case 'h':
-            printf("Usage: PasoTests -f filename [-s solver] [-p preconditioner] [-l level] [-r rhs vector] [-c coarsening method] [-t threshold parameter for AMG coarsening] \n");
-            printf("\t filename - matrix to be loaded in CSR Matrix-Market format\n");
-            printf("\t solver   - PCG, GMRES, PRES20, TFQMR and MINRES\n");
-            printf("\t preconditioner - ILU0, RILU, JACOBI, GS and AMG\n");
-            printf("\t level    - options are 1,2 and 3\n");
-            printf("\t\t 0 - default option just solves with default of specified parameters\n");
-            printf("\t\t 1 - test all solvers with default preconditioner\n");            
-            printf("\t\t 2 - test all preconditioners with default solver\n");
-            printf("\t\t 3 - compare solution obtained by using AMG and Jacobi precondioners\n");            
-            printf("\trhs vector - right hand side vector in CSR Matrix Market format.\n");
-            printf("\tcoarsening method - YS, RS, AGG and STD.\n");
-            printf("\tthreshold parameter for AMG - this is the threshold value used in AMG in the coarsening process. Default is 0.25. For YS and AGG, please, use 0.05.\n");            
-            break;
-        }
-    }
-    
-    if (A==NULL) {
-      /*fprintf(stderr,"CSR Matrix not Loaded\n");*/
-      return 0;
-    }
-   
-    
-    if (level==0) {
-        Paso_solve(A,x,b,&options);
-    }
-    else if (level==3) {
-        options.method=PASO_PCG;
-        options.verbose=TRUE;
-        options.preconditioner=PASO_JACOBI;
-
-        Paso_solve(A,x,b,&options);
-
-        for(i=0;i<n;i++) {
-          error[i]=x[i]-x_ref[i];
-        }
-        Lsuperror=Lsup(error,n)/Lsup(x_ref,n);
-        fprintf(stdout,"Lsup error Jacobi %e\n",Lsuperror);
-        
-        A->solver=NULL;
-        options.method=PASO_PCG;
-        options.preconditioner=PASO_AMG;
-        
-        Paso_solve(A,x,b,&options);
-        
-        for(i=0;i<n;i++) {
-          error[i]=x[i]-x_ref[i];
-        }
-        Lsuperror=Lsup(error,n)/Lsup(x_ref,n);
-        fprintf(stdout,"Lsup error AMG %e\n",Lsuperror);
-    }
-    else if (level==4) {
-        options.method=PASO_PCG;
-        options.verbose=TRUE;
-        options.preconditioner=PASO_AMG;
-        
-        Paso_Solver_setPreconditioner(A,&options);
-        Paso_Solver_solvePreconditioner(A,x,b);
- 
- 
-        for(i=0;i<n;i++) {
-          error[i]=x[i]-x_ref[i];
-        }
-        Lsuperror=Lsup(error,n)/Lsup(x_ref,n);
-        fprintf(stdout,"Lsup error AMG as a solver %e\n\n",Lsuperror);
-        
-        A->solver=NULL;
-        options.method=PASO_PCG;
-        options.preconditioner=PASO_AMG;
-                
-        Paso_solve(A,x,b,&options);
-        
-        for(i=0;i<n;i++) {
-          error[i]=x[i]-x_ref[i];
-        }
-        Lsuperror=Lsup(error,n)/Lsup(x_ref,n);
-        fprintf(stdout,"Lsup error AMG %e\n",Lsuperror);
-    }
-    else {
-        Paso_test_run(A,b,level);            
-    }
-
-   if (A!=NULL) {
-    MEMFREE(b);
-    MEMFREE(x);
-    MEMFREE(x_ref);
-    MEMFREE(error);
-    Paso_SystemMatrix_free(A);
-   }
-    
-return 1;
-}
-
-
diff --git a/paso/src/AMG.cpp b/paso/src/AMG.cpp
index 5a9e1f3..c759556 100644
--- a/paso/src/AMG.cpp
+++ b/paso/src/AMG.cpp
@@ -27,7 +27,6 @@
 
 #define SHOW_TIMING 0
 
-#include <iostream>
 #include "Paso.h"
 #include "Preconditioner.h"
 #include "MergedSolver.h"
@@ -35,8 +34,34 @@
 #include "PasoUtil.h"
 #include "MKL.h"
 
+#include <iostream>
+
+namespace {
+
+double random_seed = .4142135623730951;
+
+}
+
 namespace paso {
 
+inline double* createRandomVector(escript::const_Distribution_ptr dist)
+{
+    const index_t n_0 = dist->getFirstComponent();
+    const index_t n_1 = dist->getLastComponent();
+    const index_t n = dist->getGlobalNumComponents();
+    const dim_t my_n = n_1 - n_0;
+    double* out = new double[my_n];
+
+#pragma omp parallel for schedule(static)
+    for (index_t i = 0; i < my_n; ++i) {
+        out[i] = fmod(random_seed * (n_0+i+1), 1.);
+    }
+
+    random_seed = fmod(random_seed * (n+1.7), 1.);
+    return out;
+}
+
+
 void Preconditioner_AMG_free(Preconditioner_AMG* in)
 {
     if (in!=NULL) {
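
The createRandomVector() helper added above replaces the earlier col_dist->createRandomVector(1) call: each rank fills its local slice of the vector with a deterministic pseudo-random value derived from the global component index, and then advances the shared seed. A minimal standalone sketch of the same scheme follows; the ToyDistribution struct is purely illustrative and only mimics the (first, last, global) information the real escript distribution provides.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Toy stand-in for the distribution object: this rank owns components
    // [first, last) out of n_global in total (illustrative only).
    struct ToyDistribution {
        long first, last, n_global;
    };

    static double random_seed = 0.4142135623730951;

    // Same recurrence as the helper in AMG.cpp: component with global index g
    // gets frac(seed * (g + 1)); afterwards the seed itself is advanced.
    std::vector<double> createRandomVector(const ToyDistribution& dist)
    {
        const long my_n = dist.last - dist.first;
        std::vector<double> out(my_n);
        for (long i = 0; i < my_n; ++i)
            out[i] = std::fmod(random_seed * (dist.first + i + 1), 1.);
        random_seed = std::fmod(random_seed * (dist.n_global + 1.7), 1.);
        return out;
    }

    int main()
    {
        ToyDistribution dist = {4, 8, 16};   // this rank owns components 4..7 of 16
        std::vector<double> v = createRandomVector(dist);
        for (size_t i = 0; i < v.size(); ++i)
            std::printf("%g\n", v[i]);
        return 0;
    }
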
@@ -159,7 +184,7 @@ Preconditioner_AMG* Preconditioner_AMG_alloc(SystemMatrix_ptr A, int level,
     A->copyRemoteCoupleBlock(false);
 
     // set splitting of unknowns
-    time0 = Esys_timer();
+    time0 = escript::gettime();
     if (n_block > 1) {
         Preconditioner_AMG_setStrongConnections_Block(A, degree_S, offset_S,
                                                       S, theta, tau);
@@ -180,114 +205,104 @@ Preconditioner_AMG* Preconditioner_AMG_alloc(SystemMatrix_ptr A, int level,
     //if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
     //    Preconditioner_AMG_enforceFFConnectivity(n, A->pattern->ptr, degree_S, S, F_marker);
 
-    options->coarsening_selection_time = Esys_timer()-time0 +
+    options->coarsening_selection_time = escript::gettime()-time0 +
                                std::max(0., options->coarsening_selection_time);
-    if (Esys_noError()) {
 #pragma omp parallel for
-        for (dim_t i = 0; i < n; ++i)
-            F_marker[i] = (F_marker[i]==PASO_AMG_IN_F ? PASO_AMG_IN_C : PASO_AMG_IN_F);
-
-        // count number of unknowns to be eliminated:
-        dim_t my_n_F = util::cumsum_maskedTrue(my_n, counter, (int*)F_marker);
-        const dim_t n_F = util::cumsum_maskedTrue(n, counter, (int*)F_marker);
-        // collect my_n_F values on all processes, a direct solver should
-        // be used if any my_n_F value is 0
-        dim_t* F_set = new dim_t[A->mpi_info->size];
+    for (dim_t i = 0; i < n; ++i)
+        F_marker[i] = (F_marker[i]==PASO_AMG_IN_F ? PASO_AMG_IN_C : PASO_AMG_IN_F);
+
+    // count number of unknowns to be eliminated:
+    dim_t my_n_F = util::cumsum_maskedTrue(my_n, counter, (int*)F_marker);
+    const dim_t n_F = util::cumsum_maskedTrue(n, counter, (int*)F_marker);
+    // collect my_n_F values on all processes; a direct solver should
+    // be used if any my_n_F value is 0
+    dim_t* F_set = new dim_t[A->mpi_info->size];
 #ifdef ESYS_MPI
-        MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A->mpi_info->comm);
+    MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A->mpi_info->comm);
 #endif
-        dim_t global_n_F = 0;
-        bool F_flag = true;
-        for (dim_t i=0; i<A->mpi_info->size; i++) {
-            global_n_F += F_set[i];
-            if (F_set[i] == 0)
-                F_flag = false;
-        }
-        delete[] F_set;
-
-        const dim_t global_n_C = global_n-global_n_F;
-        if (verbose)
-            std::cout << "Preconditioner: AMG (non-local) level " << level
-                << ": " << global_n_F << " unknowns are flagged for"
-                << " elimination. " << global_n_C << " left." << std::endl;
-
-        //if (n_F == 0) { nasty case. a direct solver should be used!
-        if (F_flag) {
-            out = new Preconditioner_AMG;
-            out->level = level;
-            out->post_sweeps = options->post_sweeps;
-            out->pre_sweeps  = options->pre_sweeps;
-            out->r = NULL;
-            out->x_C = NULL;
-            out->b_C = NULL;
-            out->AMG_C = NULL;
-            out->Smoother = NULL;
-            out->merged_solver = NULL;
-            if (Esys_noError()) {
-                out->Smoother = Preconditioner_Smoother_alloc(A,
-                        (options->smoother == PASO_JACOBI), 0, verbose);
-
-                if (global_n_C != 0) {
-                    index_t* mask_C = new index_t[n];
-                    index_t* rows_in_F = new index_t[n_F];
-                    // create mask of C nodes with value >-1, gives new id
-                    const dim_t n_C = util::cumsum_maskedFalse(n, mask_C,
-                                                               (int*)F_marker);
-                    const dim_t my_n_C = my_n-my_n_F;
-                    // if nothing has been removed we have a diagonal dominant
-                    // matrix and we just run a few steps of the smoother
-
-                    out->x_C = new double[n_block*my_n_C];
-                    out->b_C = new double[n_block*my_n_C];
-                    out->r = new double[n_block*my_n];
-
-                    if (Esys_noError()) {
-                        // creates index for F
+    dim_t global_n_F = 0;
+    bool F_flag = true;
+    for (dim_t i=0; i<A->mpi_info->size; i++) {
+        global_n_F += F_set[i];
+        if (F_set[i] == 0)
+            F_flag = false;
+    }
+    delete[] F_set;
+
+    const dim_t global_n_C = global_n-global_n_F;
+    if (verbose)
+        std::cout << "Preconditioner: AMG (non-local) level " << level
+            << ": " << global_n_F << " unknowns are flagged for"
+            << " elimination. " << global_n_C << " left." << std::endl;
+
+    //if (n_F == 0) { nasty case. a direct solver should be used!
+    if (F_flag) {
+        out = new Preconditioner_AMG;
+        out->level = level;
+        out->post_sweeps = options->post_sweeps;
+        out->pre_sweeps  = options->pre_sweeps;
+        out->r = NULL;
+        out->x_C = NULL;
+        out->b_C = NULL;
+        out->AMG_C = NULL;
+        out->Smoother = NULL;
+        out->merged_solver = NULL;
+        out->Smoother = Preconditioner_Smoother_alloc(A,
+                (options->smoother == PASO_JACOBI), 0, verbose);
+
+        if (global_n_C != 0) {
+            index_t* mask_C = new index_t[n];
+            index_t* rows_in_F = new index_t[n_F];
+            // create mask of C nodes with value >-1, gives new id
+            const dim_t n_C = util::cumsum_maskedFalse(n, mask_C,
+                                                       (int*)F_marker);
+            const dim_t my_n_C = my_n-my_n_F;
+            // if nothing has been removed we have a diagonally dominant
+            // matrix and we just run a few steps of the smoother
+
+            out->x_C = new double[n_block*my_n_C];
+            out->b_C = new double[n_block*my_n_C];
+            out->r = new double[n_block*my_n];
+
+            // creates index for F
 #pragma omp parallel for
-                        for (dim_t i = 0; i < n; ++i) {
-                            if (F_marker[i])
-                                rows_in_F[counter[i]] = i;
-                        }
-                        // get Prolongation
-                        time0 = Esys_timer();
-                        out->P = Preconditioner_AMG_getProlongation(A,
-                                offset_S, degree_S, S, n_C, mask_C,
-                                options->interpolation_method);
-                    }
-
-                    // construct Restriction operator as transposed of
-                    // Prolongation operator:
-                    if (Esys_noError()) {
-                        time0 = Esys_timer();
-                        out->R = Preconditioner_AMG_getRestriction(out->P);
-                        if (SHOW_TIMING)
-                            std::cout << "timing: level " << level
-                                << ": getTranspose: " << Esys_timer()-time0
-                                << std::endl;
-                    }
-                    // construct coarse level matrix
-                    SystemMatrix_ptr A_C;
-                    if (Esys_noError()) {
-                        time0 = Esys_timer();
-                        A_C = Preconditioner_AMG_buildInterpolationOperator(A, out->P, out->R);
-                        if (SHOW_TIMING)
-                            std::cout << "timing: level " << level
-                                << ": construct coarse matrix: "
-                                << Esys_timer()-time0 << std::endl;
-
-                        out->AMG_C = Preconditioner_AMG_alloc(A_C, level+1, options);
-                        out->A_C = A_C;
-                        if (out->AMG_C == NULL) {
-                            // merge the system matrix into 1 rank when
-                            // it's not suitable coarsening due to the
-                            // total number of unknowns being too small
-                            out->merged_solver = new MergedSolver(A_C, options);
-                        }
-                    }
-                    delete[] mask_C;
-                    delete[] rows_in_F;
-                }
+            for (dim_t i = 0; i < n; ++i) {
+                if (F_marker[i])
+                    rows_in_F[counter[i]] = i;
             }
+            // get Prolongation
+            time0 = escript::gettime();
+            out->P = Preconditioner_AMG_getProlongation(A,
+                    offset_S, degree_S, S, n_C, mask_C,
+                    options->interpolation_method);
+
+            // construct Restriction operator as transposed of
+            // Prolongation operator:
+            time0 = escript::gettime();
+            out->R = Preconditioner_AMG_getRestriction(out->P);
+            if (SHOW_TIMING)
+                std::cout << "timing: level " << level
+                    << ": getTranspose: " << escript::gettime()-time0
+                    << std::endl;
+            // construct coarse level matrix
+            SystemMatrix_ptr A_C;
+            time0 = escript::gettime();
+            A_C = Preconditioner_AMG_buildInterpolationOperator(A, out->P, out->R);
+            if (SHOW_TIMING)
+                std::cout << "timing: level " << level
+                    << ": construct coarse matrix: "
+                    << escript::gettime()-time0 << std::endl;
+
+            out->AMG_C = Preconditioner_AMG_alloc(A_C, level+1, options);
+            out->A_C = A_C;
+            if (out->AMG_C == NULL) {
+                // merge the system matrix into one rank when further
+                // coarsening is not suitable because the total number
+                // of unknowns is too small
+                out->merged_solver = new MergedSolver(A_C, options);
+            }
+            delete[] mask_C;
+            delete[] rows_in_F;
         }
     }
     delete[] counter;
@@ -299,12 +314,7 @@ Preconditioner_AMG* Preconditioner_AMG_alloc(SystemMatrix_ptr A, int level,
     delete[] offset_ST;
     delete[] ST;
 
-    if (Esys_noError()) {
-        return out;
-    } else  {
-        Preconditioner_AMG_free(out);
-        return NULL;
-    }
+    return out;
 }
 
 
@@ -316,45 +326,45 @@ void Preconditioner_AMG_solve(SystemMatrix_ptr A,
     const dim_t pre_sweeps=amg->pre_sweeps;
 
     // presmoothing
-    double time0 = Esys_timer();
+    double time0 = escript::gettime();
     Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, false);
-    time0 = Esys_timer()-time0;
+    time0 = escript::gettime()-time0;
     if (SHOW_TIMING)
         std::cout << "timing: level " << amg->level << ": Presmoothing: "
             << time0 << std::endl;
     // end of presmoothing
 
-    time0=Esys_timer();
+    time0=escript::gettime();
     // r <- b
     util::copy(n, amg->r, b);
     // r = r-Ax
-    SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r);
+    A->MatrixVector_CSR_OFFSET0(-1., x, 1., amg->r);
     // b_C = R*r
-    SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->R, amg->r, 0., amg->b_C);
-    time0 = Esys_timer()-time0;
+    amg->R->MatrixVector_CSR_OFFSET0(1., amg->r, 0., amg->b_C);
+    time0 = escript::gettime()-time0;
 
     // coarse level solve
     if (amg->AMG_C == NULL) {
-        time0 = Esys_timer();
+        time0 = escript::gettime();
         // A_C is the coarsest level
         amg->merged_solver->solve(amg->x_C, amg->b_C);
         if (SHOW_TIMING)
             std::cout << "timing: level " << amg->level << ": DIRECT SOLVER: "
-                << Esys_timer()-time0 << std::endl;
+                << escript::gettime()-time0 << std::endl;
     } else {
         // x_C = AMG(b_C)
         Preconditioner_AMG_solve(amg->A_C, amg->AMG_C, amg->x_C, amg->b_C);
     }
 
-    time0 = time0+Esys_timer();
+    time0 = time0+escript::gettime();
     // x = x + P*x_c
-    SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->P, amg->x_C, 1., x);
+    amg->P->MatrixVector_CSR_OFFSET0(1., amg->x_C, 1., x);
 
     // postsmoothing:
     // solve Ax=b with initial guess x
-    time0 = Esys_timer();
+    time0 = escript::gettime();
     Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, true);
-    time0 = Esys_timer()-time0;
+    time0 = escript::gettime()-time0;
     if (SHOW_TIMING)
         std::cout << "timing: level " << amg->level << ": Postsmoothing: "
             << time0 << std::endl;
@@ -379,15 +389,15 @@ void Preconditioner_AMG_setStrongConnections(SystemMatrix_ptr A,
 #pragma omp parallel for private(i,iptr) schedule(static)
     for (i=0;i<my_n;++i) {
         double max_offdiagonal = 0.;
-        register double sum_row=0;
-        register double main_row=0;
-        register dim_t kdeg=0;
-        register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
+        double sum_row=0;
+        double main_row=0;
+        dim_t kdeg=0;
+        const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
 
         // collect information for row i:
         #pragma ivdep
         for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
-            register index_t j=A->mainBlock->pattern->index[iptr];
+            index_t j=A->mainBlock->pattern->index[iptr];
             const double fnorm=std::abs(A->mainBlock->val[iptr]);
             if(j != i) {
                 max_offdiagonal = std::max(max_offdiagonal,fnorm);
@@ -442,19 +452,19 @@ void Preconditioner_AMG_setStrongConnections(SystemMatrix_ptr A,
             A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
             -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
 
-        Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
+        Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2, A->mpi_info));
         threshold_coupler->startCollect(threshold_p);
         double* remote_threshold = threshold_coupler->finishCollect();
 
 #pragma omp parallel for private(i,iptr) schedule(static)
         for (i=0; i<overlap_n; i++) {
             const double threshold = remote_threshold[2*i+1];
-            register dim_t kdeg=0;
-            register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
+            dim_t kdeg=0;
+            const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
             if (remote_threshold[2*i]>0) {
                 #pragma ivdep
                 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
-                  register index_t j=A->row_coupleBlock->pattern->index[iptr];
+                  index_t j=A->row_coupleBlock->pattern->index[iptr];
                   if(std::abs(A->row_coupleBlock->val[iptr])>threshold) {
                      S[koffset+kdeg] = j ;
                      kdeg++;
@@ -463,7 +473,7 @@ void Preconditioner_AMG_setStrongConnections(SystemMatrix_ptr A,
 
                 #pragma ivdep
                 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
-                  register index_t j=A->remote_coupleBlock->pattern->index[iptr];
+                  index_t j=A->remote_coupleBlock->pattern->index[iptr];
                   if(std::abs(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
                       S[koffset+kdeg] = j + my_n;
                       kdeg++;
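
Preconditioner_AMG_setStrongConnections() above (and its _Block variant below) scans each row, records the largest off-diagonal magnitude, and keeps only the couplings whose magnitude exceeds a threshold; for the remote rows the threshold is first shipped to the neighbouring ranks through the Coupler. A small self-contained sketch of that per-row filtering on a plain CSR matrix follows, using the classical criterion |a_ij| > theta * max_{k!=i}|a_ik|; the tau-based diagonal handling, the block variant and Paso's SystemMatrix layout are deliberately omitted, so this is only an approximation of what the routine does.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Minimal CSR container (illustrative layout, not Paso's).
    struct CSR {
        std::vector<int> ptr, idx;      // ptr has nrows+1 entries
        std::vector<double> val;
    };

    // For every row i, keep column j (j != i) whenever
    // |a_ij| > theta * max_{k != i} |a_ik|.
    std::vector<std::vector<int> > strongConnections(const CSR& A, double theta)
    {
        const int n = (int)A.ptr.size() - 1;
        std::vector<std::vector<int> > S(n);
        for (int i = 0; i < n; ++i) {
            double max_offdiag = 0.0;
            for (int k = A.ptr[i]; k < A.ptr[i+1]; ++k)
                if (A.idx[k] != i)
                    max_offdiag = std::max(max_offdiag, std::fabs(A.val[k]));
            const double threshold = theta * max_offdiag;
            for (int k = A.ptr[i]; k < A.ptr[i+1]; ++k)
                if (A.idx[k] != i && std::fabs(A.val[k]) > threshold)
                    S[i].push_back(A.idx[k]);
        }
        return S;
    }

    int main()
    {
        // 3x3 example: with theta = 0.25, row 1 couples strongly to column 0 only
        CSR A;
        A.ptr = {0, 2, 5, 7};
        A.idx = {0, 1,   0, 1, 2,   1, 2};
        A.val = {2.0, -1.0,   -1.0, 2.0, -0.1,   -0.1, 2.0};
        std::vector<std::vector<int> > S = strongConnections(A, 0.25);
        for (size_t i = 0; i < S.size(); ++i) {
            std::printf("row %zu strong:", i);
            for (size_t j = 0; j < S[i].size(); ++j)
                std::printf(" %d", S[i][j]);
            std::printf("\n");
        }
        return 0;
    }
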
@@ -507,10 +517,10 @@ void Preconditioner_AMG_setStrongConnections_Block(SystemMatrix_ptr A,
         #pragma omp for schedule(static)
         for (i=0;i<my_n;++i) {
             double max_offdiagonal = 0.;
-            register double sum_row=0;
-            register double main_row=0;
-            register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
-            register dim_t kdeg=0;
+            double sum_row=0;
+            double main_row=0;
+            index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
+            dim_t kdeg=0;
             const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
 
             /* collect information for row i: */
@@ -589,14 +599,14 @@ void Preconditioner_AMG_setStrongConnections_Block(SystemMatrix_ptr A,
             A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
             -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
 
-        Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
+        Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2, A->mpi_info));
         threshold_coupler->startCollect(threshold_p);
         double* remote_threshold = threshold_coupler->finishCollect();
 
         #pragma omp parallel for private(i,iptr) schedule(static)
         for (i=0; i<overlap_n; i++) {
             const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
-            register dim_t kdeg=0;
+            dim_t kdeg=0;
             const index_t koffset = koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
             if (remote_threshold[2*i]>0) {
                 #pragma ivdep
@@ -673,12 +683,12 @@ void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
                                        const index_t* offset_ST,
                                        const index_t* ST,
                                        const_Connector_ptr col_connector,
-                                       const_Distribution_ptr col_dist)
+                                       escript::const_Distribution_ptr col_dist)
 {
-    Coupler_ptr w_coupler(new Coupler(col_connector, 1));
+    Coupler_ptr w_coupler(new Coupler(col_connector, 1, col_dist->mpi_info));
     double* w = new double[n];
     double* Status = new double[n];
-    double* random = col_dist->createRandomVector(1);
+    double* random = createRandomVector(col_dist);
     index_t* ST_flag = new index_t[offset_ST[n-1] + degree_ST[n-1]];
     dim_t i, numUndefined, iter=0;
     index_t iptr, jptr, kptr;
@@ -700,7 +710,8 @@ void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
         }
     }
 
-    numUndefined = col_dist->numPositives(Status, 1);
+    numUndefined = util::numPositives(col_dist->getMyNumComponents(), Status,
+                                      col_dist->mpi_info);
     //printf("coarsening loop start: num of undefined rows = %d \n",numUndefined);
     iter=0;
     while (numUndefined > 0) {
@@ -817,10 +828,10 @@ void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
         }
 
         i = numUndefined;
-        numUndefined = col_dist->numPositives(Status, 1);
+        numUndefined = util::numPositives(col_dist->getMyNumComponents(),
+                                          Status, col_dist->mpi_info);
         if (numUndefined == i) {
-            Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
-            return;
+            throw PasoException("AMG: Can NOT reduce numUndefined.");
         }
 
         iter++;
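
Throughout these hunks the Esys_noError()/Esys_setError() status-flag checks are removed in favour of exceptions: the success path no longer has to be nested inside flag tests, and a stalled coarsening now throws PasoException instead of setting SYSTEM_ERROR and returning. A tiny standalone illustration of the new control flow, with a placeholder SolverError standing in for paso::PasoException:

    #include <cstdio>
    #include <stdexcept>

    // Placeholder exception type; the real code throws paso::PasoException.
    struct SolverError : std::runtime_error {
        explicit SolverError(const char* m) : std::runtime_error(m) {}
    };

    // Throw as soon as a step cannot make progress and let the caller catch it,
    // instead of setting a global error flag and testing it after every call.
    int coarsenOnce(int numUndefined, int previousUndefined)
    {
        if (numUndefined == previousUndefined)
            throw SolverError("AMG: can NOT reduce numUndefined.");
        return numUndefined;
    }

    int main()
    {
        try {
            int undef = coarsenOnce(10, 12);     // fine: progress was made
            std::printf("undefined rows left: %d\n", undef);
            coarsenOnce(10, 10);                 // stalls -> throws
        } catch (const SolverError& e) {
            std::printf("caught: %s\n", e.what());
        }
        return 0;
    }
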
diff --git a/paso/src/AMG_Interpolation.cpp b/paso/src/AMG_Interpolation.cpp
index 24ab019..aa01377 100644
--- a/paso/src/AMG_Interpolation.cpp
+++ b/paso/src/AMG_Interpolation.cpp
@@ -25,9 +25,9 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "SparseMatrix.h"
 #include "PasoUtil.h"
 #include "Preconditioner.h"
+#include "SparseMatrix.h"
 
 #include <cstring> // memcpy
 
@@ -67,9 +67,8 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     if (A->mpi_info->size == 1) return;
 
     if (B->remote_coupleBlock.get()) {
-        Esys_setError(VALUE_ERROR, "Preconditioner_AMG_extendB: the link to "
-                                   "remote_coupleBlock has already been set.");
-        return;
+        throw PasoException("Preconditioner_AMG_extendB: the link to "
+                            "remote_coupleBlock has already been set.");
     }
 #ifdef ESYS_MPI
     B->row_coupleBlock.reset();
@@ -96,7 +95,7 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
         cols[i] = offset + i;
 
     if (B->global_id == NULL) {
-        coupler.reset(new Coupler(B->col_coupler->connector, 1));
+        coupler.reset(new Coupler(B->col_coupler->connector, 1, A->mpi_info));
         coupler->startCollect(cols);
     }
 
@@ -116,7 +115,7 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     dim_t num_couple_cols = B->col_coupleBlock->numCols;
     SharedComponents_ptr send(A->col_coupler->connector->send);
     SharedComponents_ptr recv(A->col_coupler->connector->recv);
-    const int num_neighbors = send->numNeighbors;
+    const int num_neighbors = send->neighbour.size();
     index_t p = send->offsetInShared[num_neighbors];
     index_t len = p * B->col_distribution->first_component[size];
     double* send_buf = new double[len * block_size];
@@ -158,14 +157,14 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     MPI_Allgatherv(global_id, num_couple_cols, MPI_INT, cols_array, recv_degree, recv_offset, MPI_INT, A->mpi_info->comm);
 
     // first, prepare the ptr_ptr to be received
-    q = recv->numNeighbors;
+    q = recv->neighbour.size();
     len = recv->offsetInShared[q];
     ptr_ptr = new index_t[(len+1) * 2];
     for (p=0; p<q; p++) {
         row = recv->offsetInShared[p];
         m = recv->offsetInShared[p + 1];
-        MPI_Irecv(&(ptr_ptr[2*row]), 2 * (m-row), MPI_INT, recv->neighbor[p],
-                A->mpi_info->msg_tag_counter+recv->neighbor[p],
+        MPI_Irecv(&(ptr_ptr[2*row]), 2 * (m-row), MPI_INT, recv->neighbour[p],
+                A->mpi_info->counter()+recv->neighbour[p],
                 A->mpi_info->comm,
                 &(A->col_coupler->mpi_requests[p]));
     }
@@ -175,7 +174,7 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     i0 = 0;
     for (p=0; p<num_neighbors; p++) {
         i = i0;
-        neighbor = send->neighbor[p];
+        neighbor = send->neighbour[p];
         m_lb = B->col_distribution->first_component[neighbor];
         m_ub = B->col_distribution->first_component[neighbor + 1];
         j_ub = send->offsetInShared[p + 1];
@@ -246,10 +245,10 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
         }
 
         /* sending */
-        MPI_Issend(&(send_offset[2*i0]), 2*(i-i0), MPI_INT, send->neighbor[p],
-                A->mpi_info->msg_tag_counter+rank,
+        MPI_Issend(&send_offset[2*i0], 2*(i-i0), MPI_INT, send->neighbour[p],
+                A->mpi_info->counter()+rank,
                 A->mpi_info->comm,
-                &(A->col_coupler->mpi_requests[p+recv->numNeighbors]));
+                &A->col_coupler->mpi_requests[p+recv->neighbour.size()]);
         send_degree[p] = len;
         i0 = i;
     }
@@ -258,16 +257,15 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     delete[] idx_m;
     delete[] idx_c;
 
-    q = recv->numNeighbors;
+    q = recv->neighbour.size();
     len = recv->offsetInShared[q];
     ptr_main = new index_t[(len+1)];
     ptr_couple = new index_t[(len+1)];
 
-    MPI_Waitall(A->col_coupler->connector->send->numNeighbors +
-                    A->col_coupler->connector->recv->numNeighbors,
-                A->col_coupler->mpi_requests,
-                A->col_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
+    MPI_Waitall(A->col_coupler->connector->send->neighbour.size() +
+                    A->col_coupler->connector->recv->neighbour.size(),
+                A->col_coupler->mpi_requests, A->col_coupler->mpi_stati);
+    A->mpi_info->incCounter(size);
 
     j = 0;
     k = 0;
@@ -289,14 +287,14 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     /* send/receive index array */
     j=0;
     k_ub = 0;
-    for (p=0; p<recv->numNeighbors; p++) {
+    for (p=0; p<recv->neighbour.size(); p++) {
         k = recv->offsetInShared[p];
         m = recv->offsetInShared[p+1];
         i = ptr_main[m] - ptr_main[k] + ptr_couple[m] - ptr_couple[k];
         if (i > 0) {
             k_ub ++;
-            MPI_Irecv(&(ptr_idx[j]), i, MPI_INT, recv->neighbor[p],
-                A->mpi_info->msg_tag_counter+recv->neighbor[p],
+            MPI_Irecv(&(ptr_idx[j]), i, MPI_INT, recv->neighbour[p],
+                A->mpi_info->counter()+recv->neighbour[p],
                 A->mpi_info->comm,
                 &(A->col_coupler->mpi_requests[p]));
         }
@@ -309,19 +307,19 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
         i = send_degree[p] - j;
         if (i > 0){
             k_ub ++;
-            MPI_Issend(&(send_idx[j]), i, MPI_INT, send->neighbor[p],
-                A->mpi_info->msg_tag_counter+rank,
+            MPI_Issend(&(send_idx[j]), i, MPI_INT, send->neighbour[p],
+                A->mpi_info->counter()+rank,
                 A->mpi_info->comm,
-                &(A->col_coupler->mpi_requests[p+recv->numNeighbors]));
+                &(A->col_coupler->mpi_requests[p+recv->neighbour.size()]));
         }
         j = send_degree[p];
     }
 
-    MPI_Waitall(A->col_coupler->connector->send->numNeighbors +
-                    A->col_coupler->connector->recv->numNeighbors,
+    MPI_Waitall(A->col_coupler->connector->send->neighbour.size() +
+                    A->col_coupler->connector->recv->neighbour.size(),
                 A->col_coupler->mpi_requests,
                 A->col_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
+    A->mpi_info->incCounter(size);
 
 #pragma omp parallel for private(i,j,k,m,p) schedule(static)
     for (i=0; i<len; i++) {
@@ -355,14 +353,14 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
 
     /* send/receive value array */
     j=0;
-    for (p=0; p<recv->numNeighbors; p++) {
+    for (p=0; p<recv->neighbour.size(); p++) {
         k = recv->offsetInShared[p];
         m = recv->offsetInShared[p+1];
         i = ptr_main[m] - ptr_main[k] + ptr_couple[m] - ptr_couple[k];
         if (i > 0)
             MPI_Irecv(&(ptr_val[j]), i * block_size,
-                MPI_DOUBLE, recv->neighbor[p],
-                A->mpi_info->msg_tag_counter+recv->neighbor[p],
+                MPI_DOUBLE, recv->neighbour[p],
+                A->mpi_info->counter()+recv->neighbour[p],
                 A->mpi_info->comm,
                 &(A->col_coupler->mpi_requests[p]));
         j += (i * block_size);
@@ -372,17 +370,17 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
     for (p=0; p<num_neighbors; p++) {
         i = send_degree[p] - j;
         if (i > 0)
-            MPI_Issend(&(send_buf[j*block_size]), i*block_size, MPI_DOUBLE, send->neighbor[p],
-                A->mpi_info->msg_tag_counter+rank,
-                A->mpi_info->comm,
-                &(A->col_coupler->mpi_requests[p+recv->numNeighbors]));
-        j = send_degree[p] ;
+            MPI_Issend(&send_buf[j*block_size], i*block_size, MPI_DOUBLE,
+                    send->neighbour[p], A->mpi_info->counter()+rank,
+                    A->mpi_info->comm,
+                    &A->col_coupler->mpi_requests[p+recv->neighbour.size()]);
+        j = send_degree[p];
     }
 
-    MPI_Waitall(A->col_coupler->connector->send->numNeighbors+A->col_coupler->connector->recv->numNeighbors,
-                A->col_coupler->mpi_requests,
-                A->col_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
+    MPI_Waitall(A->col_coupler->connector->send->neighbour.size() +
+                A->col_coupler->connector->recv->neighbour.size(),
+                A->col_coupler->mpi_requests, A->col_coupler->mpi_stati);
+    A->mpi_info->incCounter(size);
 
 #pragma omp parallel for private(i,j,k,m,p) schedule(static)
     for (i=0; i<len; i++) {
@@ -414,167 +412,155 @@ void Preconditioner_AMG_extendB(SystemMatrix_ptr A, SystemMatrix_ptr B)
 /* As defined, sparse matrix (let's call it T) defined by T(ptr, idx, val)
    has the same number of rows as P->col_coupleBlock->numCols. Now, we need
    to copy block of data in T to neighbour processors, defined by
-        P->col_coupler->connector->recv->neighbor[k] where k is in
-        [0, P->col_coupler->connector->recv->numNeighbors).
-   Rows to be copied to neighbor processor k is in the list defined by
+        P->col_coupler->connector->recv->neighbour[k] where k is in
+        [0, P->col_coupler->connector->recv->numNeighbours).
+   Rows to be copied to neighbour processor k is in the list defined by
         P->col_coupler->connector->recv->offsetInShared[k] ...
         P->col_coupler->connector->recv->offsetInShared[k+1]  */
 void Preconditioner_AMG_CopyRemoteData(SystemMatrix_ptr P,
         index_t **p_ptr, index_t **p_idx, double **p_val,
         index_t *global_id, index_t block_size)
 {
-    SharedComponents_ptr send, recv;
-    index_t send_neighbors, recv_neighbors, send_rows, recv_rows;
-    index_t i, j, p, m, n, size;
-    index_t *send_degree=NULL, *recv_ptr=NULL, *recv_idx=NULL;
+    index_t i, j, p, m, n;
     index_t *ptr=*p_ptr, *idx=*p_idx;
-    double  *val=*p_val, *recv_val=NULL;
-  #ifdef ESYS_MPI
-    index_t rank = P->mpi_info->rank;
-  #endif
-
-    size = P->mpi_info->size;
-    send = P->col_coupler->connector->recv;
-    recv = P->col_coupler->connector->send;
-    send_neighbors = send->numNeighbors;
-    recv_neighbors = recv->numNeighbors;
-    send_rows = P->col_coupleBlock->numCols;
-    recv_rows = recv->offsetInShared[recv_neighbors];
-
-    send_degree = new index_t[send_rows];
-    recv_ptr = new index_t[recv_rows + 1];
-  #pragma omp for schedule(static) private(i)
+    double  *val=*p_val;
+#ifdef ESYS_MPI
+    int rank = P->mpi_info->rank;
+    int size = P->mpi_info->size;
+#endif
+
+    SharedComponents_ptr send(P->col_coupler->connector->recv);
+    SharedComponents_ptr recv(P->col_coupler->connector->send);
+    int send_neighbors = send->neighbour.size();
+    int recv_neighbors = recv->neighbour.size();
+    dim_t send_rows = P->col_coupleBlock->numCols;
+    dim_t recv_rows = recv->offsetInShared[recv_neighbors];
+
+    index_t* send_degree = new index_t[send_rows];
+    index_t* recv_ptr = new index_t[recv_rows + 1];
+#pragma omp for schedule(static) private(i)
     for (i=0; i<send_rows; i++)
         send_degree[i] = ptr[i+1] - ptr[i];
 
-  /* First, send/receive the degree */
-  for (p=0; p<recv_neighbors; p++) { /* Receiving */
-    m = recv->offsetInShared[p];
-    n = recv->offsetInShared[p+1];
-    #ifdef ESYS_MPI
-    MPI_Irecv(&(recv_ptr[m]), n-m, MPI_INT, recv->neighbor[p],
-                P->mpi_info->msg_tag_counter + recv->neighbor[p],
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p]));
-    #endif
-  }
-  for (p=0; p<send_neighbors; p++) { /* Sending */
-    m = send->offsetInShared[p];
-    n = send->offsetInShared[p+1];
-    #ifdef ESYS_MPI
-    MPI_Issend(&(send_degree[m]), n-m, MPI_INT, send->neighbor[p],
-                P->mpi_info->msg_tag_counter + rank,
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p+recv_neighbors]));
-    #endif
-  }
-  #ifdef ESYS_MPI
-  MPI_Waitall(send_neighbors+recv_neighbors,
-                P->col_coupler->mpi_requests,
-                P->col_coupler->mpi_stati);
-  #endif
-  ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
-
-  delete[] send_degree;
-  m = util::cumsum(recv_rows, recv_ptr);
-  recv_ptr[recv_rows] = m;
-  recv_idx = new index_t[m];
-  recv_val = new double[m * block_size];
-
-  /* Next, send/receive the index array */
-  j = 0;
-  for (p=0; p<recv_neighbors; p++) { /* Receiving */
-    m = recv->offsetInShared[p];
-    n = recv->offsetInShared[p+1];
-    i = recv_ptr[n] - recv_ptr[m];
-    if (i > 0) {
-      #ifdef ESYS_MPI
-      MPI_Irecv(&(recv_idx[j]), i, MPI_INT, recv->neighbor[p],
-                P->mpi_info->msg_tag_counter + recv->neighbor[p],
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p]));
-      #endif
+    // First, send/receive the degree
+    for (p = 0; p < recv_neighbors; p++) { // Receiving
+        m = recv->offsetInShared[p];
+        n = recv->offsetInShared[p+1];
+#ifdef ESYS_MPI
+        MPI_Irecv(&recv_ptr[m], n-m, MPI_INT, recv->neighbour[p],
+                  P->mpi_info->counter() + recv->neighbour[p],
+                  P->mpi_info->comm, &P->col_coupler->mpi_requests[p]);
+#endif
     }
-    j += i;
-  }
-
-  j = 0;
-  for (p=0; p<send_neighbors; p++) { /* Sending */
-    m = send->offsetInShared[p];
-    n = send->offsetInShared[p+1];
-    i = ptr[n] - ptr[m];
-    if (i >0) {
-        #ifdef ESYS_MPI
-        MPI_Issend(&(idx[j]), i, MPI_INT, send->neighbor[p],
-                P->mpi_info->msg_tag_counter + rank,
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p+recv_neighbors]));
-        #endif
+    for (p = 0; p < send_neighbors; p++) { // Sending
+        m = send->offsetInShared[p];
+        n = send->offsetInShared[p+1];
+#ifdef ESYS_MPI
+        MPI_Issend(&send_degree[m], n-m, MPI_INT, send->neighbour[p],
+                   P->mpi_info->counter() + rank, P->mpi_info->comm,
+                   &P->col_coupler->mpi_requests[p+recv_neighbors]);
+#endif
+    }
+#ifdef ESYS_MPI
+    P->mpi_info->incCounter(size);
+    MPI_Waitall(send_neighbors+recv_neighbors, P->col_coupler->mpi_requests,
+                P->col_coupler->mpi_stati);
+#endif
+
+    delete[] send_degree;
+    m = util::cumsum(recv_rows, recv_ptr);
+    recv_ptr[recv_rows] = m;
+    index_t* recv_idx = new index_t[m];
+    double* recv_val = new double[m * block_size];
+
+    // Next, send/receive the index array
+    j = 0;
+    for (p=0; p<recv_neighbors; p++) { // Receiving
+        m = recv->offsetInShared[p];
+        n = recv->offsetInShared[p+1];
+        i = recv_ptr[n] - recv_ptr[m];
+#ifdef ESYS_MPI
+        if (i > 0) {
+            MPI_Irecv(&recv_idx[j], i, MPI_INT, recv->neighbour[p],
+                    P->mpi_info->counter() + recv->neighbour[p],
+                    P->mpi_info->comm, &P->col_coupler->mpi_requests[p]);
+        }
+#endif
         j += i;
     }
-  }
-  #ifdef ESYS_MPI
-  MPI_Waitall(send_neighbors+recv_neighbors,
-                P->col_coupler->mpi_requests,
+
+    j = 0;
+    for (p=0; p<send_neighbors; p++) { /* Sending */
+        m = send->offsetInShared[p];
+        n = send->offsetInShared[p+1];
+        i = ptr[n] - ptr[m];
+        if (i > 0) {
+#ifdef ESYS_MPI
+            MPI_Issend(&idx[j], i, MPI_INT, send->neighbour[p],
+                       P->mpi_info->counter() + rank, P->mpi_info->comm,
+                       &P->col_coupler->mpi_requests[p+recv_neighbors]);
+#endif
+            j += i;
+        }
+    }
+#ifdef ESYS_MPI
+    P->mpi_info->incCounter(size);
+    MPI_Waitall(send_neighbors+recv_neighbors, P->col_coupler->mpi_requests,
                 P->col_coupler->mpi_stati);
-  #endif
-  ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
-
-  /* Last, send/receive the data array */
-  j = 0;
-  for (p=0; p<recv_neighbors; p++) { /* Receiving */
-    m = recv->offsetInShared[p];
-    n = recv->offsetInShared[p+1];
-    i = recv_ptr[n] - recv_ptr[m];
-    #ifdef ESYS_MPI
-    if (i > 0)
-      MPI_Irecv(&(recv_val[j]), i*block_size, MPI_DOUBLE, recv->neighbor[p],
-                P->mpi_info->msg_tag_counter + recv->neighbor[p],
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p]));
-    #endif
-    j += (i*block_size);
-  }
-
-  j = 0;
-  for (p=0; p<send_neighbors; p++) { /* Sending */
-    m = send->offsetInShared[p];
-    n = send->offsetInShared[p+1];
-    i = ptr[n] - ptr[m];
-    if (i >0) {
-        #ifdef ESYS_MPI
-        MPI_Issend(&(val[j]), i * block_size, MPI_DOUBLE, send->neighbor[p],
-                P->mpi_info->msg_tag_counter + rank,
-                P->mpi_info->comm,
-                &(P->col_coupler->mpi_requests[p+recv_neighbors]));
-        #endif
-        j += i * block_size;
+#endif
+
+    // Last, send/receive the data array
+    j = 0;
+    for (p=0; p<recv_neighbors; p++) { /* Receiving */
+        m = recv->offsetInShared[p];
+        n = recv->offsetInShared[p+1];
+        i = recv_ptr[n] - recv_ptr[m];
+#ifdef ESYS_MPI
+        if (i > 0)
+            MPI_Irecv(&recv_val[j], i*block_size, MPI_DOUBLE, recv->neighbour[p],
+                P->mpi_info->counter() + recv->neighbour[p],
+                P->mpi_info->comm, &P->col_coupler->mpi_requests[p]);
+#endif
+        j += (i*block_size);
     }
-  }
-  #ifdef ESYS_MPI
-  MPI_Waitall(send_neighbors+recv_neighbors,
-                P->col_coupler->mpi_requests,
+
+    j = 0;
+    for (p=0; p<send_neighbors; p++) { /* Sending */
+        m = send->offsetInShared[p];
+        n = send->offsetInShared[p+1];
+        i = ptr[n] - ptr[m];
+        if (i >0) {
+#ifdef ESYS_MPI
+            MPI_Issend(&val[j], i * block_size, MPI_DOUBLE, send->neighbour[p],
+                       P->mpi_info->counter() + rank, P->mpi_info->comm,
+                       &P->col_coupler->mpi_requests[p+recv_neighbors]);
+#endif
+            j += i * block_size;
+        }
+    }
+#ifdef ESYS_MPI
+    P->mpi_info->incCounter(size);
+    MPI_Waitall(send_neighbors+recv_neighbors, P->col_coupler->mpi_requests,
                 P->col_coupler->mpi_stati);
-  #endif
-  ESYS_MPI_INC_COUNTER(*(P->mpi_info),size);
-
-  /* Clean up and return with received ptr, index and data arrays */
-  delete[] ptr;
-  delete[] idx;
-  delete[] val;
-  *p_ptr = recv_ptr;
-  *p_idx = recv_idx;
-  *p_val = recv_val;
+#endif
+
+    // Clean up and return with received ptr, index and data arrays
+    delete[] ptr;
+    delete[] idx;
+    delete[] val;
+    *p_ptr = recv_ptr;
+    *p_idx = recv_idx;
+    *p_val = recv_val;
 }
 
 SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
         SystemMatrix_ptr A, SystemMatrix_ptr P,
         SystemMatrix_ptr R)
 {
-   esysUtils::JMPI& mpi_info=A->mpi_info;
+   escript::JMPI& mpi_info=A->mpi_info;
    SystemMatrix_ptr out;
    SystemMatrixPattern_ptr pattern;
-   Distribution_ptr input_dist, output_dist;
+   escript::Distribution_ptr input_dist, output_dist;
    Connector_ptr col_connector, row_connector;
    const dim_t row_block_size=A->row_block_size;
    const dim_t col_block_size=A->col_block_size;
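
Both Preconditioner_AMG_extendB() and Preconditioner_AMG_CopyRemoteData() above use the same exchange idiom: post one MPI_Irecv per receiving neighbour (tagged with the counter plus the sender's rank), one MPI_Issend per sending neighbour (tagged with the counter plus the local rank), wait on all requests, and then advance the shared tag counter, now via mpi_info->incCounter(size) rather than the old ESYS_MPI_INC_COUNTER macro. A stripped-down, self-contained version of that idiom, in which every rank exchanges one integer with its immediate neighbours and the tag bookkeeping is reduced to a plain int:

    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        // neighbours: previous and next rank, if they exist
        std::vector<int> neighbour;
        if (rank > 0)        neighbour.push_back(rank - 1);
        if (rank < size - 1) neighbour.push_back(rank + 1);

        int tag_counter = 0;                       // stands in for mpi_info->counter()
        std::vector<int> recv_buf(neighbour.size());
        std::vector<int> send_buf(neighbour.size(), 100 + rank);
        std::vector<MPI_Request> requests(2 * neighbour.size());

        // post all receives first, tagged by the sender's rank ...
        for (size_t p = 0; p < neighbour.size(); ++p)
            MPI_Irecv(&recv_buf[p], 1, MPI_INT, neighbour[p],
                      tag_counter + neighbour[p], MPI_COMM_WORLD, &requests[p]);

        // ... then the matching sends, tagged by this rank
        for (size_t p = 0; p < neighbour.size(); ++p)
            MPI_Issend(&send_buf[p], 1, MPI_INT, neighbour[p],
                       tag_counter + rank, MPI_COMM_WORLD,
                       &requests[neighbour.size() + p]);

        MPI_Waitall((int)requests.size(), requests.data(), MPI_STATUSES_IGNORE);
        tag_counter += size;                       // cf. mpi_info->incCounter(size)

        for (size_t p = 0; p < neighbour.size(); ++p)
            std::printf("rank %d got %d from rank %d\n",
                        rank, recv_buf[p], neighbour[p]);

        MPI_Finalize();
        return 0;
    }
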
@@ -583,10 +569,10 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    const double ZERO = 0.0;
    double *RAP_main_val=NULL, *RAP_couple_val=NULL, *RAP_ext_val=NULL;
    double rtmp, *RAP_val, *RA_val, *R_val, *temp_val=NULL, *t1_val, *t2_val;
-   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
+   index_t size=mpi_info->size, rank=mpi_info->rank;
    index_t *RAP_main_ptr=NULL, *RAP_couple_ptr=NULL, *RAP_ext_ptr=NULL;
    index_t *RAP_main_idx=NULL, *RAP_couple_idx=NULL, *RAP_ext_idx=NULL;
-   index_t *offsetInShared=NULL, *row_couple_ptr=NULL, *row_couple_idx=NULL;
+   index_t *row_couple_ptr=NULL, *row_couple_idx=NULL;
    index_t *Pcouple_to_Pext=NULL, *Pext_to_RAP=NULL, *Pcouple_to_RAP=NULL;
    index_t *temp=NULL, *global_id_P=NULL, *global_id_RAP=NULL;
    index_t *shared=NULL, *P_marker=NULL, *A_marker=NULL;
@@ -598,13 +584,6 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    index_t **send_ptr=NULL, **send_idx=NULL;
    dim_t p, num_neighbors;
    dim_t *recv_len=NULL, *send_len=NULL, *len=NULL;
-   Esys_MPI_rank *neighbor=NULL;
-   #ifdef ESYS_MPI
-     MPI_Request* mpi_requests=NULL;
-     MPI_Status* mpi_stati=NULL;
-   #else
-     int *mpi_requests=NULL, *mpi_stati=NULL;
-   #endif
 
 /*   if (!(P->type & MATRIX_FORMAT_DIAGONAL_BLOCK))
      return Preconditioner_AMG_buildInterpolationOperatorBlock(A, P, R);*/
@@ -617,7 +596,7 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    if (size > 1)
      R_couple = P->col_coupleBlock->getTranspose();
 
-   /* generate P_ext, i.e. portion of P that is stored on neighbor procs
+   /* generate P_ext, i.e. portion of P that is stored on neighbour procs
       and needed locally for triple matrix product RAP
       to be specific, P_ext (on processor k) are group of rows in P, where
       the list of rows from processor q is given by
@@ -1062,7 +1041,7 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,
                 &RAP_ext_val, global_id_P, block_size);
 
-   num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];
+   num_RAPext_rows = P->col_coupler->connector->send->numSharedComponents;
    sum = RAP_ext_ptr[num_RAPext_rows];
    num_RAPext_cols = 0;
    if (num_Pext_cols || sum > 0) {
@@ -1138,14 +1117,14 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
   /* alloc and initialise the markers */
    sum = num_RAPext_cols + num_Pmain_cols;
    P_marker = new index_t[sum];
-   #pragma omp parallel for private(i) schedule(static)
+#pragma omp parallel for private(i) schedule(static)
    for (i=0; i<sum; i++) P_marker[i] = -1;
-   #pragma omp parallel for private(i) schedule(static)
+#pragma omp parallel for private(i) schedule(static)
    for (i=0; i<num_A_cols; i++) A_marker[i] = -1;
 
-   /* Now, count the size of RAP. Start with rows in R_main */
-   num_neighbors = P->col_coupler->connector->send->numNeighbors;
-   offsetInShared = P->col_coupler->connector->send->offsetInShared;
+   // Now, count the size of RAP. Start with rows in R_main
+   num_neighbors = P->col_coupler->connector->send->neighbour.size();
+   std::vector<index_t> offsetInShared(P->col_coupler->connector->send->offsetInShared);
    shared = P->col_coupler->connector->send->shared;
    i = 0;
    j = 0;
@@ -1158,8 +1137,8 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
         in RAP_ext */
      P_marker[i_r] = i;
      i++;
-     for (j1=0; j1<num_neighbors; j1++) {
-        for (j2=offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
+     for (j1 = 0; j1<num_neighbors; j1++) {
+        for (j2 = offsetInShared[j1]; j2<offsetInShared[j1+1]; j2++) {
           if (shared[j2] == i_r) {
             for (k=RAP_ext_ptr[j2]; k<RAP_ext_ptr[j2+1]; k++) {
               i_c = RAP_ext_idx[k];
@@ -1678,23 +1657,21 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    /* Start to create the coarse level System Matrix A_c */
    /******************************************************/
    /* first, prepare the sender/receiver for the col_connector */
-   dist = P->pattern->input_distribution->first_component;
+   const std::vector<index_t> dist(P->pattern->input_distribution->first_component);
    recv_len = new dim_t[size];
    send_len = new dim_t[size];
-   neighbor = new Esys_MPI_rank[size];
-   offsetInShared = new index_t[size+1];
+   std::vector<int> neighbour;
+   offsetInShared.clear();
    shared = new index_t[num_RAPext_cols];
    memset(recv_len, 0, sizeof(dim_t) * size);
    memset(send_len, 0, sizeof(dim_t) * size);
-   num_neighbors = 0;
-   offsetInShared[0] = 0;
+   offsetInShared.push_back(0);
    for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {
      shared[i] = i + num_Pmain_cols;
      if (k <= global_id_RAP[i]) {
         if (recv_len[j] > 0) {
-          neighbor[num_neighbors] = j;
-          num_neighbors ++;
-          offsetInShared[num_neighbors] = i;
+          neighbour.push_back(j);
+          offsetInShared.push_back(i);
         }
         while (k <= global_id_RAP[i]) {
           j++;
@@ -1704,79 +1681,70 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
      recv_len[j] ++;
    }
    if (recv_len[j] > 0) {
-     neighbor[num_neighbors] = j;
-     num_neighbors ++;
-     offsetInShared[num_neighbors] = i;
+     neighbour.push_back(j);
+     offsetInShared.push_back(i);
    }
 
-   SharedComponents_ptr recv(new SharedComponents(
-               num_Pmain_cols, num_neighbors, neighbor, shared,
-               offsetInShared, 1, 0, mpi_info));
-
-   #ifdef ESYS_MPI
-   MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
-   #endif
+   SharedComponents_ptr recv(new SharedComponents(num_Pmain_cols, neighbour,
+                             shared, offsetInShared));
 
-   #ifdef ESYS_MPI
-     mpi_requests=new MPI_Request[size*2];
-     mpi_stati=new MPI_Status[size*2];
-   #else
-     mpi_requests=new int[size*2];
-     mpi_stati=new int[size*2];
-   #endif
+#ifdef ESYS_MPI
+    MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
+    MPI_Request* mpi_requests = new MPI_Request[size*2];
+    MPI_Status* mpi_stati = new MPI_Status[size*2];
+#endif
    num_neighbors = 0;
    j = 0;
-   offsetInShared[0] = 0;
+   neighbour.clear();
+   offsetInShared.clear();
+   offsetInShared.push_back(0);
    for (i=0; i<size; i++) {
      if (send_len[i] > 0) {
-        neighbor[num_neighbors] = i;
-        num_neighbors ++;
+        neighbour.push_back(i);
         j += send_len[i];
-        offsetInShared[num_neighbors] = j;
+        offsetInShared.push_back(j);
+        num_neighbors++;
      }
    }
    delete[] shared;
    shared = new index_t[j];
-   for (i=0, j=0; i<num_neighbors; i++) {
-     k = neighbor[i];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,
-                mpi_info->msg_tag_counter+k,
+   for (i=0, j=0; i<neighbour.size(); i++) {
+     k = neighbour[i];
+#ifdef ESYS_MPI
+     MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k, mpi_info->counter()+k,
                 mpi_info->comm, &mpi_requests[i]);
-     #endif
+#endif
      j += send_len[k];
    }
-   for (i=0, j=0; i<recv->numNeighbors; i++) {
-     k = recv->neighbor[i];
-     #ifdef ESYS_MPI
+   for (i=0, j=0; i<recv->neighbour.size(); i++) {
+     k = recv->neighbour[i];
+#ifdef ESYS_MPI
      MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,
-                mpi_info->msg_tag_counter+rank,
+                mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[i+num_neighbors]);
-     #endif
+#endif
      j += recv_len[k];
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + recv->numNeighbors,
-                mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+   mpi_info->incCounter(size);
+   MPI_Waitall(num_neighbors + recv->neighbour.size(), mpi_requests, mpi_stati);
+#endif
 
    j = offsetInShared[num_neighbors];
    offset = dist[rank];
-   #pragma omp parallel for schedule(static) private(i)
+#pragma omp parallel for schedule(static) private(i)
    for (i=0; i<j; i++) shared[i] = shared[i] - offset;
 
-   SharedComponents_ptr send(new SharedComponents(
-               num_Pmain_cols, num_neighbors, neighbor, shared,
-               offsetInShared, 1, 0, mpi_info));
+   SharedComponents_ptr send(new SharedComponents(num_Pmain_cols, neighbour,
+                                                  shared, offsetInShared));
 
    col_connector.reset(new Connector(send, recv));
    delete[] shared;
 
    /* now, create row distribution (output_distri) and col
       distribution (input_distribution) */
-   input_dist.reset(new Distribution(mpi_info, dist, 1, 0));
-   output_dist.reset(new Distribution(mpi_info, dist, 1, 0));
+   input_dist.reset(new escript::Distribution(mpi_info, dist));
+   output_dist.reset(new escript::Distribution(mpi_info, dist));
 
   /* then, prepare the sender/receiver for the row_connector; first, prepare
      the information for the sender */
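
The connector set-up in the hunk above now collects its neighbours into a std::vector<int> together with a matching offsetInShared vector (one cumulative offset per neighbour, starting at 0) and hands both to the new SharedComponents constructor, instead of counting into pre-sized raw arrays. A small standalone sketch of that construction from a per-rank length table; the lengths are made up and only the shape of the two vectors is the point.

    #include <cstdio>
    #include <vector>

    int main()
    {
        // send_len[i]: how many components go to rank i (made-up numbers)
        std::vector<int> send_len = {0, 3, 0, 5, 2};

        std::vector<int> neighbour;             // ranks we actually talk to
        std::vector<int> offsetInShared(1, 0);  // cumulative offsets, starts at 0
        int total = 0;
        for (int i = 0; i < (int)send_len.size(); ++i) {
            if (send_len[i] > 0) {
                neighbour.push_back(i);
                total += send_len[i];
                offsetInShared.push_back(total);
            }
        }

        // neighbour[p]'s slice of the shared array is
        // [offsetInShared[p], offsetInShared[p+1])
        for (size_t p = 0; p < neighbour.size(); ++p)
            std::printf("rank %d: shared[%d..%d)\n", neighbour[p],
                        offsetInShared[p], offsetInShared[p+1]);
        return 0;
    }
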
@@ -1802,9 +1770,9 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
         for (j=i1; j<i2; j++) {
           i_c = RAP_couple_idx[j];
           /* find out the corresponding neighbor "p" of column i_c */
-          for (p=0; p<recv->numNeighbors; p++) {
+          for (p=0; p<recv->neighbour.size(); p++) {
             if (i_c < recv->offsetInShared[p+1]) {
-              k = recv->neighbor[p];
+              k = recv->neighbour[p];
               if (send_ptr[k][i_r] == 0) sum++;
               send_ptr[k][i_r] ++;
               send_idx[k][len[k]] = global_id_RAP[i_c];
@@ -1823,8 +1791,9 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    /* now allocate the sender */
    shared = new index_t[sum];
    memset(send_len, 0, sizeof(dim_t) * size);
-   num_neighbors=0;
-   offsetInShared[0] = 0;
+   neighbour.clear();
+   offsetInShared.clear();
+   offsetInShared.push_back(0);
    for (p=0, k=0; p<size; p++) {
      for (i=0; i<num_Pmain_cols; i++) {
         if (send_ptr[p][i] > 0) {
@@ -1834,70 +1803,68 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
           send_len[p]++;
         }
      }
-     if (k > offsetInShared[num_neighbors]) {
-        neighbor[num_neighbors] = p;
-        num_neighbors ++;
-        offsetInShared[num_neighbors] = k;
+     if (k > offsetInShared.back()) {
+        neighbour.push_back(p);
+        offsetInShared.push_back(k);
      }
    }
-   send.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   send.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
 
    /* send/recv the number of rows to be sent from the current proc;
       recover info for the receiver of row_connector from the sender */
-   #ifdef ESYS_MPI
+#ifdef ESYS_MPI
    MPI_Alltoall(send_len, 1, MPI_INT, recv_len, 1, MPI_INT, mpi_info->comm);
-   #endif
-   num_neighbors = 0;
-   offsetInShared[0] = 0;
+#endif
+   neighbour.clear();
+   offsetInShared.clear();
+   offsetInShared.push_back(0);
    j = 0;
    for (i=0; i<size; i++) {
      if (i != rank && recv_len[i] > 0) {
-        neighbor[num_neighbors] = i;
-        num_neighbors ++;
+        neighbour.push_back(i);
         j += recv_len[i];
-        offsetInShared[num_neighbors] = j;
+        offsetInShared.push_back(j);
      }
    }
+   num_neighbors = neighbour.size();
    delete[] shared;
    delete[] recv_len;
    shared = new index_t[j];
-   k = offsetInShared[num_neighbors];
-   #pragma omp parallel for schedule(static) private(i)
+   k = offsetInShared.back();
+#pragma omp parallel for schedule(static) private(i)
    for (i=0; i<k; i++) {
      shared[i] = i + num_Pmain_cols;
    }
-   recv.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   recv.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
    row_connector.reset(new Connector(send, recv));
    delete[] shared;
 
    /* send/recv pattern->ptr for rowCoupleBlock */
    num_RAPext_rows = offsetInShared[num_neighbors];
    row_couple_ptr = new index_t[num_RAPext_rows+1];
-   for (p=0; p<num_neighbors; p++) {
+   for (p = 0; p < num_neighbors; p++) {
      j = offsetInShared[p];
      i = offsetInShared[p+1];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],
-                mpi_info->msg_tag_counter+neighbor[p],
-                mpi_info->comm, &mpi_requests[p]);
-     #endif
+#ifdef ESYS_MPI
+     MPI_Irecv(&row_couple_ptr[j], i-j, MPI_INT, neighbour[p],
+               mpi_info->counter()+neighbour[p],
+               mpi_info->comm, &mpi_requests[p]);
+#endif
    }
    send = row_connector->send;
-   for (p=0; p<send->numNeighbors; p++) {
-     #ifdef ESYS_MPI
-     MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],
-                MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank,
+   for (p=0; p<send->neighbour.size(); p++) {
+#ifdef ESYS_MPI
+     MPI_Issend(send_ptr[send->neighbour[p]], send_len[send->neighbour[p]],
+                MPI_INT, send->neighbour[p], mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[p+num_neighbors]);
-     #endif
+#endif
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + send->numNeighbors,
-        mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+   mpi_info->incCounter(size);
+   MPI_Waitall(num_neighbors + send->neighbour.size(), mpi_requests, mpi_stati);
+#endif
    delete[] send_len;
 
    sum = 0;
@@ -1914,33 +1881,34 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
    for (p=0; p<num_neighbors; p++) {
      j1 = row_couple_ptr[offsetInShared[p]];
      j2 = row_couple_ptr[offsetInShared[p+1]];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],
-                mpi_info->msg_tag_counter+neighbor[p],
-                mpi_info->comm, &mpi_requests[p]);
-     #endif
+#ifdef ESYS_MPI
+     MPI_Irecv(&row_couple_idx[j1], j2-j1, MPI_INT, neighbour[p],
+                mpi_info->counter()+neighbour[p], mpi_info->comm,
+                &mpi_requests[p]);
+#endif
    }
-   for (p=0; p<send->numNeighbors; p++) {
-     #ifdef ESYS_MPI
-     MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],
-                MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank,
+   for (p = 0; p < send->neighbour.size(); p++) {
+#ifdef ESYS_MPI
+     MPI_Issend(send_idx[send->neighbour[p]], len[send->neighbour[p]],
+                MPI_INT, send->neighbour[p],
+                mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[p+num_neighbors]);
-     #endif
+#endif
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + send->numNeighbors,
-                mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+    mpi_info->incCounter(size);
+    MPI_Waitall(num_neighbors + send->neighbour.size(), mpi_requests, mpi_stati);
+    delete[] mpi_requests;
+    delete[] mpi_stati;
+#endif
 
     offset = input_dist->first_component[rank];
     k = row_couple_ptr[num_RAPext_rows];
-    #pragma omp parallel for schedule(static) private(i)
+#pragma omp parallel for schedule(static) private(i)
     for (i=0; i<k; i++) {
         row_couple_idx[i] -= offset;
     }
-    #pragma omp parallel for schedule(static) private(i)
+#pragma omp parallel for schedule(static) private(i)
     for (i=0; i<size; i++) {
         delete[] send_ptr[i];
         delete[] send_idx[i];
@@ -1948,11 +1916,6 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
     delete[] send_ptr;
     delete[] send_idx;
     delete[] len;
-    delete[] offsetInShared;
-    delete[] neighbor;
-    delete[] mpi_requests;
-    delete[] mpi_stati;
-
 
     /* Now, we can create pattern for mainBlock and coupleBlock */
     Pattern_ptr main_pattern(new Pattern(MATRIX_FORMAT_DEFAULT,
@@ -1969,7 +1932,8 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
                 output_dist, input_dist, main_pattern, col_couple_pattern,
                 row_couple_pattern, col_connector, row_connector));
     out.reset(new SystemMatrix(A->type, pattern, row_block_size,
-                                    col_block_size, false));
+                               col_block_size, false, A->getRowFunctionSpace(),
+                               A->getColumnFunctionSpace()));
 
     /* finally, fill in the data*/
     memcpy(out->mainBlock->val, RAP_main_val,
@@ -1979,9 +1943,6 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperator(
 
     delete[] RAP_main_val;
     delete[] RAP_couple_val;
-    if (!Esys_noError()) {
-        out.reset();
-    }
     return out;
 }
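
Across these hunks the patch also retires direct use of mpi_info->msg_tag_counter and the
ESYS_MPI_INC_COUNTER macro in favour of mpi_info->counter() and mpi_info->incCounter(size):
each nonblocking exchange derives its tag from counter() plus a rank offset, and the counter
is advanced once per communication round so the next batch of tags cannot collide with the
previous one. A minimal sketch of that bookkeeping, using a hypothetical TagCounter stand-in
rather than the real escript::JMPI class:

    // Illustrative stand-in only; the real counter lives in escript::JMPI.
    #include <iostream>

    class TagCounter {
        int msg_tag_counter = 0;
    public:
        int counter() const { return msg_tag_counter; }
        void incCounter(int n) { msg_tag_counter += n; }
    };

    int main()
    {
        TagCounter info;
        const int size = 4;                    // example number of MPI ranks
        for (int round = 0; round < 2; ++round) {
            for (int k = 0; k < size; ++k) {
                // tag for the message exchanged with rank k in this round,
                // mirroring MPI_Irecv(..., mpi_info->counter()+k, ...)
                std::cout << "round " << round << " peer " << k
                          << " tag " << info.counter() + k << "\n";
            }
            info.incCounter(size);             // advance once per round so later rounds use fresh tags
        }
        return 0;
    }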
 
@@ -1990,10 +1951,10 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
         SystemMatrix_ptr A, SystemMatrix_ptr P,
         SystemMatrix_ptr R)
 {
-   esysUtils::JMPI& mpi_info=A->mpi_info;
+   escript::JMPI mpi_info(A->mpi_info);
    SystemMatrix_ptr out;
    SystemMatrixPattern_ptr pattern;
-   Distribution_ptr input_dist, output_dist;
+   escript::Distribution_ptr input_dist, output_dist;
    SharedComponents_ptr send, recv;
    Connector_ptr col_connector, row_connector;
    const dim_t row_block_size=A->row_block_size;
@@ -2002,10 +1963,10 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    const double ZERO = 0.0;
    double *RAP_main_val=NULL, *RAP_couple_val=NULL, *RAP_ext_val=NULL;
    double rtmp, *RAP_val, *RA_val, *R_val, *temp_val=NULL;
-   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
+   index_t size=mpi_info->size, rank=mpi_info->rank;
    index_t *RAP_main_ptr=NULL, *RAP_couple_ptr=NULL, *RAP_ext_ptr=NULL;
    index_t *RAP_main_idx=NULL, *RAP_couple_idx=NULL, *RAP_ext_idx=NULL;
-   index_t *offsetInShared=NULL, *row_couple_ptr=NULL, *row_couple_idx=NULL;
+   index_t *row_couple_ptr=NULL, *row_couple_idx=NULL;
    index_t *Pcouple_to_Pext=NULL, *Pext_to_RAP=NULL, *Pcouple_to_RAP=NULL;
    index_t *temp=NULL, *global_id_P=NULL, *global_id_RAP=NULL;
    index_t *shared=NULL, *P_marker=NULL, *A_marker=NULL;
@@ -2017,15 +1978,14 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    index_t **send_ptr=NULL, **send_idx=NULL;
    dim_t p, num_neighbors;
    dim_t *recv_len=NULL, *send_len=NULL, *len=NULL;
-   Esys_MPI_rank *neighbor=NULL;
-   #ifdef ESYS_MPI
-     MPI_Request* mpi_requests=NULL;
-     MPI_Status* mpi_stati=NULL;
-   #else
-     int *mpi_requests=NULL, *mpi_stati=NULL;
-   #endif
+#ifdef ESYS_MPI
+    MPI_Request* mpi_requests=NULL;
+    MPI_Status* mpi_stati=NULL;
+#else
+    int *mpi_requests=NULL, *mpi_stati=NULL;
+#endif
 
-   /* two sparse matrices R_main and R_couple will be generate, as the
+   /* two sparse matrices R_main and R_couple will be generated, as the
       transpose of P_main and P_col_couple, respectively. Note that,
       R_couple is actually the row_coupleBlock of R (R=P^T) */
    SparseMatrix_ptr R_main(P->mainBlock->getTranspose());
@@ -2036,7 +1996,7 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
      to be specific, P_ext (on processor k) is a group of rows in P, where
       the list of rows from processor q is given by
         A->col_coupler->connector->send->shared[rPtr...]
-        rPtr=A->col_coupler->connector->send->OffsetInShared[k]
+        rPtr=A->col_coupler->connector->send->offsetInShared[k]
       on q.
       P_ext is represented by two sparse matrices P_ext_main and
       P_ext_couple */
@@ -2469,7 +2429,7 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    Preconditioner_AMG_CopyRemoteData(P, &RAP_ext_ptr, &RAP_ext_idx,
                 &RAP_ext_val, global_id_P, block_size);
 
-   num_RAPext_rows = P->col_coupler->connector->send->offsetInShared[P->col_coupler->connector->send->numNeighbors];
+   num_RAPext_rows = P->col_coupler->connector->send->numSharedComponents;
    sum = RAP_ext_ptr[num_RAPext_rows];
    num_RAPext_cols = 0;
    if (num_Pext_cols || sum > 0) {
@@ -2549,8 +2509,8 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    for (i=0; i<num_A_cols; i++) A_marker[i] = -1;
 
    /* Now, count the size of RAP. Start with rows in R_main */
-   num_neighbors = P->col_coupler->connector->send->numNeighbors;
-   offsetInShared = P->col_coupler->connector->send->offsetInShared;
+   num_neighbors = P->col_coupler->connector->send->neighbour.size();
+   std::vector<index_t> offsetInShared = P->col_coupler->connector->send->offsetInShared;
    shared = P->col_coupler->connector->send->shared;
    i = 0;
    j = 0;
@@ -3076,23 +3036,22 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    /* Start to create the coarse level System Matrix A_c */
    /******************************************************/
    /* first, prepare the sender/receiver for the col_connector */
-   dist = P->pattern->input_distribution->first_component;
+   const std::vector<index_t> dist(P->pattern->input_distribution->first_component);
    recv_len = new dim_t[size];
    send_len = new dim_t[size];
-   neighbor = new Esys_MPI_rank[size];
-   offsetInShared = new index_t[size+1];
+   std::vector<int> neighbour;
+   offsetInShared.clear();
    shared = new index_t[num_RAPext_cols];
    memset(recv_len, 0, sizeof(dim_t) * size);
    memset(send_len, 0, sizeof(dim_t) * size);
-   num_neighbors = 0;
-   offsetInShared[0] = 0;
-   for (i=0, j=0, k=dist[j+1]; i<num_RAPext_cols; i++) {
+   offsetInShared.push_back(0);
+   for (i = 0, j = 0, k = dist[j+1]; i<num_RAPext_cols; i++) {
      shared[i] = i + num_Pmain_cols;
      if (k <= global_id_RAP[i]) {
         if (recv_len[j] > 0) {
-          neighbor[num_neighbors] = j;
+          neighbour.push_back(j);
+          offsetInShared.push_back(i);
           num_neighbors ++;
-          offsetInShared[num_neighbors] = i;
         }
         while (k <= global_id_RAP[i]) {
           j++;
@@ -3102,74 +3061,71 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
      recv_len[j] ++;
    }
    if (recv_len[j] > 0) {
-     neighbor[num_neighbors] = j;
-     num_neighbors ++;
-     offsetInShared[num_neighbors] = i;
+     neighbour.push_back(j);
+     offsetInShared.push_back(i);
    }
-   recv.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   recv.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
 
-   #ifdef ESYS_MPI
-   MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
-   #endif
+#ifdef ESYS_MPI
+    MPI_Alltoall(recv_len, 1, MPI_INT, send_len, 1, MPI_INT, mpi_info->comm);
 
-   #ifdef ESYS_MPI
-     mpi_requests=new MPI_Request[size*2];
-     mpi_stati=new MPI_Status[size*2];
-   #else
+    mpi_requests = new MPI_Request[size*2];
+    mpi_stati = new MPI_Status[size*2];
+#else
      mpi_requests=new int[size*2];
      mpi_stati=new int[size*2];
-   #endif
+#endif
    num_neighbors = 0;
    j = 0;
-   offsetInShared[0] = 0;
+   neighbour.clear();
+   offsetInShared.clear();
+   offsetInShared.push_back(0);
    for (i=0; i<size; i++) {
      if (send_len[i] > 0) {
-        neighbor[num_neighbors] = i;
-        num_neighbors ++;
+        neighbour.push_back(i);
         j += send_len[i];
-        offsetInShared[num_neighbors] = j;
+        offsetInShared.push_back(j);
+        num_neighbors++;
      }
    }
    delete[] shared;
    shared = new index_t[j];
    for (i=0, j=0; i<num_neighbors; i++) {
-     k = neighbor[i];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k,
-                mpi_info->msg_tag_counter+k,
-                mpi_info->comm, &mpi_requests[i]);
-     #endif
+     k = neighbour[i];
+#ifdef ESYS_MPI
+     MPI_Irecv(&shared[j], send_len[k] , MPI_INT, k, mpi_info->counter()+k,
+               mpi_info->comm, &mpi_requests[i]);
+#endif
      j += send_len[k];
    }
-   for (i=0, j=0; i<recv->numNeighbors; i++) {
-     k = recv->neighbor[i];
-     #ifdef ESYS_MPI
-     MPI_Issend(&(global_id_RAP[j]), recv_len[k], MPI_INT, k,
-                mpi_info->msg_tag_counter+rank,
-                mpi_info->comm, &mpi_requests[i+num_neighbors]);
-     #endif
+   for (i=0, j=0; i<recv->neighbour.size(); i++) {
+     k = recv->neighbour[i];
+#ifdef ESYS_MPI
+     MPI_Issend(&global_id_RAP[j], recv_len[k], MPI_INT, k,
+                mpi_info->counter()+rank, mpi_info->comm,
+                &mpi_requests[i+num_neighbors]);
+#endif
      j += recv_len[k];
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + recv->numNeighbors,
-                mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+   mpi_info->incCounter(size);
+   MPI_Waitall(num_neighbors + recv->neighbour.size(), mpi_requests, mpi_stati);
+#endif
 
    j = offsetInShared[num_neighbors];
    offset = dist[rank];
    for (i=0; i<j; i++) shared[i] = shared[i] - offset;
-   send.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   send.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
 
    col_connector.reset(new Connector(send, recv));
    delete[] shared;
 
   /* now, create row distribution (output_distribution) and col
      distribution (input_distribution) */
-   input_dist.reset(new Distribution(mpi_info, dist, 1, 0));
-   output_dist.reset(new Distribution(mpi_info, dist, 1, 0));
+   input_dist.reset(new escript::Distribution(mpi_info, dist));
+   output_dist.reset(new escript::Distribution(mpi_info, dist));
 
   /* then, prepare the sender/receiver for the row_connector; first, prepare
      the information for the sender */
@@ -3193,10 +3149,10 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
           how many neighbours i_r needs to be sent to */
         for (j=i1; j<i2; j++) {
           i_c = RAP_couple_idx[j];
-          /* find out the corresponding neighbor "p" of column i_c */
-          for (p=0; p<recv->numNeighbors; p++) {
+          /* find out the corresponding neighbour "p" of column i_c */
+          for (p=0; p<recv->neighbour.size(); p++) {
             if (i_c < recv->offsetInShared[p+1]) {
-              k = recv->neighbor[p];
+              k = recv->neighbour[p];
               if (send_ptr[k][i_r] == 0) sum++;
               send_ptr[k][i_r] ++;
               send_idx[k][len[k]] = global_id_RAP[i_c];
@@ -3215,10 +3171,11 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    /* now allocate the sender */
    shared = new index_t[sum];
    memset(send_len, 0, sizeof(dim_t) * size);
-   num_neighbors=0;
-   offsetInShared[0] = 0;
-   for (p=0, k=0; p<size; p++) {
-     for (i=0; i<num_Pmain_cols; i++) {
+   neighbour.clear();
+   offsetInShared.clear();
+   offsetInShared.push_back(0);
+   for (p = 0, k = 0; p < size; p++) {
+     for (i = 0; i < num_Pmain_cols; i++) {
         if (send_ptr[p][i] > 0) {
           shared[k] = i;
           k++;
@@ -3226,69 +3183,69 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
           send_len[p]++;
         }
      }
-     if (k > offsetInShared[num_neighbors]) {
-        neighbor[num_neighbors] = p;
-        num_neighbors ++;
-        offsetInShared[num_neighbors] = k;
+     if (k > offsetInShared.back()) {
+        neighbour.push_back(p);
+        offsetInShared.push_back(k);
      }
    }
-   send.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   send.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
 
   /* send/recv the number of rows to be sent from the current proc;
      recover info for the receiver of row_connector from the sender */
-   #ifdef ESYS_MPI
+#ifdef ESYS_MPI
    MPI_Alltoall(send_len, 1, MPI_INT, recv_len, 1, MPI_INT, mpi_info->comm);
-   #endif
+#endif
+   neighbour.clear();
+   offsetInShared.clear();
    num_neighbors = 0;
-   offsetInShared[0] = 0;
+   offsetInShared.push_back(0);
    j = 0;
    for (i=0; i<size; i++) {
      if (i != rank && recv_len[i] > 0) {
-        neighbor[num_neighbors] = i;
-        num_neighbors ++;
+        neighbour.push_back(i);
         j += recv_len[i];
-        offsetInShared[num_neighbors] = j;
+        offsetInShared.push_back(j);
+        num_neighbors ++;
      }
    }
    delete[] shared;
    delete[] recv_len;
    shared = new index_t[j];
-   k = offsetInShared[num_neighbors];
+   k = offsetInShared.back();
    for (i=0; i<k; i++) {
      shared[i] = i + num_Pmain_cols;
    }
-   recv.reset(new SharedComponents(num_Pmain_cols, num_neighbors,
-               neighbor, shared, offsetInShared, 1, 0, mpi_info));
+   recv.reset(new SharedComponents(num_Pmain_cols, neighbour, shared,
+                                   offsetInShared));
    row_connector.reset(new Connector(send, recv));
    delete[] shared;
 
    /* send/recv pattern->ptr for rowCoupleBlock */
-   num_RAPext_rows = offsetInShared[num_neighbors];
+   num_RAPext_rows = offsetInShared.back();
    row_couple_ptr = new index_t[num_RAPext_rows+1];
    for (p=0; p<num_neighbors; p++) {
      j = offsetInShared[p];
      i = offsetInShared[p+1];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&(row_couple_ptr[j]), i-j, MPI_INT, neighbor[p],
-                mpi_info->msg_tag_counter+neighbor[p],
+#ifdef ESYS_MPI
+     MPI_Irecv(&row_couple_ptr[j], i-j, MPI_INT, neighbour[p],
+                mpi_info->counter()+neighbour[p],
                 mpi_info->comm, &mpi_requests[p]);
-     #endif
+#endif
    }
    send = row_connector->send;
-   for (p=0; p<send->numNeighbors; p++) {
-     #ifdef ESYS_MPI
-     MPI_Issend(send_ptr[send->neighbor[p]], send_len[send->neighbor[p]],
-                MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank,
+   for (p=0; p<send->neighbour.size(); p++) {
+#ifdef ESYS_MPI
+     MPI_Issend(send_ptr[send->neighbour[p]], send_len[send->neighbour[p]],
+                MPI_INT, send->neighbour[p],
+                mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[p+num_neighbors]);
-     #endif
+#endif
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + send->numNeighbors,
-        mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+   mpi_info->incCounter(size);
+   MPI_Waitall(num_neighbors + send->neighbour.size(), mpi_requests, mpi_stati);
+#endif
    delete[] send_len;
 
    sum = 0;
@@ -3305,25 +3262,24 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    for (p=0; p<num_neighbors; p++) {
      j1 = row_couple_ptr[offsetInShared[p]];
      j2 = row_couple_ptr[offsetInShared[p+1]];
-     #ifdef ESYS_MPI
-     MPI_Irecv(&(row_couple_idx[j1]), j2-j1, MPI_INT, neighbor[p],
-                mpi_info->msg_tag_counter+neighbor[p],
+#ifdef ESYS_MPI
+     MPI_Irecv(&row_couple_idx[j1], j2-j1, MPI_INT, neighbour[p],
+                mpi_info->counter()+neighbour[p],
                 mpi_info->comm, &mpi_requests[p]);
-     #endif
+#endif
    }
-   for (p=0; p<send->numNeighbors; p++) {
-     #ifdef ESYS_MPI
-     MPI_Issend(send_idx[send->neighbor[p]], len[send->neighbor[p]],
-                MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank,
+   for (p=0; p<send->neighbour.size(); p++) {
+#ifdef ESYS_MPI
+     MPI_Issend(send_idx[send->neighbour[p]], len[send->neighbour[p]],
+                MPI_INT, send->neighbour[p],
+                mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[p+num_neighbors]);
-     #endif
+#endif
    }
-   #ifdef ESYS_MPI
-   MPI_Waitall(num_neighbors + send->numNeighbors,
-                mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size);
+#ifdef ESYS_MPI
+   mpi_info->incCounter(size);
+   MPI_Waitall(num_neighbors + send->neighbour.size(), mpi_requests, mpi_stati);
+#endif
 
    offset = input_dist->first_component[rank];
    k = row_couple_ptr[num_RAPext_rows];
@@ -3338,8 +3294,6 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
    delete[] send_ptr;
    delete[] send_idx;
    delete[] len;
-   delete[] offsetInShared;
-   delete[] neighbor;
    delete[] mpi_requests;
    delete[] mpi_stati;
 
@@ -3353,24 +3307,22 @@ SystemMatrix_ptr Preconditioner_AMG_buildInterpolationOperatorBlock(
                MATRIX_FORMAT_DEFAULT, num_RAPext_rows, num_Pmain_cols,
                row_couple_ptr, row_couple_idx));
 
-   /* next, create the system matrix */
-   pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-                output_dist, input_dist, main_pattern, col_couple_pattern,
-                row_couple_pattern, col_connector, row_connector));
-   out.reset(new SystemMatrix(A->type, pattern, row_block_size,
-                                    col_block_size, false));
+    /* next, create the system matrix */
+    pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
+                  output_dist, input_dist, main_pattern, col_couple_pattern,
+                  row_couple_pattern, col_connector, row_connector));
+    out.reset(new SystemMatrix(A->type, pattern, row_block_size,
+                               col_block_size, false, A->getRowFunctionSpace(),
+                               A->getColumnFunctionSpace()));
 
-   /* finally, fill in the data*/
-   memcpy(out->mainBlock->val, RAP_main_val,
-                out->mainBlock->len* sizeof(double));
-   memcpy(out->col_coupleBlock->val, RAP_couple_val,
+    /* finally, fill in the data*/
+    memcpy(out->mainBlock->val, RAP_main_val,
+                out->mainBlock->len * sizeof(double));
+    memcpy(out->col_coupleBlock->val, RAP_couple_val,
                 out->col_coupleBlock->len * sizeof(double));
 
     delete[] RAP_main_val;
     delete[] RAP_couple_val;
-    if (!Esys_noError()) {
-        out.reset();
-    }
     return out;
 }
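
The dominant change in AMG.cpp is structural: the fixed-size neighbor[] and offsetInShared[]
arrays, together with the hand-maintained num_neighbors counter, are replaced by std::vector
push_backs, and SharedComponents is now built from those vectors directly
(SharedComponents(n, neighbour, shared, offsetInShared)). A self-contained sketch of the
list-building loop the hunks converge on; the function name and plain int types below are
illustrative stand-ins, not the real paso/escript declarations:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Build the neighbour list and its prefix-sum offsets from per-rank send counts,
    // the pattern used above in place of the neighbor[num_neighbors] = i; bookkeeping.
    static void buildNeighbourLists(const std::vector<int>& send_len,
                                    std::vector<int>& neighbour,
                                    std::vector<int>& offsetInShared)
    {
        neighbour.clear();
        offsetInShared.clear();
        offsetInShared.push_back(0);
        int j = 0;
        for (std::size_t i = 0; i < send_len.size(); i++) {
            if (send_len[i] > 0) {
                neighbour.push_back(static_cast<int>(i)); // rank i becomes a neighbour
                j += send_len[i];                          // running count of shared values
                offsetInShared.push_back(j);               // offsets are the prefix sums
            }
        }
    }

    int main()
    {
        std::vector<int> neighbour, offsetInShared;
        buildNeighbourLists({0, 3, 0, 2}, neighbour, offsetInShared);
        // neighbour == {1, 3}, offsetInShared == {0, 3, 5};
        // neighbour.size() now plays the role of the old num_neighbors counter.
        for (std::size_t p = 0; p < neighbour.size(); p++)
            std::cout << "rank " << neighbour[p] << " owns entries ["
                      << offsetInShared[p] << "," << offsetInShared[p+1] << ")\n";
        return 0;
    }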
 
diff --git a/paso/src/AMG_Prolongation.cpp b/paso/src/AMG_Prolongation.cpp
index c9530f9..b31bf1e 100644
--- a/paso/src/AMG_Prolongation.cpp
+++ b/paso/src/AMG_Prolongation.cpp
@@ -25,9 +25,10 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "SparseMatrix.h"
+#include "Options.h"
 #include "PasoUtil.h"
 #include "Preconditioner.h"
+#include "SparseMatrix.h"
 
 #include <cstring> // memset
 
@@ -58,294 +59,258 @@ SystemMatrix_ptr Preconditioner_AMG_getProlongation(
         const index_t* S, const dim_t n_C, index_t* counter_C,
         const index_t interpolation_method)
 {
-   esysUtils::JMPI& mpi_info=A_p->mpi_info;
-   Distribution_ptr input_dist, output_dist;
-   SharedComponents_ptr send, recv;
-   Connector_ptr col_connector;
-   Pattern_ptr main_pattern, couple_pattern;
-   const dim_t row_block_size=A_p->row_block_size;
-   const dim_t col_block_size=A_p->col_block_size;
-   const dim_t my_n=A_p->mainBlock->numCols;
-   const dim_t overlap_n=A_p->col_coupleBlock->numCols;
-   const dim_t num_threads=omp_get_max_threads();
-   index_t size=mpi_info->size, *dist=NULL;
-   index_t *main_p=NULL, *couple_p=NULL, *main_idx=NULL, *couple_idx=NULL;
-   index_t *shared=NULL, *offsetInShared=NULL;
-   index_t *recv_shared=NULL, *send_shared=NULL;
-   index_t sum, i, j, k, l, p, q, iptr;
-   index_t my_n_C, global_label, num_neighbors;
-   #ifdef ESYS_MPI
-   index_t rank=mpi_info->rank;
-   #endif
-   Esys_MPI_rank *neighbor=NULL;
-   #ifdef ESYS_MPI
-     MPI_Request* mpi_requests=NULL;
-     MPI_Status* mpi_stati=NULL;
-   #else
-     int *mpi_requests=NULL, *mpi_stati=NULL;
-   #endif
-
-   /* number of C points in current distribution */
-   my_n_C = 0;
-   sum=0;
-   if (num_threads>1) {
-     #pragma omp parallel private(i,sum)
-     {
-        sum=0;
-        #pragma omp for schedule(static)
-        for (i=0;i<my_n;++i) {
-          if (counter_C[i] != -1) {
-            sum++;
-          }
-        }
-        #pragma omp critical
+    escript::JMPI mpi_info(A_p->mpi_info);
+    Connector_ptr col_connector;
+    Pattern_ptr main_pattern, couple_pattern;
+    const dim_t row_block_size=A_p->row_block_size;
+    const dim_t col_block_size=A_p->col_block_size;
+    const dim_t my_n=A_p->mainBlock->numCols;
+    const dim_t overlap_n=A_p->col_coupleBlock->numCols;
+#ifdef _OPENMP
+    const int num_threads = omp_get_max_threads();
+#else
+    const int num_threads = 1;
+#endif
+    index_t size=mpi_info->size;
+    index_t *main_idx=NULL, *couple_idx=NULL;
+    index_t i, j, k, l, p, q, iptr;
+#ifdef ESYS_MPI
+    index_t rank = mpi_info->rank;
+#endif
+
+    // number of C points in current distribution
+    dim_t my_n_C = 0;
+    if (num_threads > 1) {
+#pragma omp parallel private(i)
         {
-            my_n_C += sum;
+            dim_t sum = 0;
+#pragma omp for schedule(static)
+            for (i = 0; i < my_n; ++i) {
+                if (counter_C[i] != -1) {
+                    sum++;
+                }
+            }
+#pragma omp critical
+            {
+                my_n_C += sum;
+            }
         }
-     }
-   } else { /* num_threads=1 */
-     for (i=0;i<my_n;++i) {
-         if (counter_C[i] != -1) {
-            my_n_C++;
-         }
-      }
-   }
+    } else { // num_threads=1
+        for (i = 0; i < my_n; ++i) {
+            if (counter_C[i] != -1) {
+                my_n_C++;
+            }
+        }
+    }
 
-   /* create row distribution (output_distribution) and col distribution
-      (input_distribution) */
-   /* ??? should I alloc an new Esys_MPIInfo object or reuse the one in
-      system matrix A. for now, I'm reuse A->mpi_info ??? */
-   dist = A_p->pattern->output_distribution->first_component;
-   output_dist.reset(new Distribution(mpi_info, dist, 1, 0));
-   dist = new index_t[size+1]; /* now prepare for col distribution */
-   #ifdef ESYS_MPI
-   MPI_Allgather(&my_n_C, 1, MPI_INT, dist, 1, MPI_INT, mpi_info->comm);
-   #endif
-   global_label=0;
-   for (i=0; i<size; i++) {
-     k = dist[i];
-     dist[i] = global_label;
-     global_label += k;
-   }
-   dist[size] = global_label;
+    // create row distribution (output_distribution) and col distribution
+    // (input_distribution)
+    escript::Distribution_ptr output_dist(new escript::Distribution(mpi_info,
+                    A_p->pattern->output_distribution->first_component));
+    std::vector<index_t> dist(size+1); // now prepare for col distribution
+#ifdef ESYS_MPI
+    MPI_Allgather(&my_n_C, 1, MPI_DIM_T, &dist[0], 1, MPI_DIM_T, mpi_info->comm);
+#endif
+    index_t global_label = 0;
+    for (i = 0; i < size; i++) {
+        k = dist[i];
+        dist[i] = global_label;
+        global_label += k;
+    }
+    dist[size] = global_label;
 
-   input_dist.reset(new Distribution(mpi_info, dist, 1, 0));
-   delete[] dist;
+    escript::Distribution_ptr input_dist(new escript::Distribution(mpi_info, dist));
 
-   /* create pattern for mainBlock and coupleBlock */
-   main_p = new index_t[my_n+1];
-   couple_p = new index_t[my_n+1];
-     /* count the number of entries per row in the Prolongation matrix :*/
-     #pragma omp parallel for private(i,l,k,iptr,j,p) schedule(static)
-     for (i=0; i<my_n; i++) {
+    // create pattern for mainBlock and coupleBlock
+    index_t* main_p = new index_t[my_n+1];
+    index_t* couple_p = new index_t[my_n+1];
+    // count the number of entries per row in the Prolongation matrix
+#pragma omp parallel for private(i,l,k,iptr,j,p) schedule(static)
+    for (i = 0; i < my_n; i++) {
         l = 0;
         if (counter_C[i]>=0) {
-          k = 1;    /* i is a C unknown */
+            k = 1;    // i is a C unknown
         } else {
-          k = 0;
-          iptr = offset_S[i];
-          for (p=0; p<degree_S[i]; p++) {
-            j = S[iptr+p];  /* this is a strong connection */
-            if (counter_C[j]>=0) { /* and is in C */
-                if (j <my_n) k++;
-                else {
-                  l++;
+            k = 0;
+            iptr = offset_S[i];
+            for (p = 0; p < degree_S[i]; p++) {
+                j = S[iptr+p];  // this is a strong connection
+                if (counter_C[j] >= 0) { // and is in C
+                    if (j < my_n)
+                        k++;
+                    else
+                        l++;
                 }
             }
-          }
         }
         main_p[i] = k;
         couple_p[i] = l;
-     }
+    }
 
-     /* number of unknowns in the col-coupleBlock of the interpolation matrix */
-     sum = 0;
-     for (i=0;i<overlap_n;++i) {
+    // number of unknowns in the col-coupleBlock of the interpolation matrix
+    dim_t sum = 0;
+    for (i = 0; i < overlap_n; ++i) {
         if (counter_C[i+my_n] > -1) {
-          counter_C[i+my_n] -= my_n_C;
-          sum++;
+            counter_C[i+my_n] -= my_n_C;
+            sum++;
         }
-     }
-
-     /* allocate and create index vector for prolongation: */
-     p = util::cumsum(my_n, main_p);
-     main_p[my_n] = p;
-     main_idx = new index_t[p];
-     p = util::cumsum(my_n, couple_p);
-     couple_p[my_n] = p;
-     couple_idx = new index_t[p];
-        #pragma omp parallel for private(i,k,l,iptr,j,p)  schedule(static)
-        for (i=0; i<my_n; i++) {
-          if (counter_C[i]>=0) {
+    }
+
+    // allocate and create index vector for prolongation
+    p = util::cumsum(my_n, main_p);
+    main_p[my_n] = p;
+    main_idx = new index_t[p];
+    p = util::cumsum(my_n, couple_p);
+    couple_p[my_n] = p;
+    couple_idx = new index_t[p];
+#pragma omp parallel for private(i,k,l,iptr,j,p)  schedule(static)
+    for (i = 0; i < my_n; i++) {
+        if (counter_C[i]>=0) {
             main_idx[main_p[i]]=counter_C[i];  /* i is a C unknown */
-          } else {
+        } else {
             k = 0;
             l = 0;
             iptr = offset_S[i];
-            for (p=0; p<degree_S[i]; p++) {
-              j = S[iptr+p]; /* this is a strong connection */
-              if (counter_C[j] >=0) { /* and is in C */
-                if (j < my_n) {
-                  main_idx[main_p[i]+k] = counter_C[j];
-                  k++;
-                } else {
-                  couple_idx[couple_p[i]+l] = counter_C[j];
-                  l++;
+            for (p = 0; p < degree_S[i]; p++) {
+                j = S[iptr+p]; // this is a strong connection
+                if (counter_C[j] >= 0) { // and is in C
+                    if (j < my_n) {
+                        main_idx[main_p[i]+k] = counter_C[j];
+                        k++;
+                    } else {
+                        couple_idx[couple_p[i]+l] = counter_C[j];
+                        l++;
+                    }
                 }
-              }
             }
-          }
         }
+    }
 
-   if (Esys_noError()) {
-     main_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, my_n,
-                        my_n_C, main_p, main_idx));
-     couple_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, my_n,
-                        sum, couple_p, couple_idx));
-   } else {
-     delete[] main_p;
-     delete[] main_idx;
-     delete[] couple_p;
-     delete[] couple_idx;
-   }
-
-   /* prepare the receiver for the col_connector.
-      Note that the allocation for "shared" assumes the send and receive buffer
-      of the interpolation matrix P is no larger than that of matrix A_p. */
-   neighbor = new Esys_MPI_rank[size];
-   offsetInShared = new index_t[size+1];
-   recv = A_p->col_coupler->connector->recv;
-   send = A_p->col_coupler->connector->send;
-   i = recv->numSharedComponents;
-   recv_shared = new index_t[i];
-   memset(recv_shared, 0, sizeof(index_t)*i);
-   k = send->numSharedComponents;
-   send_shared = new index_t[k];
-   if (k > i) i = k;
-   shared = new index_t[i];
-
-   #ifdef ESYS_MPI
-     mpi_requests=new MPI_Request[size*2];
-     mpi_stati=new MPI_Status[size*2];
-   #else
-     mpi_requests=new int[size*2];
-     mpi_stati=new int[size*2];
-   #endif
-
-   for (p=0; p<send->numNeighbors; p++) {
-     i = send->offsetInShared[p];
-     #ifdef ESYS_MPI
-     MPI_Irecv (&(send_shared[i]), send->offsetInShared[p+1]-i, MPI_INT,
-                send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
-                mpi_info->comm, &mpi_requests[p]);
-     #endif
-   }
+    main_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, my_n,
+                                   my_n_C, main_p, main_idx));
+    couple_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, my_n,
+                                     sum, couple_p, couple_idx));
+
+    // prepare the receiver for the col_connector.
+    // Note that the allocation for "shared" assumes the send and receive buffers
+    // of the interpolation matrix P are no larger than those of matrix A_p
+    std::vector<int> neighbour;
+    std::vector<index_t> offsetInShared;
+    SharedComponents_ptr recv(A_p->col_coupler->connector->recv);
+    SharedComponents_ptr send(A_p->col_coupler->connector->send);
+    i = recv->numSharedComponents;
+    index_t* recv_shared = new index_t[i];
+    memset(recv_shared, 0, sizeof(index_t)*i);
+    k = send->numSharedComponents;
+    index_t* send_shared = new index_t[k];
+    if (k > i)
+        i = k;
+    index_t* shared = new index_t[i];
+
+#ifdef ESYS_MPI
+    MPI_Request* mpi_requests = new MPI_Request[size*2];
+    MPI_Status* mpi_stati = new MPI_Status[size*2];
+#endif
+
+    for (p = 0; p < send->neighbour.size(); p++) {
+        i = send->offsetInShared[p];
+#ifdef ESYS_MPI
+        MPI_Irecv(&send_shared[i], send->offsetInShared[p+1]-i, MPI_INT,
+                  send->neighbour[p], mpi_info->counter()+send->neighbour[p],
+                  mpi_info->comm, &mpi_requests[p]);
+#endif
+    }
 
-   num_neighbors = 0;
-   q = 0;
-   p = recv->numNeighbors;
-   offsetInShared[0]=0;
-   for (i=0; i<p; i++) {
-     l = 0;
-     k = recv->offsetInShared[i+1];
-     for (j=recv->offsetInShared[i]; j<k; j++) {
-        if (counter_C[recv->shared[j]] > -1) {
-          shared[q] = my_n_C + q;
-          recv_shared[recv->shared[j]-my_n] = 1;
-          q++;
-          l = 1;
+    q = 0;
+    p = recv->neighbour.size();
+    offsetInShared.push_back(0);
+    for (i=0; i<p; i++) {
+        l = 0;
+        k = recv->offsetInShared[i+1];
+        for (j = recv->offsetInShared[i]; j < k; j++) {
+            if (counter_C[recv->shared[j]] > -1) {
+                shared[q] = my_n_C + q;
+                recv_shared[recv->shared[j]-my_n] = 1;
+                q++;
+                l = 1;
+            }
         }
-     }
-     if (l == 1) {
-        iptr = recv->neighbor[i];
-        neighbor[num_neighbors] = iptr;
-        num_neighbors++;
-        offsetInShared[num_neighbors] = q;
-     }
-     #ifdef ESYS_MPI
-     MPI_Issend(&(recv_shared[recv->offsetInShared[i]]),
-                k-recv->offsetInShared[i], MPI_INT, recv->neighbor[i],
-                mpi_info->msg_tag_counter+rank, mpi_info->comm,
-                &mpi_requests[i+send->numNeighbors]);
-     #endif
-   }
-   recv.reset(new SharedComponents(my_n_C, num_neighbors, neighbor,
-               shared, offsetInShared, 1, 0, mpi_info));
-
-   /* now we can build the sender */
-   #ifdef ESYS_MPI
-   MPI_Waitall(recv->numNeighbors+send->numNeighbors, mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size)
-   delete[] mpi_requests;
-   delete[] mpi_stati;
-
-   num_neighbors = 0;
-   q = 0;
-   p = send->numNeighbors;
-   offsetInShared[0]=0;
-   for (i=0; i<p; i++) {
-     l = 0;
-     k = send->offsetInShared[i+1];
-     for (j=send->offsetInShared[i]; j<k; j++) {
-        if (send_shared[j] == 1) {
-          shared[q] = counter_C[send->shared[j]];
-          q++;
-          l = 1;
+        if (l == 1) {
+            neighbour.push_back(recv->neighbour[i]);
+            offsetInShared.push_back(q);
         }
-     }
-     if (l == 1) {
-        iptr = send->neighbor[i];
-        neighbor[num_neighbors] = iptr;
-        num_neighbors++;
-        offsetInShared[num_neighbors] = q;
-     }
-   }
-
-   send.reset(new SharedComponents(my_n_C, num_neighbors, neighbor,
-               shared, offsetInShared, 1, 0, mpi_info));
-   col_connector.reset(new Connector(send, recv));
-   delete[] recv_shared;
-   delete[] send_shared;
-   delete[] neighbor;
-   delete[] offsetInShared;
-   delete[] shared;
-
-   /* now we need to create the System Matrix
-      TO BE FIXED: at this stage, we only construct col_couple_pattern
-      and col_connector for interpolation matrix P. To be completed,
-      row_couple_pattern and row_connector need to be constructed as well */
-   SystemMatrix_ptr out;
-   SystemMatrixPattern_ptr pattern;
-   if (Esys_noError()) {
-     pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-                output_dist, input_dist, main_pattern, couple_pattern,
-                couple_pattern, col_connector, col_connector));
-     out.reset(new SystemMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
-                                      row_block_size, col_block_size, false));
-   }
+#ifdef ESYS_MPI
+        MPI_Issend(&recv_shared[recv->offsetInShared[i]],
+                   k-recv->offsetInShared[i], MPI_INT, recv->neighbour[i],
+                   mpi_info->counter()+rank, mpi_info->comm,
+                   &mpi_requests[i+send->neighbour.size()]);
+#endif
+    }
+    recv.reset(new SharedComponents(my_n_C, neighbour, shared, offsetInShared));
+
+    // now we can build the sender
+#ifdef ESYS_MPI
+    mpi_info->incCounter(size);
+    MPI_Waitall(recv->neighbour.size()+send->neighbour.size(), mpi_requests, mpi_stati);
+    delete[] mpi_requests;
+    delete[] mpi_stati;
+#endif
+
+    neighbour.clear();
+    offsetInShared.clear();
+    q = 0;
+    p = send->neighbour.size();
+    offsetInShared.push_back(0);
+    for (i = 0; i < p; i++) {
+        l = 0;
+        k = send->offsetInShared[i+1];
+        for (j = send->offsetInShared[i]; j < k; j++) {
+            if (send_shared[j] == 1) {
+                shared[q] = counter_C[send->shared[j]];
+                q++;
+                l = 1;
+            }
+        }
+        if (l == 1) {
+            iptr = send->neighbour[i];
+            neighbour.push_back(iptr);
+            offsetInShared.push_back(q);
+        }
+    }
 
-   /* now fill in the matrix */
-   if (Esys_noError()) {
-     if ((interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
+    send.reset(new SharedComponents(my_n_C, neighbour, shared, offsetInShared));
+    col_connector.reset(new Connector(send, recv));
+    delete[] recv_shared;
+    delete[] send_shared;
+    delete[] shared;
+
+    // now we need to create the System Matrix
+    // TO BE FIXED: at this stage, we only construct col_couple_pattern
+    // and col_connector for interpolation matrix P. To be completed,
+    // row_couple_pattern and row_connector need to be constructed as well
+    SystemMatrixPattern_ptr pattern(new SystemMatrixPattern(
+                MATRIX_FORMAT_DEFAULT, output_dist, input_dist, main_pattern,
+                couple_pattern, couple_pattern, col_connector, col_connector));
+    SystemMatrix_ptr out(new SystemMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK,
+                pattern, row_block_size, col_block_size, false,
+                A_p->getRowFunctionSpace(), A_p->getColumnFunctionSpace()));
+
+    // now fill in the matrix
+    if ((interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
         || ( interpolation_method == PASO_CLASSIC_INTERPOLATION) ) {
         if (row_block_size == 1) {
-          Preconditioner_AMG_setClassicProlongation(out, A_p, offset_S, degree_S, S, counter_C);
+            Preconditioner_AMG_setClassicProlongation(out, A_p, offset_S, degree_S, S, counter_C);
         } else {
-          Preconditioner_AMG_setClassicProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
+            Preconditioner_AMG_setClassicProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
         }
-     } else {
+    } else {
         if (row_block_size == 1) {
-          Preconditioner_AMG_setDirectProlongation(out, A_p, offset_S, degree_S, S, counter_C);
+            Preconditioner_AMG_setDirectProlongation(out, A_p, offset_S, degree_S, S, counter_C);
         } else {
-          Preconditioner_AMG_setDirectProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
+            Preconditioner_AMG_setDirectProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
         }
-     }
-   }
-
-    if (!Esys_noError()) {
-        out.reset();
     }
+
     return out;
 }
 
@@ -376,8 +341,8 @@ void Preconditioner_AMG_setDirectProlongation(SystemMatrix_ptr P,
    index_t range;
 
    dim_t i;
-   register double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp;
-   register index_t iPtr, j, offset;
+   double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp;
+   index_t iPtr, j, offset;
    index_t *where_p, *start_p;
 
    #pragma omp parallel for private(i,offset,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp) schedule(static)
@@ -516,8 +481,8 @@ void Preconditioner_AMG_setDirectProlongation_Block(SystemMatrix_ptr P,
 
    dim_t i;
    double *alpha, *beta, *sum_all_neg, *sum_all_pos, *sum_strong_neg, *sum_strong_pos, *A_ii;
-   register double A_ij, rtmp;
-   register index_t iPtr, j, offset, ib;
+   double A_ij, rtmp;
+   index_t iPtr, j, offset, ib;
    index_t *where_p, *start_p;
 
    #pragma omp parallel private(i,offset,ib,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp)
@@ -754,27 +719,27 @@ void Preconditioner_AMG_setClassicProlongation(SystemMatrix_ptr P,
                     if (where_s == NULL) { /* weak connections are accumulated */
                         a+=A_ij;
                     } else {   /* yes i strongly connected with j */
-                        if  (counter_C[j]>=0)  { /* j is an interpolation point : add A_ij into P */
-                               const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), util::comparIndex);
-                               if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_setClassicProlongation: interpolation point is missing.");
-                               } else {
-                                    const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i);
-                                    main_block->val[offset]+=A_ij;
-                               }
-                          } else {  /* j is not an interpolation point */
-                               /* find all interpolation points m of k */
-                               double s=0.;
-                               len_D_s=0;
-
-                               /* first, the mainBlock part */
-                               range_j = A->mainBlock->pattern->ptr[j + 1];
-                               for (iPtr_j=A->mainBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
-                                    const double A_jm=A->mainBlock->val[iPtr_j];
-                                    const index_t m=A->mainBlock->pattern->index[iPtr_j];
-                                    /* is m an interpolation point ? */
-                                    const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), util::comparIndex);
-                                    if (! (where_p_m==NULL)) {
+                        if (counter_C[j]>=0)  { /* j is an interpolation point : add A_ij into P */
+                            const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), util::comparIndex);
+                            if (where_p == NULL)  {
+                                throw PasoException("Preconditioner_setClassicProlongation: interpolation point is missing.");
+                            } else {
+                                const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i);
+                                main_block->val[offset]+=A_ij;
+                            }
+                        } else {  /* j is not an interpolation point */
+                            /* find all interpolation points m of k */
+                            double s=0.;
+                            len_D_s=0;
+
+                            /* first, the mainBlock part */
+                            range_j = A->mainBlock->pattern->ptr[j + 1];
+                            for (iPtr_j=A->mainBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
+                                const double A_jm=A->mainBlock->val[iPtr_j];
+                                const index_t m=A->mainBlock->pattern->index[iPtr_j];
+                                /* is m an interpolation point ? */
+                                const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), util::comparIndex);
+                                if (! (where_p_m==NULL)) {
                                          const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i);
                                          if (!util::samesign(A_ii, A_jm)) {
                                               D_s[len_D_s]=A_jm;
@@ -783,13 +748,13 @@ void Preconditioner_AMG_setClassicProlongation(SystemMatrix_ptr P,
                                          }
                                          D_s_offset[len_D_s]=offset_m;
                                          len_D_s++;
-                                    }
-                               }
+                                }
+                            }
 
-                               /* then the coupleBlock part */
-                               if (degree_p_couple_i) {
-                                 range_j = A->col_coupleBlock->pattern->ptr[j + 1];
-                                 for (iPtr_j=A->col_coupleBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
+                            /* then the coupleBlock part */
+                            if (degree_p_couple_i) {
+                                range_j = A->col_coupleBlock->pattern->ptr[j + 1];
+                                for (iPtr_j=A->col_coupleBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
                                     const double A_jm=A->col_coupleBlock->val[iPtr_j];
                                     const index_t m=A->col_coupleBlock->pattern->index[iPtr_j];
                                     /* is m an interpolation point ? */
@@ -804,22 +769,22 @@ void Preconditioner_AMG_setClassicProlongation(SystemMatrix_ptr P,
                                          D_s_offset[len_D_s]=offset_m + main_len;
                                          len_D_s++;
                                     }
-                                 }
-                               }
-
-                               for (q=0;q<len_D_s;++q) s+=D_s[q];
-                               if (std::abs(s)>0) {
-                                   s=A_ij/s;
-                                   for (q=0;q<len_D_s;++q) {
-                                        if (D_s_offset[q] < main_len)
-                                          main_block->val[D_s_offset[q]]+=s*D_s[q];
-                                        else
-                                          couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q];
-                                   }
-                               } else {
-                                   a+=A_ij;
-                               }
-                          }
+                                }
+                            }
+
+                            for (q=0;q<len_D_s;++q) s+=D_s[q];
+                            if (std::abs(s)>0) {
+                                s=A_ij/s;
+                                for (q=0;q<len_D_s;++q) {
+                                    if (D_s_offset[q] < main_len)
+                                        main_block->val[D_s_offset[q]]+=s*D_s[q];
+                                    else
+                                        couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q];
+                                }
+                            } else {
+                                a+=A_ij;
+                            }
+                        }
                      }
                  }
               }
@@ -840,7 +805,7 @@ void Preconditioner_AMG_setClassicProlongation(SystemMatrix_ptr P,
                         if  (counter_C[t]>=0)  { /* j is an interpolation point : add A_ij into P */
                                const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), util::comparIndex);
                                if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_AMG_setClassicProlongation: interpolation point is missing.");
+                                   throw PasoException("Preconditioner_AMG_setClassicProlongation: interpolation point is missing.");
                                } else {
                                     const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i);
                                     couple_block->val[offset]+=A_ij;
@@ -989,7 +954,7 @@ void Preconditioner_AMG_setClassicProlongation_Block(
                         if  (counter_C[j]>=0)  { /* j is an interpolation point : add A_ij into P */
                                const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), util::comparIndex);
                                if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
+                                   throw PasoException("Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
                                } else {
                                     const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i);
                                     for (ib=0; ib<row_block; ib++) main_block->val[offset*row_block+ib] +=A_ij[(row_block+1)*ib];
@@ -1079,7 +1044,7 @@ void Preconditioner_AMG_setClassicProlongation_Block(
                         if  (counter_C[t]>=0)  { /* j is an interpolation point : add A_ij into P */
                                const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), util::comparIndex);
                                if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
+                                   throw PasoException("Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
 
                                } else {
                                     const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i);
@@ -1160,7 +1125,7 @@ void Preconditioner_AMG_setClassicProlongation_Block(
 
               /* i has been processed, now we need to do some rescaling */
               for (ib=0; ib<row_block; ib++) {
-                   register double a2=a[ib];
+                   double a2=a[ib];
                    if (std::abs(a2)>0.) {
                         a2=-1./a2;
                         range = main_pattern->ptr[i + 1];
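
In this file (as in AMG.cpp above) the Esys_setError(SYSTEM_ERROR, ...) calls and the trailing
if (!Esys_noError()) out.reset(); guards are replaced by throwing PasoException, so a missing
interpolation point now aborts at the point of failure instead of being checked for afterwards.
A rough sketch of the control-flow difference; PasoError and the linear lookup below are
stand-ins for the real PasoException type and the bsearch over interpolation points:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Hypothetical stand-in for paso's PasoException.
    struct PasoError : std::runtime_error {
        explicit PasoError(const std::string& msg) : std::runtime_error(msg) {}
    };

    // Stand-in for the bsearch over a row's interpolation points.
    static const int* findInterpolationPoint(const int* begin, const int* end, int value)
    {
        for (const int* p = begin; p != end; ++p)
            if (*p == value) return p;
        return nullptr;
    }

    int main()
    {
        const int points[] = {2, 5, 7};
        try {
            const int* where_p = findInterpolationPoint(points, points + 3, 4);
            if (where_p == nullptr)                      // previously: Esys_setError(...)
                throw PasoError("interpolation point is missing.");
            std::cout << "offset " << (where_p - points) << "\n";
        } catch (const PasoError& e) {
            std::cerr << "error: " << e.what() << "\n"; // caller reacts immediately,
        }                                                // no global error flag to poll
        return 0;
    }
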
diff --git a/paso/src/AMG_Restriction.cpp b/paso/src/AMG_Restriction.cpp
index 843de2a..9ad3db7 100644
--- a/paso/src/AMG_Restriction.cpp
+++ b/paso/src/AMG_Restriction.cpp
@@ -25,9 +25,9 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "SparseMatrix.h"
 #include "PasoUtil.h"
 #include "Preconditioner.h"
+#include "SparseMatrix.h"
 
 #include <cstring> // memcpy
 
@@ -46,15 +46,14 @@ namespace paso {
 
 SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
 {
-   esysUtils::JMPI& mpi_info=P->mpi_info;
-   Distribution_ptr input_dist, output_dist;
-   SharedComponents_ptr send, recv;
+   escript::JMPI mpi_info(P->mpi_info);
+   escript::Distribution_ptr input_dist, output_dist;
    Connector_ptr col_connector;
    const dim_t row_block_size=P->row_block_size;
    const dim_t col_block_size=P->col_block_size;
    const dim_t n=P->mainBlock->numRows;
    const dim_t n_C=P->mainBlock->numCols;
-   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
+   index_t size=mpi_info->size, rank=mpi_info->rank;
    index_t *ptr=NULL, *idx=NULL, *degree_set=NULL, *offset_set=NULL;
    index_t *send_ptr=NULL, *recv_ptr=NULL, *recv_idx=NULL;
    index_t *temp=NULL, *where_p=NULL;
@@ -62,8 +61,7 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
    index_t i, j, j_ub, k, p, iptr, iptr_ub, icb, irb;
    index_t block_size, copy_block_size, sum, offset, len, msgs;
    double  *val=NULL, *data_set=NULL, *recv_val=NULL;
-   index_t *shared=NULL, *offsetInShared=NULL;
-   Esys_MPI_rank *neighbor=NULL;
+   index_t *shared=NULL;
    #ifdef ESYS_MPI
      MPI_Request* mpi_requests=NULL;
      MPI_Status* mpi_stati=NULL;
@@ -144,48 +142,48 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
 
    /* send/receive degree_set to build the "ptr" for R->col_coupleBlock */
    msgs = 0;
-   send = P->col_coupler->connector->send;
-   recv = P->col_coupler->connector->recv;
-   recv_ptr = new index_t[send->offsetInShared[send->numNeighbors]];
-   for (p=0; p<send->numNeighbors; p++) {
+   SharedComponents_ptr send(P->col_coupler->connector->send);
+   SharedComponents_ptr recv(P->col_coupler->connector->recv);
+   recv_ptr = new index_t[send->numSharedComponents];
+   for (p=0; p<send->neighbour.size(); p++) {
      i = send->offsetInShared[p];
      j = send->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
-        #ifdef ESYS_MPI
-        MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+send->neighbor[p],
+#ifdef ESYS_MPI
+        MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbour[p],
+                mpi_info->counter()+send->neighbour[p],
                 mpi_info->comm, &mpi_requests[msgs]);
-        #endif
+#endif
         msgs++;
      }
    }
 
-   for (p=0; p<recv->numNeighbors; p++) {
+   for (p=0; p<recv->neighbour.size(); p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
-        #ifdef ESYS_MPI
-        MPI_Issend(&(degree_set[i]), k, MPI_INT, recv->neighbor[p],
-                mpi_info->msg_tag_counter+rank, mpi_info->comm,
+#ifdef ESYS_MPI
+        MPI_Issend(&degree_set[i], k, MPI_INT, recv->neighbour[p],
+                mpi_info->counter()+rank, mpi_info->comm,
                 &mpi_requests[msgs]);
-        #endif
+#endif
         msgs++;
      }
    }
 
-   #ifdef ESYS_MPI
+#ifdef ESYS_MPI
    MPI_Waitall(msgs, mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, size)
+   mpi_info->incCounter(size);
+#endif
 
    delete[] degree_set;
-   degree_set = new index_t[send->numNeighbors];
-   memset(degree_set, 0, sizeof(index_t)*send->numNeighbors);
-   for (p=0, sum=0; p<send->numNeighbors; p++) {
+   degree_set = new index_t[send->neighbour.size()];
+   memset(degree_set, 0, sizeof(index_t)*send->neighbour.size());
+   for (p=0, sum=0; p<send->neighbour.size(); p++) {
      iptr_ub = send->offsetInShared[p+1];
-     for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++){
+     for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++) {
         degree_set[p] += recv_ptr[iptr];
      }
      sum += degree_set[p];
@@ -196,52 +194,52 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
    msgs = 0;
    recv_idx = new index_t[sum];
    recv_val = new double[sum * block_size];
-   for (p=0, offset=0; p<send->numNeighbors; p++) {
+   for (p=0, offset=0; p<send->neighbour.size(); p++) {
      if (degree_set[p]) {
-        #ifdef ESYS_MPI
-        MPI_Irecv(&(recv_idx[offset]), degree_set[p], MPI_INT,
-                send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
+#ifdef ESYS_MPI
+        MPI_Irecv(&recv_idx[offset], degree_set[p], MPI_INT,
+                send->neighbour[p], mpi_info->counter()+send->neighbour[p],
                 mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
-        MPI_Irecv(&(recv_val[offset*block_size]), degree_set[p] * block_size,
-                MPI_DOUBLE, send->neighbor[p],
-                mpi_info->msg_tag_counter+send->neighbor[p]+size,
+        MPI_Irecv(&recv_val[offset*block_size], degree_set[p] * block_size,
+                MPI_DOUBLE, send->neighbour[p],
+                mpi_info->counter()+send->neighbour[p]+size,
                 mpi_info->comm, &mpi_requests[msgs]);
         offset += degree_set[p];
-        #endif
+#endif
         msgs++;
      }
    }
 
-   for (p=0; p<recv->numNeighbors; p++) {
+   for (p=0; p<recv->neighbour.size(); p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = send_ptr[j] - send_ptr[i];
      if (k > 0) {
         #ifdef ESYS_MPI
-        MPI_Issend(&(offset_set[send_ptr[i]]), k, MPI_INT,
-                recv->neighbor[p], mpi_info->msg_tag_counter+rank,
+        MPI_Issend(&offset_set[send_ptr[i]], k, MPI_INT,
+                recv->neighbour[p], mpi_info->counter()+rank,
                 mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
-        MPI_Issend(&(data_set[send_ptr[i]*block_size]), k*block_size, MPI_DOUBLE,
-                recv->neighbor[p], mpi_info->msg_tag_counter+rank+size,
+        MPI_Issend(&data_set[send_ptr[i]*block_size], k*block_size, MPI_DOUBLE,
+                recv->neighbour[p], mpi_info->counter()+rank+size,
                 mpi_info->comm, &mpi_requests[msgs]);
         #endif
         msgs++;
      }
    }
 
-   len = send->offsetInShared[send->numNeighbors];
+   len = send->numSharedComponents;
    temp = new index_t[len];
    memset(temp, 0, sizeof(index_t)*len);
    for (p=1; p<len; p++) {
      temp[p] = temp[p-1] + recv_ptr[p-1];
    }
 
-   #ifdef ESYS_MPI
+#ifdef ESYS_MPI
    MPI_Waitall(msgs, mpi_requests, mpi_stati);
-   #endif
-   ESYS_MPI_INC_COUNTER(*mpi_info, 2*size)
+   mpi_info->incCounter(2*size);
+#endif
    delete[] degree_set;
    delete[] offset_set;
    delete[] data_set;
@@ -250,15 +248,15 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
    delete[] mpi_stati;
 
    /* construct "ptr", "idx" and "val" for R->col_coupleBlock */
-   ptr = new  index_t[n_C + 1];
-   idx = new  index_t[sum];
-   val = new  double[sum*block_size];
+   ptr = new index_t[n_C + 1];
+   idx = new index_t[sum];
+   val = new double[sum*block_size];
    ptr[0] = 0;
    for (i=0; i<n_C; i++) {
      icb = 0;
-     for (p=0; p<send->numNeighbors; p++) {
+     for (p=0; p<send->neighbour.size(); p++) {
         k = send->offsetInShared[p+1];
-        for (j=send->offsetInShared[p]; j<k; j++) {
+        for (j = send->offsetInShared[p]; j<k; j++) {
           if (send->shared[j] == i) {
             offset = ptr[i] + icb;
             len = recv_ptr[j];
@@ -299,20 +297,19 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
    }
 
    /* prepare the receiver for the col_connector */
-   dist = P->pattern->output_distribution->first_component;
-   offsetInShared = new index_t[size+1];
+   const std::vector<index_t> dist(P->pattern->output_distribution->first_component);
+   std::vector<index_t> offsetInShared(size+1);
    shared = new index_t[num_Rcouple_cols];
-   numNeighbors = send->numNeighbors;
-   neighbor = send->neighbor;
-   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
-   if (num_Rcouple_cols > 0) offset = dist[neighbor[0] + 1];
+   numNeighbors = send->neighbour.size();
+   std::vector<int> neighbour = send->neighbour;
+   if (num_Rcouple_cols > 0) offset = dist[neighbour[0] + 1];
    for (i=0, p=0; i<num_Rcouple_cols; i++) {
     /* col i is received from rank neighbor[p] while it is still smaller
        than "offset"; otherwise it is received from rank neighbor[p+1] */
      while (recv_idx[i] >= offset) {
         p++;
         offsetInShared[p] = i;
-        offset = dist[neighbor[p] + 1];
+        offset = dist[neighbour[p] + 1];
      }
      shared[i] = i + n;  /* n is the number of cols in R->mainBlock */
    }
@@ -320,25 +317,24 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
    for (i=p; i<numNeighbors; i++) {
      offsetInShared[i+1] = num_Rcouple_cols;
    }
-   recv.reset(new SharedComponents(n, numNeighbors, neighbor, shared,
-               offsetInShared, 1, 0, mpi_info));
+   recv.reset(new SharedComponents(n, neighbour, shared, offsetInShared));
    delete[] recv_idx;
 
    /* prepare the sender for the col_connector */
    delete[] shared;
-   numNeighbors = P->col_coupler->connector->recv->numNeighbors;
-   neighbor = P->col_coupler->connector->recv->neighbor;
+   numNeighbors = P->col_coupler->connector->recv->neighbour.size();
+   neighbour = P->col_coupler->connector->recv->neighbour;
    shared = new index_t[n * numNeighbors];
    Pattern_ptr couple_pattern(P->col_coupleBlock->pattern);
    sum=0;
-   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
-   for (p=0; p<numNeighbors; p++) {
+   offsetInShared.assign(size+1, 0);
+   for (p = 0; p < numNeighbors; p++) {
      j = P->col_coupler->connector->recv->offsetInShared[p];
      j_ub = P->col_coupler->connector->recv->offsetInShared[p+1];
-     for (i=0; i<n; i++) {
+     for (i = 0; i < n; i++) {
         iptr = couple_pattern->ptr[i];
         iptr_ub = couple_pattern->ptr[i+1];
-        for (; iptr<iptr_ub; iptr++) {
+        for (; iptr < iptr_ub; iptr++) {
           k = couple_pattern->index[iptr];
           if (k >= j && k < j_ub) {
             shared[sum] = i;
@@ -349,20 +345,17 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
      }
      offsetInShared[p+1] = sum;
    }
-   send.reset(new SharedComponents(n, numNeighbors, neighbor, shared,
-               offsetInShared, 1, 0, mpi_info));
+   send.reset(new SharedComponents(n, neighbour, shared, offsetInShared));
 
-   /* build the col_connector based on sender and receiver */
+   // build the col_connector based on sender and receiver
    col_connector.reset(new Connector(send, recv));
-   delete[] offsetInShared;
    delete[] shared;
 
    couple_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, n_C,
                         num_Rcouple_cols, ptr, idx));
 
-   input_dist.reset(new Distribution(mpi_info, dist, 1, 0));
-   dist = P->pattern->input_distribution->first_component;
-   output_dist.reset(new Distribution(mpi_info, dist, 1, 0));
+   input_dist.reset(new escript::Distribution(mpi_info, dist));
+   output_dist.reset(new escript::Distribution(mpi_info, P->pattern->input_distribution->first_component));
 
     /* now we need to create the System Matrix
       TO BE FIXED: at this stage, we only construct the col_couple_pattern
@@ -370,13 +363,12 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
        row_couple_pattern and row_connector need to be constructed as well */
     SystemMatrix_ptr out;
     SystemMatrixPattern_ptr pattern;
-    if (Esys_noError()) {
-        pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-                  output_dist, input_dist, main_block->pattern, couple_pattern,
-                  couple_pattern, col_connector, col_connector));
-        out.reset(new SystemMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
-                  row_block_size, col_block_size, false));
-    }
+    pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
+              output_dist, input_dist, main_block->pattern, couple_pattern,
+              couple_pattern, col_connector, col_connector));
+    out.reset(new SystemMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
+              row_block_size, col_block_size, false,
+              P->getRowFunctionSpace(), P->getColumnFunctionSpace()));
 
     /* now fill in the matrix */
     memcpy(out->mainBlock->val, main_block->val,
@@ -384,10 +376,6 @@ SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
     memcpy(out->col_coupleBlock->val, val,
                 out->col_coupleBlock->len * sizeof(double));
     delete[] val;
-
-    if (!Esys_noError()) {
-        out.reset();
-    }
     return out;
 }
 
diff --git a/paso/src/AMG_Root.cpp b/paso/src/AMG_Root.cpp
index 773efe3..45ea0a6 100644
--- a/paso/src/AMG_Root.cpp
+++ b/paso/src/AMG_Root.cpp
@@ -25,10 +25,12 @@
 
 /****************************************************************************/
 
-#include <iostream>
 #include "Paso.h"
-#include "Preconditioner.h"
 #include "BOOMERAMG.h"
+#include "Options.h"
+#include "Preconditioner.h"
+
+#include <iostream>
 
 namespace paso {
 
@@ -57,66 +59,58 @@ Preconditioner_AMG_Root* Preconditioner_AMG_Root_alloc(SystemMatrix_ptr A,
         prec->is_local = (A->mpi_info->size == 1) || options->use_local_preconditioner;
         if (prec->is_local) {
             prec->localamg = Preconditioner_LocalAMG_alloc(A->mainBlock, 1, options);
-            Esys_MPIInfo_noError(A->mpi_info);
         } else {
             prec->amg = Preconditioner_AMG_alloc(A, 1, options);
         }
     }
-    if (Esys_noError()) {
-        if (options->verbose) {
-            if (prec->localamg || prec->amg || prec->boomeramg) {
-                std::cout << "Preconditioner_AMG_Root:  Smoother is ";
-                if (options->smoother == PASO_JACOBI) {
-                    std::cout << "Jacobi";
-                } else {
-                    std::cout << "Gauss-Seidel";
-                }
-                std::cout << " with " << options->pre_sweeps << "/"
-                    << options->post_sweeps << " pre/post sweeps";
-                if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION) {
-                    std::cout << " and classical interpolation.";
-                } else if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) {
-                    std::cout << " and classical interpolation with enforced FF coupling.";
-                } else {
-                    std::cout << " and direct interpolation.";
-                }
-                std::cout << std::endl;
+    if (options->verbose) {
+        if (prec->localamg || prec->amg || prec->boomeramg) {
+            std::cout << "Preconditioner_AMG_Root:  Smoother is ";
+            if (options->smoother == PASO_JACOBI) {
+                std::cout << "Jacobi";
             } else {
-                std::cout << "Preconditioner_AMG_Root:  no coarsening constructed." << std::endl;
+                std::cout << "Gauss-Seidel";
             }
-        } // verbose?
-
-
-        if (prec->localamg != NULL) {
-            options->num_level=Preconditioner_LocalAMG_getMaxLevel(prec->localamg);
-            options->coarse_level_sparsity=Preconditioner_LocalAMG_getCoarseLevelSparsity(prec->localamg);
-            options->num_coarse_unknowns=Preconditioner_LocalAMG_getNumCoarseUnknowns(prec->localamg);
-        } else if (prec->amg != NULL) {
-            options->num_level=Preconditioner_AMG_getMaxLevel(prec->amg);
-            options->coarse_level_sparsity=Preconditioner_AMG_getCoarseLevelSparsity(prec->amg);
-            options->num_coarse_unknowns=Preconditioner_AMG_getNumCoarseUnknowns(prec->amg);
-        } else if (prec->boomeramg == NULL) {
-            prec->sweeps=options->sweeps;
-            prec->amgsubstitute=Preconditioner_Smoother_alloc(A, (options->smoother == PASO_JACOBI), prec->is_local, options->verbose);
-            options->num_level=0;
-            if (options->verbose) {
-                if (options->smoother == PASO_JACOBI) {
-                    std::cout << "Preconditioner: Jacobi(" << prec->sweeps
-                        << ") preconditioner is used." << std::endl;
-                } else {
-                    std::cout << "Preconditioner: Gauss-Seidel("
-                        << prec->sweeps << ") preconditioner is used."
-                        << std::endl;
-                }
+            std::cout << " with " << options->pre_sweeps << "/"
+                << options->post_sweeps << " pre/post sweeps";
+            if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION) {
+                std::cout << " and classical interpolation.";
+            } else if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) {
+                std::cout << " and classical interpolation with enforced FF coupling.";
+            } else {
+                std::cout << " and direct interpolation.";
+            }
+            std::cout << std::endl;
+        } else {
+            std::cout << "Preconditioner_AMG_Root:  no coarsening constructed." << std::endl;
+        }
+    } // verbose?
+
+
+    if (prec->localamg != NULL) {
+        options->num_level=Preconditioner_LocalAMG_getMaxLevel(prec->localamg);
+        options->coarse_level_sparsity=Preconditioner_LocalAMG_getCoarseLevelSparsity(prec->localamg);
+        options->num_coarse_unknowns=Preconditioner_LocalAMG_getNumCoarseUnknowns(prec->localamg);
+    } else if (prec->amg != NULL) {
+        options->num_level=Preconditioner_AMG_getMaxLevel(prec->amg);
+        options->coarse_level_sparsity=Preconditioner_AMG_getCoarseLevelSparsity(prec->amg);
+        options->num_coarse_unknowns=Preconditioner_AMG_getNumCoarseUnknowns(prec->amg);
+    } else if (prec->boomeramg == NULL) {
+        prec->sweeps=options->sweeps;
+        prec->amgsubstitute=Preconditioner_Smoother_alloc(A, (options->smoother == PASO_JACOBI), prec->is_local, options->verbose);
+        options->num_level=0;
+        if (options->verbose) {
+            if (options->smoother == PASO_JACOBI) {
+                std::cout << "Preconditioner: Jacobi(" << prec->sweeps
+                    << ") preconditioner is used." << std::endl;
+            } else {
+                std::cout << "Preconditioner: Gauss-Seidel("
+                    << prec->sweeps << ") preconditioner is used."
+                    << std::endl;
             }
         }
     }
-    if (!Esys_noError() ){
-        Preconditioner_AMG_Root_free(prec);
-        return NULL;
-    } else {
-        return prec;
-    }
+    return prec;
 }
 
 /* Applies the preconditioner. */
diff --git a/paso/src/AML.cpp.old b/paso/src/AML.cpp.old
deleted file mode 100644
index 48a7770..0000000
--- a/paso/src/AML.cpp.old
+++ /dev/null
@@ -1,918 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************/
-
-/* Paso: AML preconditioner:
-
-   This is just a collection of older code. This does not compile.
-*/
-
-/****************************************************************************/
-
-/* Author: artak at uq.edu.au                                */
-
-/****************************************************************************/
-
-#include "Paso.h"
-#include "Preconditioner.h"
-#include "Options.h"
-#include "PasoUtil.h"
-#include "UMFPACK.h"
-#include "MKL.h"
-#include "SystemMatrix.h"
-#include "Coarsening.h"
-#include "BlockOps.h"
-
-#ifndef INC_COARSE
-#define INC_COARSE
-
-#include "SystemMatrix.h"
-
-
-/* Remove:
-#define PASO_COARSENING_IN_F TRUE
-#define PASO_COARSENING_IN_C FALSE
-*/
-
-void Coarsening_Local(index_t* marker_F, SparseMatrix* A,  Options* options);
-void Coarsening_Local_Aggregation(SparseMatrix* A, index_t* marker_F, const double theta);
-void Coarsening_Local_Aggregation_blk(SparseMatrix* A, index_t* marker_F, const double theta);
-void Coarsening_Local_YS(SparseMatrix* A, index_t* marker_F, const double theta);
-void Coarsening_Local_YS_blk(SparseMatrix* A, index_t* marker_F, const double theta);
-
-void Coarsening_Local_RS(SparseMatrix* A, index_t* marker_F, double theta);
-
-void Coarsening_Local_Partition(Pattern* pattern,index_t* marker_F);
-void Coarsening_Local_greedy(Pattern* pattern, index_t* marker_F);
-
-
-
-/*=== REVISE ============*/
-void Coarsening_Local_greedy_color(Pattern* pattern, index_t* marker_F);
-void Coarsening_Local_greedy_diag(SparseMatrix* A, index_t* marker_F, double thershold);
-
-void Coarsening_Local_YS_plus(SparseMatrix* A, index_t* marker_F, double alpha, double taw, double delta);
-void Coarsening_Local_Standard(SparseMatrix* A, index_t* marker_F, double theta);
-void Coarsening_Local_greedy_RS(SparseMatrix* A, index_t* marker_F, double theta);
-void Coarsening_Local_greedy_Agg(SparseMatrix* A, index_t* marker_F, double theta);
-/*dim_t how_many(dim_t n,dim_t* S_i, int value1, dim_t* addedSet, int value2);*/
-void Coarsening_Local_Standard_Block(SparseMatrix* A, index_t* marker_F, double theta);
-
-dim_t how_many(dim_t i,Pattern * S, bool_t transpose);
-dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask);
-Pattern* Coarsening_Local_getTranspose(Pattern* P);
-
-void Coarsening_Local_getReport(dim_t n,index_t* marker_F);
-void Coarsening_Local_Read(char *fileName,dim_t n,index_t* marker_F);
-void Coarsening_Local_Write(char *fileName,dim_t n,index_t* marker_F);
-
-
-#endif
-
-
-/***********************************************************************************,amli->b_F*/
-
-/* free all memory used by AMLI                                */
-
-void Solver_AMLI_System_free(Solver_AMLI_System * in) {
-     dim_t i;
-     if (in!=NULL) {
-        for (i=0;i<in->block_size;++i) {
-          Solver_AMLI_free(in->amliblock[i]);
-          SparseMatrix_free(in->block[i]);
-        }
-        delete in;
-     }
-}
-
-
-/* free all memory used by AMLI                                */
-
-void Solver_AMLI_free(Solver_AMLI * in) {
-     if (in!=NULL) {
-        Preconditioner_LocalSmoother_free(in->Smoother);
-
-        delete[] in->inv_A_FF;
-        delete[] in->A_FF_pivot;
-        SparseMatrix_free(in->A_FC);
-        SparseMatrix_free(in->A_CF);
-        SparseMatrix_free(in->A);
-        if(in->coarsest_level==TRUE) {
-        #ifdef MKL
-          MKL_free1(in->AOffset1);
-          SparseMatrix_free(in->AOffset1);
-        #else
-          #ifdef UMFPACK
-          UMFPACK1_free((UMFPACK_Handler*)(in->solver));
-          #endif
-        #endif
-        }
-        delete[] in->rows_in_F;
-        delete[] in->rows_in_C;
-        delete[] in->mask_F;
-        delete[] in->mask_C;
-        delete[] in->x_F;
-        delete[] in->b_F;
-        delete[] in->x_C;
-        delete[] in->b_C;
-        in->solver=NULL;
-        Solver_AMLI_free(in->AMLI_of_Schur);
-        delete[] in->b_C;
-        delete in;
-     }
-}
-
-/************************************************************************************/
-
-/*   constructs the block-block factorization of
-
-        [ A_FF  A_FC ]
-   A_p= [            ]
-        [ A_CF  A_CC ]
-
-to
-
-  [      I         0  ]  [ A_FF  0 ]  [ I    invA_FF*A_FC ]
-  [ A_CF*invA_FF   I  ]  [   0   S ]  [ 0          I      ]
-
-
-   where S=A_CC-A_CF*invA_FF*A_FC within the sparsity pattern of S
-
-   then AMLI is applied to S again until S becomes empty
-
-*/
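For readers scanning this removed file, a minimal dense sketch of the Schur-complement step described in the comment above (illustrative only; the function name and the row-major dense layout are assumptions, and the deleted AMLI code actually works on sparse blocks with a lumped, row-sum inverse of A_FF):

    // Illustrative: S = A_CC - A_CF * inv(D_FF) * A_FC, with D_FF a diagonal
    // (row-sum) approximation of A_FF as used by the AMLI setup below.
    #include <cstddef>
    #include <vector>

    std::vector<double> schurComplement(const std::vector<double>& A_CC,   // nC x nC
                                        const std::vector<double>& A_CF,   // nC x nF
                                        const std::vector<double>& A_FC,   // nF x nC
                                        const std::vector<double>& invD_FF,// size nF
                                        std::size_t nC, std::size_t nF)
    {
        std::vector<double> S(A_CC); // start from A_CC and subtract the coupling term
        for (std::size_t i = 0; i < nC; ++i)
            for (std::size_t j = 0; j < nC; ++j)
                for (std::size_t k = 0; k < nF; ++k)
                    S[i*nC + j] -= A_CF[i*nF + k] * invD_FF[k] * A_FC[k*nC + j];
        return S;
    }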
-Solver_AMLI* Solver_getAMLI(SparseMatrix *A_p,dim_t level,Options* options) {
-  Solver_AMLI* out=NULL;
-  Pattern* temp1=NULL;
-  Pattern* temp2=NULL;
-  bool_t verbose=options->verbose;
-  dim_t n=A_p->numRows;
-  dim_t n_block=A_p->row_block_size;
-  index_t* mis_marker=NULL;
-  index_t* counter=NULL;
-  index_t iPtr,*index, *where_p, iPtr_s;
-  dim_t i,j;
-  SparseMatrix * schur=NULL;
-  SparseMatrix * schur_withFillIn=NULL;
-  double S=0;
-
- /* char filename[8];
-  sprintf(filename,"AMLILevel%d",level);
-
- SparseMatrix_saveMM(A_p,filename);
-  */
-
-  /*Make sure we have block sizes 1*/
-  if (A_p->col_block_size>1) {
-     Esys_setError(TYPE_ERROR,"Solver_getAMLI: AMLI requires column block size 1.");
-     return NULL;
-  }
-  if (A_p->row_block_size>1) {
-     Esys_setError(TYPE_ERROR,"Solver_getAMLI: AMLI requires row block size 1.");
-     return NULL;
-  }
-  out=new Solver_AMLI;
-  /* identify independent set of rows/columns */
-  mis_marker=new index_t[n];
-  counter=new index_t[n];
-  if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) {
-     out->AMLI_of_Schur=NULL;
-     out->inv_A_FF=NULL;
-     out->A_FF_pivot=NULL;
-     out->A_FC=NULL;
-     out->A_CF=NULL;
-     out->rows_in_F=NULL;
-     out->rows_in_C=NULL;
-     out->mask_F=NULL;
-     out->mask_C=NULL;
-     out->x_F=NULL;
-     out->b_F=NULL;
-     out->x_C=NULL;
-     out->b_C=NULL;
-     out->A=SparseMatrix_getReference(A_p);
-     out->Smoother=NULL;
-     out->solver=NULL;
-     /*out->GS=Solver_getGS(A_p,verbose);*/
-     out->level=level;
-     out->n=n;
-     out->n_F=n+1;
-     out->n_block=n_block;
-     out->post_sweeps=options->post_sweeps;
-     out->pre_sweeps=options->pre_sweeps;
-
-     if (level==0 || n<=options->min_coarse_matrix_size) {
-         out->coarsest_level=TRUE;
-         #ifdef MKL
-                  out->AOffset1=SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
-                  #pragma omp parallel for private(i) schedule(static)
-                  for (i=0;i<out->A->len;++i) {
-                       out->AOffset1->val[i]=out->A->val[i];
-                  }
-         #else
-            #ifdef UMFPACK
-            #else
-            out->Smoother=Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
-            #endif
-         #endif
-     } else {
-         out->coarsest_level=FALSE;
-         out->Smoother=Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
-
-         Coarsening_Local(mis_marker,A_p, options->coarsening_threshold, options->coarsening_method);
-
-        #pragma omp parallel for private(i) schedule(static)
-        for (i = 0; i < n; ++i) {
-           mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
-           counter[i]=mis_marker[i];
-
-        }
-
-        out->n_F=Util_cumsum(n,counter);
-
-        if (out->n_F==0) {
-           out->coarsest_level=TRUE;
-           level=0;
-           if (verbose) {
-               /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
-               printf("AMLI coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
-           }
-        }
-        else if (out->n_F==n) {
-          out->coarsest_level=TRUE;
-           level=0;
-           if (verbose) {
-               /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
-               printf("AMLI coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
-
-            }
-        } else {
-              if (Esys_noError()) {
-                 /*#pragma omp parallel for private(i) schedule(static)
-                 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
-                 out->n_F=Util_cumsum(n,counter);
-                 */
-
-                 /*if(level==3) {
-                   printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
-                   for (i = 0; i < n; ++i) {
-                    printf("##%d %d\n",i,mis_marker[i]);
-                   }
-                 }*/
-
-                 out->mask_F=new index_t[n];
-                 out->rows_in_F=new index_t[out->n_F];
-                 out->inv_A_FF=new double[n_block*n_block*out->n_F];
-                 out->A_FF_pivot=NULL; /* later use for block size>3 */
-                 if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->inv_A_FF) || Esys_checkPtr(out->rows_in_F) ) ) {
-                    /* creates an index for F from mask */
-                    #pragma omp parallel for private(i) schedule(static)
-                    for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
-                    #pragma omp parallel for private(i) schedule(static)
-                    for (i = 0; i < n; ++i) {
-                       if  (mis_marker[i]) {
-                              out->rows_in_F[counter[i]]=i;
-                              out->mask_F[i]=counter[i];
-                       } else {
-                              out->mask_F[i]=-1;
-                       }
-                    }
-
-                    /* Compute row-sum for getting rs(A_FF)^-1*/
-                    #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
-                    for (i = 0; i < out->n_F; ++i) {
-                      S=0;
-      /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
-                      for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
-                       j=A_p->pattern->index[iPtr];
-      /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
-                       if (mis_marker[j])
-                           S+=A_p->val[iPtr];
-                      }
-      /*printf("-> %e \n",S);*/
-                      out->inv_A_FF[i]=1./S;
-                    }
-                 }
-              }
-
-              /* check whether it actually makes sense to continue coarsening:
-              if the coarse matrix is at least 30% smaller we continue, otherwise we stop. */
-              if ((out->n_F*100/n)<30) {
-                    level=1;
-               }
-
-              if ( Esys_noError()) {
-                    /* if there are no nodes in the coarse level there is no more work to do */
-                    out->n_C=n-out->n_F;
-
-                   /*if (out->n_F>500) */
-                    out->rows_in_C=new index_t[out->n_C];
-                    out->mask_C=new index_t[n];
-                    if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) {
-                         /* creates an index for C from mask */
-                         #pragma omp parallel for private(i) schedule(static)
-                         for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
-                         Util_cumsum(n,counter);
-                         #pragma omp parallel for private(i) schedule(static)
-                         for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
-                         #pragma omp parallel for private(i) schedule(static)
-                         for (i = 0; i < n; ++i) {
-                                  if  (! mis_marker[i]) {
-                                      out->rows_in_C[counter[i]]=i;
-                                      out->mask_C[i]=counter[i];
-                                   } else {
-                                      out->mask_C[i]=-1;
-                                   }
-                         }
-                    }
-              }
-              if ( Esys_noError()) {
-                      /* get A_CF block: */
-                      out->A_CF=SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
-                      /* get A_FC block: */
-                      out->A_FC=SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
-                      /* get A_CC block: */
-                      schur=SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
-              }
-              if ( Esys_noError()) {
-                     /*find the pattern of the schur complement with fill in*/
-                    temp1=Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
-                    temp2=Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
-                    schur_withFillIn=SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
-                    Pattern_free(temp1);
-                    Pattern_free(temp2);
-              }
-              if ( Esys_noError()) {
-                    /* copy values over*/
-                    #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
-                    for (i = 0; i < schur_withFillIn->numRows; ++i) {
-                      for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
-                         j=schur_withFillIn->pattern->index[iPtr];
-                         iPtr_s=schur->pattern->ptr[i];
-                         index=&(schur->pattern->index[iPtr_s]);
-                         where_p=(index_t*)bsearch(&j,
-                                              index,
-                                              schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
-                                              sizeof(index_t),
-                                              comparIndex);
-                         if (where_p!=NULL) {
-                                schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
-                         }
-                       }
-                    }
-                    Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
-                    out->AMLI_of_Schur=Solver_getAMLI(schur_withFillIn,level-1,options);
-              }
-              /* allocate work arrays for AMLI application */
-              if (Esys_noError()) {
-                         out->x_F=new double[n_block*out->n_F];
-                         out->b_F=new double[n_block*out->n_F];
-                         out->x_C=new double[n_block*out->n_C];
-                         out->b_C=new double[n_block*out->n_C];
-
-                         if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {
-                             #pragma omp parallel for private(i) schedule(static)
-                             for (i = 0; i < out->n_F; ++i) {
-                                         out->x_F[i]=0.;
-                                         out->b_F[i]=0.;
-                              }
-                              #pragma omp parallel for private(i) schedule(static)
-                              for (i = 0; i < out->n_C; ++i) {
-                                     out->x_C[i]=0.;
-                                     out->b_C[i]=0.;
-                              }
-                         }
-              }
-            SparseMatrix_free(schur);
-            SparseMatrix_free(schur_withFillIn);
-         }
-     }
-  }
-  delete[] mis_marker;
-  delete[] counter;
-
-  if (Esys_noError()) {
-      if (verbose && level>0 && !out->coarsest_level) {
-         printf("AMLI: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
-     }
-     return out;
-  } else  {
-     Solver_AMLI_free(out);
-     return NULL;
-  }
-}
-
-/************************************************************************************/
-
-/* apply the AMLI preconditioner b -> x
-
-     in fact it solves
-
-  [      I         0  ]  [ A_FF 0 ] [ I    invA_FF*A_FC ]  [ x_F ]  = [b_F]
-  [ A_CF*invA_FF   I  ]  [   0  S ] [ 0          I      ]  [ x_C ]  = [b_C]
-
- in the form
-
-   b->[b_F,b_C]
-   x_F=invA_FF*b_F
-   b_C=b_C-A_CF*x_F
-   x_C=AMLI(b_C)
-   b_F=b_F-A_FC*x_C
-   x_F=invA_FF*b_F
-   x<-[x_F,x_C]
-
- should be called within a parallel region
- barrier synchronisation should be performed to make sure that the input vector is available
-
-*/
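A compact dense sketch of the solve sequence listed in the comment above (illustrative only; the gemv helper, the dense layout and the CoarseSolve callback standing in for the recursive AMLI call are assumptions, not the Paso implementation):

    #include <cstddef>
    #include <vector>
    using Vec = std::vector<double>;
    using Mat = std::vector<double>;  // row-major dense, size rows*cols

    // y = alpha*A*x + beta*y
    static void gemv(const Mat& A, const Vec& x, Vec& y, double alpha, double beta,
                     std::size_t rows, std::size_t cols)
    {
        for (std::size_t i = 0; i < rows; ++i) {
            double s = 0.0;
            for (std::size_t j = 0; j < cols; ++j) s += A[i*cols + j] * x[j];
            y[i] = alpha * s + beta * y[i];
        }
    }

    // One two-level correction following the steps above; coarseSolve stands in
    // for the recursive call x_C = AMLI(b_C).
    template <class CoarseSolve>
    void twoLevelCorrection(const Mat& A_CF, const Mat& A_FC, const Vec& invD_FF,
                            Vec& x_F, Vec& x_C, Vec b_F, Vec b_C,
                            CoarseSolve coarseSolve)
    {
        const std::size_t nF = b_F.size(), nC = b_C.size();
        for (std::size_t i = 0; i < nF; ++i) x_F[i] = invD_FF[i] * b_F[i]; // x_F = invA_FF*b_F
        gemv(A_CF, x_F, b_C, -1.0, 1.0, nC, nF);                           // b_C = b_C - A_CF*x_F
        coarseSolve(x_C, b_C);                                             // x_C = AMLI(b_C)
        gemv(A_FC, x_C, b_F, -1.0, 1.0, nF, nC);                           // b_F = b_F - A_FC*x_C
        for (std::size_t i = 0; i < nF; ++i) x_F[i] = invD_FF[i] * b_F[i]; // x_F = invA_FF*b_F
    }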
-
-void Solver_solveAMLI(Solver_AMLI * amli, double * x, double * b) {
-     dim_t i;
-     double time0=0;
-     double *r=NULL, *x0=NULL,*x_F_temp=NULL;
-     bool_t verbose=0;
-
-     dim_t post_sweeps=amli->post_sweeps;
-     dim_t pre_sweeps=amli->pre_sweeps;
-
-     #ifdef UMFPACK
-          UMFPACK_Handler * ptr=NULL;
-     #endif
-
-     r=new double[amli->n];
-     x0=new double[amli->n];
-     x_F_temp=new double[amli->n_F];
-
-     if (amli->coarsest_level) {
-
-      time0=Esys_timer();
-      /* If all unknowns are eliminated then Jacobi is the best preconditioner */
-      if (amli->n_F==0 || amli->n_F==amli->n) {
-         Preconditioner_LocalSmoother_solve(amli->A, amli->Smoother,x,b,1,FALSE);
-      }
-       else {
-       #ifdef MKL
-          MKL1(amli->AOffset1,x,b,verbose);
-       #else
-          #ifdef UMFPACK
-             ptr=(UMFPACK_Handler *)(amli->solver);
-             UMFPACK1(&ptr,amli->A,x,b,verbose);
-             amli->solver=(void*) ptr;
-          #else
-          Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,1,FALSE);
-         #endif
-       #endif
-       }
-       time0=Esys_timer()-time0;
-       if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
-
-     } else {
-        /* presmoothing */
-         time0=Esys_timer();
-         Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,pre_sweeps,FALSE);
-         time0=Esys_timer()-time0;
-         if (verbose) fprintf(stderr,"timing: Presmoothing: %e\n",time0);
-        /* end of presmoothing */
-
-         time0=Esys_timer();
-         #pragma omp parallel for private(i) schedule(static)
-         for (i=0;i<amli->n;++i) r[i]=b[i];
-
-         /*r=b-Ax*/
-         SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A,x,1.,r);
-
-        /* r->[b_F,b_C]     */
-        #pragma omp parallel for private(i) schedule(static)
-        for (i=0;i<amli->n_F;++i) amli->b_F[i]=r[amli->rows_in_F[i]];
-
-        #pragma omp parallel for private(i) schedule(static)
-        for (i=0;i<amli->n_C;++i) amli->b_C[i]=r[amli->rows_in_C[i]];
-
-        /* x_F=invA_FF*b_F  */
-        Copy(amli->n_F, amli->x_F,amli->b_F);
-        BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,amli->x_F);
-
-        /* b_C=b_C-A_CF*x_F */
-        SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_CF,amli->x_F,1.,amli->b_C);
-
-        time0=Esys_timer()-time0;
-        if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
-
-        /* x_C=AMLI(b_C)     */
-        Solver_solveAMLI(amli->AMLI_of_Schur,amli->x_C,amli->b_C);
-
-        time0=Esys_timer();
-
-        /* b_F=-A_FC*x_C */
-        SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_FC,amli->x_C,0.,amli->b_F);
-        /* x_F_temp=invA_FF*b_F  */
-        Copy(amli->n_F, x_F_temp,amli->b_F);
-        BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,x_F_temp);
-
-        #pragma omp parallel for private(i) schedule(static)
-        for (i=0;i<amli->n_F;++i) {
-                 amli->x_F[i]+=x_F_temp[i];
-        }
-
-        /* x<-[x_F,x_C]     */
-        #pragma omp parallel for private(i) schedule(static)
-        for (i=0;i<amli->n;++i) {
-            if (amli->mask_C[i]>-1) {
-                 x[i]+=amli->x_C[amli->mask_C[i]];
-            } else {
-                 x[i]+=amli->x_F[amli->mask_F[i]];
-            }
-        }
-
-        time0=Esys_timer()-time0;
-        if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
-
-     /*postsmoothing*/
-     time0=Esys_timer();
-     Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,post_sweeps,TRUE);
-     time0=Esys_timer()-time0;
-     if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
-
-     /*end of postsmoothing*/
-
-     }
-     delete[] r;
-     delete[] x0;
-     delete[] x_F_temp;
-     return;
-}
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/***********************************************************************************
-
-Paso: Coarsening strategies (no MPI)
-
-the given matrix A is split into the form
-
-
-         [ A_FF  A_FC ]
-   A  =  [            ]
-         [ A_CF  A_CC ]
-
-such that unknowns/equations in set C are weakly connected via A_CC and strongly connected
-to at least one unknown/equation in F via A_CF and A_FC. The unknowns/equations in C and F
-are marked in marker_F by FALSE and TRUE respectively.
-
-The weak/strong connection is controlled by coarsening_threshold.
-
-three strategies are implemented:
-
-a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii|
-b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik|
-c) Aggregation :     |a_ij|^2 >= theta**2 * |a_ii||a_jj|
-
-where theta = coarsening_threshold/maximum_pattern_degree
-
-Remark:
-
-- a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation
-
-*/
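For illustration, a minimal sketch of the three strong-connection tests listed above, applied to one row of a dense matrix (hypothetical helper, not the Paso implementation, which works on sparse CSR patterns):

    // Illustrative only: returns the strongly connected columns j of row i under
    // the chosen criterion; theta plays the role of
    // coarsening_threshold/maximum_pattern_degree described above.
    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    enum class Criterion { YairShapira, RugeStueben, Aggregation };

    std::vector<std::size_t> strongConnections(const std::vector<double>& A,
                                               std::size_t n, std::size_t i,
                                               double theta, Criterion c)
    {
        std::vector<std::size_t> S;
        double maxOff = 0.0;
        for (std::size_t k = 0; k < n; ++k)
            if (k != i) maxOff = std::max(maxOff, std::fabs(A[i*n + k]));
        for (std::size_t j = 0; j < n; ++j) {
            if (j == i) continue;
            const double aij = std::fabs(A[i*n + j]);
            bool strong = false;
            switch (c) {
                case Criterion::YairShapira: // |a_ij| >= theta*|a_ii|
                    strong = aij >= theta * std::fabs(A[i*n + i]); break;
                case Criterion::RugeStueben: // |a_ij| >= theta*max_{k!=i}|a_ik|
                    strong = aij >= theta * maxOff; break;
                case Criterion::Aggregation: // |a_ij|^2 >= theta^2*|a_ii||a_jj|
                    strong = aij*aij >= theta*theta *
                             std::fabs(A[i*n + i]) * std::fabs(A[j*n + j]); break;
            }
            if (strong) S.push_back(j);
        }
        return S;
    }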
-/************************************************************************************
-
-Author: artak at uq.edu.au, l.gross at uq.edu.au
-
-************************************************************************************/
-
-#include "Coarsening.h"
-#include "PasoUtil.h"
-#include <limits.h>
-
-
-
-/*Used in Coarsening_Local_RS_MI*/
-
-/*Computes how many connections unknown i has in S.
-bool_t transpose - TRUE if we want to compute how many strong connections i has in S^T, FALSE otherwise.
-Note that if we first transpose S and then call the method on S^T then "transpose" should be set to FALSE.
-*/
-dim_t how_many(dim_t i,Pattern * S, bool_t transpose) {
-   dim_t j,n;
-   dim_t total,ltotal;
-   index_t *index,*where_p;
-
-   /*index_t iptr;*/
-   total=0;
-   ltotal=0;
-
-   n=S->numOutput;
-
-   if(transpose) {
-      #pragma omp parallel
-      {
-         ltotal=0;
-         #pragma omp for private(j,index,where_p,ltotal) schedule(static)
-         for (j=0;j<n;++j) {
-            index=&(S->index[S->ptr[j]]);
-            where_p=(index_t*)bsearch(&i,
-                                      index,
-                                      S->ptr[j + 1]-S->ptr[j],
-                                      sizeof(index_t),
-                                      comparIndex);
-                                      if (where_p!=NULL) {
-                                         ltotal++;
-                                      }
-         }
-      }
-      #pragma omp critical
-      {
-         total+=ltotal;
-      }
-
-   }
-   else {
-      total=S->ptr[i+1]-S->ptr[i];
-      /*#pragma omp parallel for private(iptr) schedule(static)*/
-      /*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
-      if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE)
-         total++;
-   }*/
-
-   }
-
-   if (total==0) total=IS_NOT_AVAILABLE;
-
-   return total;
-}
-
-
-
-
-
-Pattern* Coarsening_Local_getTranspose(Pattern* P)
-{
-    Pattern *outpattern=NULL;
-    dim_t C=P->numInput;
-    dim_t F=P->numOutput-C;
-    dim_t n=C+F;
-    dim_t i,j;
-    index_t iptr;
-    IndexListArray index_list(C);
-
-   /*#pragma omp parallel for private(i,iptr,j) schedule(static)*/
-    for (i=0; i<n; ++i) {
-        for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) {
-            j=P->index[iptr];
-            index_list[i].insertIndex(j);
-        }
-    }
-
-    outpattern=Pattern_fromIndexListArray(0, index_list, 0, n, 0);
-    return outpattern;
-}
-
-
-/************** BLOCK COARSENING *********************/
-
-void Coarsening_Local_Standard_Block(SparseMatrix* A, index_t* marker_F, double theta)
-{
-   const dim_t n=A->numRows;
-
-   dim_t i,j,k;
-   index_t iptr,jptr;
-   /*index_t *index,*where_p;*/
-   double threshold,max_offdiagonal;
-   dim_t *lambda;   /*measure of importance */
-   dim_t maxlambda=0;
-   index_t index_maxlambda=0;
-   double time0=0;
-   bool_t verbose=0;
-   dim_t n_block=A->row_block_size;
-
-   double fnorm=0;
-   dim_t bi;
-
-   Pattern *S=NULL;
-   Pattern *ST=NULL;
-   IndexListArray index_list(n);
-
-   time0=Esys_timer();
-   k=0;
-   /*Coarsening_Local_getReport(n,marker_F);*/
-   /*printf("Blocks %d %d\n",n_block,A->len);*/
-
-   /*S_i={j \in N_i; i strongly coupled to j}*/
-   #pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static)
-   for (i=0;i<n;++i) {
-      if(marker_F[i]==IS_AVAILABLE) {
-         max_offdiagonal = DBL_MIN;
-         for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-            j=A->pattern->index[iptr];
-            if( j != i){
-               fnorm=0;
-               for(bi=0;bi<n_block*n_block;++bi)
-               {
-                  fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
-               }
-               fnorm=sqrt(fnorm);
-               max_offdiagonal = MAX(max_offdiagonal,fnorm);
-            }
-         }
-         threshold = theta*max_offdiagonal;
-         for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-            j=A->pattern->index[iptr];
-            if( j != i){
-               fnorm=0;
-               for(bi=0;bi<n_block*n_block;++bi)
-               {
-                  fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
-               }
-               fnorm=sqrt(fnorm);
-               if(fnorm>=threshold) {
-                  index_list[i].insertIndex(j);
-               }
-            }
-         }
-      }
-   }
-
-   S=Pattern_fromIndexListArray(0, index_list, 0, A->pattern->numInput, 0);
-   ST=Coarsening_Local_getTranspose(S);
-
-   /*printf("Patterns len %d %d\n",S->len,ST->len);*/
-
-   time0=Esys_timer()-time0;
-   if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0);
-
-   lambda=new dim_t[n];
-
-   #pragma omp parallel for private(i) schedule(static)
-   for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; }
-
-   /*S_i={j \in N_i; i strongly coupled to j}*/
-
-   /*
-   #pragma omp parallel for private(i,iptr,lk) schedule(static)
-   for (i=0;i<n;++i) {
-      lk=0;
-      for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-         if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i )
-            lk++;
-}
-#pragma omp critical
-k+=lk;
-if(k==0) {
-   marker_F[i]=TRUE;
-}
-}
-*/
-
-   k=0;
-   maxlambda=0;
-   time0=Esys_timer();
-
-   for (i=0;i<n;++i) {
-      if(marker_F[i]==IS_AVAILABLE) {
-         lambda[i]=how_many(k,ST,FALSE);   /* if every row is available then k and i are the same.*/
-         /*lambda[i]=how_many(i,S,TRUE);*/
-         /*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/
-         k++;
-         if(maxlambda<lambda[i]) {
-            maxlambda=lambda[i];
-            index_maxlambda=i;
-         }
-      }
-   }
-
-   k=0;
-   time0=Esys_timer()-time0;
-   if (verbose) fprintf(stdout,"timing: Lambda computations at the beginning: %e\n",time0);
-
-   time0=Esys_timer();
-
-   /*Coarsening_Local_getReport(n,marker_F);*/
-
-   while (Util_isAny(n,marker_F,IS_AVAILABLE)) {
-      if(index_maxlambda<0) {
-         break;
-      }
-
-      i=index_maxlambda;
-      if(marker_F[i]==IS_AVAILABLE) {
-         marker_F[index_maxlambda]=FALSE;
-         lambda[index_maxlambda]=IS_NOT_AVAILABLE;
-         for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) {
-            j=ST->index[iptr];
-            if(marker_F[j]==IS_AVAILABLE) {
-               marker_F[j]=TRUE;
-               lambda[j]=IS_NOT_AVAILABLE;
-               for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) {
-                  k=S->index[jptr];
-                  if(marker_F[k]==IS_AVAILABLE) {
-                     lambda[k]++;
-                  }
-               }
-            }
-         }
-         for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
-            j=S->index[iptr];
-            if(marker_F[j]==IS_AVAILABLE) {
-               lambda[j]--;
-            }
-         }
-      }
-
-      /* Used when transpose of S is not available */
-      /*
-      for (i=0;i<n;++i) {
-         if(marker_F[i]==IS_AVAILABLE) {
-            if (i==index_maxlambda) {
-               marker_F[index_maxlambda]=FALSE;
-               lambda[index_maxlambda]=-1;
-               for (j=0;j<n;++j) {
-                  if(marker_F[j]==IS_AVAILABLE) {
-                     index=&(S->index[S->ptr[j]]);
-                     where_p=(index_t*)bsearch(&i,
-                     index,
-                     S->ptr[j + 1]-S->ptr[j],
-                     sizeof(index_t),
-             comparIndex);
-                     if (where_p!=NULL) {
-                        marker_F[j]=TRUE;
-                        lambda[j]=-1;
-                        for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) {
-                           k=S->index[iptr];
-                           if(marker_F[k]==IS_AVAILABLE) {
-                              lambda[k]++;
-   }
-   }
-   }
-   }
-   }
-   }
-   }
-   }
-   */
-      index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE);
-   }
-
-   time0=Esys_timer()-time0;
-   if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0);
-
-   /*Coarsening_Local_getReport(n,marker_F);*/
-   #pragma omp parallel for private(i) schedule(static)
-   for (i=0;i<n;++i)
-      if(marker_F[i]==IS_AVAILABLE) {
-         marker_F[i]=TRUE;
-      }
-
-      /*Coarsening_Local_getReport(n,marker_F);*/
-
-      delete[] lambda;
-
-   Pattern_free(S);
-
-   /* swap to TRUE/FALSE in marker_F */
-   #pragma omp parallel for private(i) schedule(static)
-   for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE;
-
-}
-
-
-
-#undef IS_AVAILABLE
-#undef IS_NOT_AVAILABLE
-#undef TRUE
-#undef FALSE
-
-#undef IS_UNDECIDED
-#undef IS_STRONG
-#undef IS_WEAK
-
-#undef TRUEB
-#undef TRUEB
-
diff --git a/paso/src/BOOMERAMG.cpp b/paso/src/BOOMERAMG.cpp
index afb1545..2d26280 100644
--- a/paso/src/BOOMERAMG.cpp
+++ b/paso/src/BOOMERAMG.cpp
@@ -36,7 +36,7 @@ namespace paso {
 
 void Preconditioner_BoomerAMG_free(Preconditioner_BoomerAMG* in)
 {
-#ifdef BOOMERAMG
+#ifdef ESYS_HAVE_BOOMERAMG
     if (in != NULL) {
         HYPRE_IJMatrixDestroy(in->A);
         HYPRE_IJVectorDestroy(in->b);
@@ -51,7 +51,7 @@ void Preconditioner_BoomerAMG_free(Preconditioner_BoomerAMG* in)
 Preconditioner_BoomerAMG* Preconditioner_BoomerAMG_alloc(SystemMatrix_ptr A,
                                                          Options* options)
 {
-#ifdef BOOMERAMG
+#ifdef ESYS_HAVE_BOOMERAMG
     index_t ilower; /* first row in current processor, number is given by
                        the global indices. Can be 0- or 1-based indexing */
     index_t iupper; /* last row in current processor, number is given by
@@ -81,10 +81,10 @@ Preconditioner_BoomerAMG* Preconditioner_BoomerAMG_alloc(SystemMatrix_ptr A,
     /* set up inputs for BoomerAMG */
     nrows = A->mainBlock->numRows;
 
-    ilower = A->row_distribution->first_component[A->mpi_info->rank];
+    ilower = A->row_distribution->getFirstComponent();
     iupper = ilower + nrows - 1;
 
-    jlower = A->col_distribution->first_component[A->mpi_info->rank];
+    jlower = A->col_distribution->getFirstComponent();
     jupper = ilower + A->mainBlock->numCols - 1;
 
     rows = new index_t[nrows];
@@ -231,7 +231,7 @@ void Preconditioner_BoomerAMG_solve(SystemMatrix_ptr A,
                                     Preconditioner_BoomerAMG* amg,
                                     double* out, double* in)
 {
-#ifdef BOOMERAMG
+#ifdef ESYS_HAVE_BOOMERAMG
     index_t ilower; /* first row in current processor, number is given by
                        the global indices. Can be 0- or 1-based indexing */
     index_t iupper; /* last row in current processor, number is given by
@@ -255,7 +255,7 @@ void Preconditioner_BoomerAMG_solve(SystemMatrix_ptr A,
 
     /* set up inputs for BoomerAMG */
     nrows = A->mainBlock->numRows;
-    ilower = A->row_distribution->first_component[A->mpi_info->rank];
+    ilower = A->row_distribution->getFirstComponent();
     iupper = ilower + nrows - 1;
     rows = new index_t[nrows];
     #pragma omp parallel for schedule(static) private(i)
diff --git a/paso/src/BOOMERAMG.h b/paso/src/BOOMERAMG.h
index 634e684..f017b52 100644
--- a/paso/src/BOOMERAMG.h
+++ b/paso/src/BOOMERAMG.h
@@ -33,7 +33,7 @@
 
 #include "SystemMatrix.h"
 
-#ifdef BOOMERAMG
+#ifdef ESYS_HAVE_BOOMERAMG
 #include <HYPRE_krylov.h>
 #include <HYPRE.h>
 #include <HYPRE_parcsr_ls.h>
@@ -43,7 +43,7 @@ namespace paso {
 
 struct Preconditioner_BoomerAMG
 {
-#ifdef BOOMERAMG
+#ifdef ESYS_HAVE_BOOMERAMG
     HYPRE_IJMatrix A;
     HYPRE_ParCSRMatrix parcsr_A;
     HYPRE_IJVector b;
diff --git a/paso/src/BiCGStab.cpp b/paso/src/BiCGStab.cpp
index a008eca..4040a1e 100644
--- a/paso/src/BiCGStab.cpp
+++ b/paso/src/BiCGStab.cpp
@@ -19,9 +19,8 @@
    Crude modifications and translations for Paso by Matt Davies and Lutz Gross
 */
 
-#include "Paso.h"
-#include "SystemMatrix.h"
 #include "Solver.h"
+#include "SystemMatrix.h"
 
 namespace paso {
 
@@ -75,8 +74,8 @@ namespace paso {
 *  ==============================================================
 */
 
-err_t Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
-                      double* tolerance, Performance* pp)
+SolverResult Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x,
+                             dim_t* iter, double* tolerance, Performance* pp)
 {
   /* Local variables */
   double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/
@@ -88,14 +87,14 @@ err_t Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
   dim_t num_iter=0,maxit,num_iter_global=0;
   dim_t i0;
   bool breakFlag=false, maxIterFlag=false, convergeFlag=false;
-  dim_t status = SOLVER_NO_ERROR;
+  SolverResult status = NoError;
   double *resid = tolerance;
   dim_t n = A->getTotalNumRows();
 
   /* Test the input parameters. */
 
   if (n < 0) {
-    status = SOLVER_INPUT_ERROR;
+    status = InputError;
   } else {
     /* allocate memory: */
     rtld=new double[n];
@@ -161,7 +160,7 @@ err_t Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
         /*        Compute direction adjusting vector PHAT and scalar ALPHA. */
 
         A->solvePreconditioner(&phat[0], &p[0]);
-        SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0], PASO_ZERO, &v[0]);
+        A->MatrixVector_CSR_OFFSET0(PASO_ONE, &phat[0], PASO_ZERO, &v[0]);
 
         #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
         for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
@@ -193,7 +192,7 @@ err_t Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
            } else {
              /*           Compute stabilizer vector SHAT and scalar OMEGA. */
              A->solvePreconditioner(&shat[0], &s[0]);
-             SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]);
+             A->MatrixVector_CSR_OFFSET0(PASO_ONE, &shat[0],PASO_ZERO,&t[0]);
 
              #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
              for (i0 = 0; i0 < n; i0++) {
@@ -236,9 +235,9 @@ err_t Solver_BiCGStab(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
       num_iter_global=num_iter;
       norm_of_residual_global=norm_of_residual;
       if (maxIterFlag) {
-            status = SOLVER_MAXITER_REACHED;
+            status = MaxIterReached;
       } else if (breakFlag) {
-            status = SOLVER_BREAKDOWN;
+            status = Breakdown;
       }
     }
     delete[] rtld;
diff --git a/paso/src/BlockOps.h b/paso/src/BlockOps.h
index 73c4f14..e5bb69a 100644
--- a/paso/src/BlockOps.h
+++ b/paso/src/BlockOps.h
@@ -18,10 +18,12 @@
 #define __PASO_BLOCKOPS_H__
 
 #include "Paso.h"
+#include "PasoException.h"
+
 #include <cstring> // memcpy
 
-#ifdef USE_LAPACK
-   #ifdef MKL_LAPACK
+#ifdef ESYS_HAVE_LAPACK
+   #ifdef ESYS_MKL_LAPACK
       #include <mkl_lapack.h>
       #include <mkl_cblas.h>
    #else
@@ -72,12 +74,12 @@ inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
     R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
 }
 
-#define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a LAPACK version to enable operations on block sizes > 3.")
+#define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
 
 /// performs operation R=R-mat*V (V and R are not overlapping) - NxN
 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
 {
-#ifdef USE_LAPACK
+#ifdef ESYS_HAVE_LAPACK
     cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
 #else
     PASO_MISSING_CLAPACK;
@@ -86,7 +88,7 @@ inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double*
 
 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
 {
-#ifdef USE_LAPACK
+#ifdef ESYS_HAVE_LAPACK
     cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
 #else
     PASO_MISSING_CLAPACK;
@@ -144,8 +146,8 @@ inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
 /// LU factorization of NxN matrix mat with partial pivoting
 inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
 {
-#ifdef USE_LAPACK
-#ifdef MKL_LAPACK
+#ifdef ESYS_HAVE_LAPACK
+#ifdef ESYS_MKL_LAPACK
     int res = 0;
     dgetrf(&N, &N, mat, &N, pivot, &res);
     if (res != 0)
@@ -154,7 +156,7 @@ inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
     int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
     if (res != 0)
         *failed = 1;
-#endif // MKL_LAPACK
+#endif // ESYS_MKL_LAPACK
 #else
     PASO_MISSING_CLAPACK;
 #endif
@@ -163,8 +165,8 @@ inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
 /// solves system of linear equations A*X=B
 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
 {
-#ifdef USE_LAPACK
-#ifdef MKL_LAPACK
+#ifdef ESYS_HAVE_LAPACK
+#ifdef ESYS_MKL_LAPACK
     int res = 0;
     int ONE = 1;
     dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
@@ -174,7 +176,7 @@ inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, in
     int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
     if (res != 0)
         *failed = 1;
-#endif // MKL_LAPACK
+#endif // ESYS_MKL_LAPACK
 #else
     PASO_MISSING_CLAPACK;
 #endif
@@ -236,7 +238,7 @@ inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
             BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
         }
         if (failed > 0) {
-            Esys_setError(ZERO_DIVISION_ERROR, "BlockOps_solveAll: solution failed.");
+            throw PasoException("BlockOps_solveAll: solution failed.");
         }
     }
 }
diff --git a/paso/src/Coupler.cpp b/paso/src/Coupler.cpp
index 950192e..acaa092 100644
--- a/paso/src/Coupler.cpp
+++ b/paso/src/Coupler.cpp
@@ -14,7 +14,6 @@
 *
 *****************************************************************************/
 
-
 #include "Coupler.h"
 
 #include <cstring> // memcpy
@@ -27,7 +26,8 @@ namespace paso {
  *
  ****************************************************************************/
 
-Coupler::Coupler(const_Connector_ptr conn, dim_t blockSize) :
+Coupler::Coupler(const_Connector_ptr conn, dim_t blockSize,
+                 escript::JMPI mpiInfo) :
     connector(conn),
     block_size(blockSize),
     in_use(false),
@@ -35,27 +35,26 @@ Coupler::Coupler(const_Connector_ptr conn, dim_t blockSize) :
     send_buffer(NULL),
     recv_buffer(NULL),
     mpi_requests(NULL),
-    mpi_stati(NULL)
+    mpi_stati(NULL),
+    mpi_info(mpiInfo)
 {
-    Esys_resetError();
-    mpi_info = conn->mpi_info;
 #ifdef ESYS_MPI
-    mpi_requests = new MPI_Request[conn->send->numNeighbors +
-                                   conn->recv->numNeighbors];
-    mpi_stati = new MPI_Status[conn->send->numNeighbors +
-                               conn->recv->numNeighbors];
-#endif
+    mpi_requests = new MPI_Request[conn->send->neighbour.size() +
+                                   conn->recv->neighbour.size()];
+    mpi_stati = new MPI_Status[conn->send->neighbour.size() +
+                               conn->recv->neighbour.size()];
     if (mpi_info->size > 1) {
-        send_buffer=new double[conn->send->numSharedComponents * block_size];
-        recv_buffer=new double[conn->recv->numSharedComponents * block_size];
+        send_buffer = new double[conn->send->numSharedComponents * block_size];
+        recv_buffer = new double[conn->recv->numSharedComponents * block_size];
     }
+#endif
 }
 
 Coupler::~Coupler()
 {
+#ifdef ESYS_MPI
     delete[] send_buffer;
     delete[] recv_buffer;
-#ifdef ESYS_MPI
     delete[] mpi_requests;
     delete[] mpi_stati;
 #endif
@@ -64,19 +63,18 @@ Coupler::~Coupler()
 void Coupler::startCollect(const double* in)
 {
     data = const_cast<double*>(in);
+#ifdef ESYS_MPI
     if (mpi_info->size > 1) {
         if (in_use) {
-            Esys_setError(SYSTEM_ERROR,"Coupler::startCollect: Coupler in use.");
+            throw PasoException("Coupler::startCollect: Coupler in use.");
         }
         // start receiving input
-        for (dim_t i=0; i < connector->recv->numNeighbors; ++i) {
-#ifdef ESYS_MPI
+        for (dim_t i=0; i < connector->recv->neighbour.size(); ++i) {
             MPI_Irecv(&recv_buffer[connector->recv->offsetInShared[i]*block_size],
                     (connector->recv->offsetInShared[i+1]-connector->recv->offsetInShared[i])*block_size,
-                    MPI_DOUBLE, connector->recv->neighbor[i],
-                    mpi_info->msg_tag_counter+connector->recv->neighbor[i],
+                    MPI_DOUBLE, connector->recv->neighbour[i],
+                    mpi_info->counter()+connector->recv->neighbour[i],
                     mpi_info->comm, &mpi_requests[i]);
-#endif
         }
         // collect values into buffer
         const int numSharedSend = connector->send->numSharedComponents;
@@ -95,34 +93,32 @@ void Coupler::startCollect(const double* in)
             }
         }
         // send buffer out
-        for (dim_t i=0; i < connector->send->numNeighbors; ++i) {
-#ifdef ESYS_MPI
+        for (dim_t i=0; i < connector->send->neighbour.size(); ++i) {
             MPI_Issend(&send_buffer[connector->send->offsetInShared[i]*block_size],
                     (connector->send->offsetInShared[i+1] - connector->send->offsetInShared[i])*block_size,
-                    MPI_DOUBLE, connector->send->neighbor[i],
-                    mpi_info->msg_tag_counter+mpi_info->rank, mpi_info->comm,
-                    &mpi_requests[i+connector->recv->numNeighbors]);
-#endif
+                    MPI_DOUBLE, connector->send->neighbour[i],
+                    mpi_info->counter()+mpi_info->rank, mpi_info->comm,
+                    &mpi_requests[i+connector->recv->neighbour.size()]);
         }
-        ESYS_MPI_INC_COUNTER(*mpi_info, mpi_info->size)
+        mpi_info->incCounter(mpi_info->size);
         in_use = true;
     }
+#endif
 }
 
 double* Coupler::finishCollect()
 {
+#ifdef ESYS_MPI
     if (mpi_info->size > 1) {
         if (!in_use) {
-            Esys_setError(SYSTEM_ERROR, "Coupler::finishCollect: Communication has not been initiated.");
-            return NULL;
+            throw PasoException("Coupler::finishCollect: Communication has not been initiated.");
         }
         // wait for receive
-#ifdef ESYS_MPI
-        MPI_Waitall(connector->recv->numNeighbors+connector->send->numNeighbors,
-                    mpi_requests, mpi_stati);
-#endif
+        MPI_Waitall(connector->recv->neighbour.size() +
+                    connector->send->neighbour.size(), mpi_requests, mpi_stati);
         in_use = false;
     }
+#endif
     return recv_buffer;
 }
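Coupler::startCollect()/finishCollect() above implement a split-phase halo exchange: post the receives first, pack the locally owned shared values, issue the sends, and wait only when the remote values are actually needed. A condensed, self-contained sketch of that pattern follows; it is plain MPI with hypothetical neighbour/offset arrays, block size 1, a single symmetric neighbour list and a fixed tag, whereas the real Coupler keeps separate send/recv SharedComponents and derives tags from the JMPI message counter.

    #include <mpi.h>
    #include <vector>

    // neighbour[i]            : rank of the i-th communication partner
    // offset (size()+1 items) : offset[i]..offset[i+1] is the buffer slice for neighbour[i]
    void haloExchange(MPI_Comm comm, int tag,
                      const std::vector<int>& neighbour,
                      const std::vector<int>& offset,
                      const std::vector<double>& sendBuf,
                      std::vector<double>& recvBuf)
    {
        std::vector<MPI_Request> req(2 * neighbour.size());
        // post all receives first so the matching sends cannot block
        for (std::size_t i = 0; i < neighbour.size(); ++i) {
            MPI_Irecv(&recvBuf[offset[i]], offset[i+1] - offset[i], MPI_DOUBLE,
                      neighbour[i], tag, comm, &req[i]);
        }
        // then send the packed shared values (synchronous-mode, as in the Coupler)
        for (std::size_t i = 0; i < neighbour.size(); ++i) {
            MPI_Issend(const_cast<double*>(&sendBuf[offset[i]]),
                       offset[i+1] - offset[i], MPI_DOUBLE,
                       neighbour[i], tag, comm, &req[neighbour.size() + i]);
        }
        MPI_Waitall(static_cast<int>(req.size()), req.data(), MPI_STATUSES_IGNORE);
    }
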
 
diff --git a/paso/src/Coupler.h b/paso/src/Coupler.h
index 48579df..2d787a5 100644
--- a/paso/src/Coupler.h
+++ b/paso/src/Coupler.h
@@ -28,6 +28,8 @@
 #ifndef __PASO_COUPLER_H__
 #define __PASO_COUPLER_H__
 
+#include "Paso.h"
+#include "PasoException.h"
 #include "SharedComponents.h"
 
 namespace paso {
@@ -45,26 +47,17 @@ struct Connector
 {
     SharedComponents_ptr send;
     SharedComponents_ptr recv;
-    mutable esysUtils::JMPI mpi_info;
 
     Connector(SharedComponents_ptr s, SharedComponents_ptr r)
     {
-        Esys_resetError();
-        if (s->mpi_info != r->mpi_info) {
-            Esys_setError(SYSTEM_ERROR,
-                    "Connector: send and recv MPI communicators don't match.");
-        } else if (s->local_length != r->local_length) {
-            Esys_setError(SYSTEM_ERROR,
-                    "Connector: local length of send and recv SharedComponents must match.");
+        if (s->local_length != r->local_length) {
+            throw PasoException("Connector: local length of send and recv "
+                                "SharedComponents must match.");
         }
         send = s;
         recv = r;
-        mpi_info = s->mpi_info;
     }
 
-    /// destructor
-    ~Connector() { }
-
     /// creates a copy
     inline Connector_ptr copy() const { return unroll(1); }
 
@@ -74,33 +67,30 @@ struct Connector
         Connector_ptr out;
         if (block_size > 1) {
             new_send_shcomp.reset(new SharedComponents(send->local_length,
-                        send->numNeighbors, send->neighbor,
-                        send->shared, send->offsetInShared,
-                        block_size, 0, mpi_info));
+                        send->neighbour, send->shared, send->offsetInShared,
+                        block_size, 0));
 
             new_recv_shcomp.reset(new SharedComponents(recv->local_length,
-                    recv->numNeighbors, recv->neighbor,
-                    recv->shared, recv->offsetInShared,
-                    block_size, 0, mpi_info));
+                        recv->neighbour, recv->shared, recv->offsetInShared,
+                        block_size, 0));
         } else {
             new_send_shcomp = send;
             new_recv_shcomp = recv;
         }
-        if (Esys_noError())
-            out.reset(new Connector(new_send_shcomp, new_recv_shcomp));
+        out.reset(new Connector(new_send_shcomp, new_recv_shcomp));
         return out;
     }
 
     //inline debug() const
     //{
-    //    for (int i=0; i<recv->numNeighbors; ++i)
+    //    for (int i=0; i<recv->neighbour.size(); ++i)
     //        printf("Coupler: %d receive %d data at %d from %d\n",
     //            s->mpi_info->rank,recv->offsetInShared[i+1]-recv->offsetInShared[i],
-    //            recv->offsetInShared[i],recv->neighbor[i]);
-    //    for (int i=0; i<send->numNeighbors; ++i)
+    //            recv->offsetInShared[i],recv->neighbour[i]);
+    //    for (int i=0; i<send->neighbour.size(); ++i)
     //        printf("Coupler: %d send %d data at %d to %d\n",
     //            s->mpi_info->rank,send->offsetInShared[i+1]-send->offsetInShared[i],
-    //            send->offsetInShared[i],send->neighbor[i]);
+    //            send->offsetInShared[i],send->neighbour[i]);
     //}
 };
 
@@ -108,7 +98,7 @@ struct Connector
 PASO_DLL_API
 struct Coupler
 {
-    Coupler(const_Connector_ptr, dim_t blockSize);
+    Coupler(const_Connector_ptr, dim_t blockSize, escript::JMPI mpiInfo);
     ~Coupler();
 
     void startCollect(const double* in);
@@ -156,7 +146,7 @@ struct Coupler
     double* recv_buffer;
     MPI_Request* mpi_requests;
     MPI_Status* mpi_stati;
-    mutable esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
 };
 
 
diff --git a/paso/src/Distribution.cpp b/paso/src/Distribution.cpp
deleted file mode 100644
index 42126bb..0000000
--- a/paso/src/Distribution.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************/
-
-/*   Paso: distribution                                                     */
-
-/****************************************************************************/
-
-/*   Author: Lutz Gross, l.gross at uq.edu.au */
-
-/****************************************************************************/
-
-#include "Distribution.h"
-
-namespace paso {
-
-// Pseudo random numbers such that the values are independent from the
-// distribution
-double Distribution::random_seed = .4142135623730951;
-
-} // namespace paso
-
diff --git a/paso/src/Distribution.h b/paso/src/Distribution.h
deleted file mode 100644
index 3b93f53..0000000
--- a/paso/src/Distribution.h
+++ /dev/null
@@ -1,135 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/****************************************************************************/
-
-/*   Paso: distribution                                                     */
-
-/****************************************************************************/
-
-/*   Author: Lutz Gross, l.gross at uq.edu.au */
-
-/****************************************************************************/
-
-#ifndef __PASO_DISTRIBUTION_H__
-#define __PASO_DISTRIBUTION_H__
-
-#include "Paso.h"
-#include "PasoUtil.h"
-
-namespace paso {
-
-struct Distribution;
-typedef boost::shared_ptr<Distribution> Distribution_ptr;
-typedef boost::shared_ptr<const Distribution> const_Distribution_ptr;
-
-/// describes the distribution of a vector stored on the local process
-PASO_DLL_API
-struct Distribution
-{
-    Distribution(const esysUtils::JMPI& mpiInfo, const index_t* firstComponent,
-                 index_t m, index_t b) :
-        mpi_info(mpiInfo)
-    {
-        first_component = new index_t[mpi_info->size+1];
-        for (dim_t i=0; i < mpi_info->size+1; ++i)
-            first_component[i] = m*firstComponent[i]+b;
-    }
-
-    ~Distribution()
-    {
-        delete[] first_component;
-    }
-
-    inline index_t getFirstComponent() const
-    {
-        return first_component[mpi_info->rank];
-    }
-
-    inline index_t getLastComponent() const
-    {
-        return first_component[mpi_info->rank+1];
-    }
-
-
-    inline dim_t getGlobalNumComponents() const
-    {
-        return getMaxGlobalComponents()-getMinGlobalComponents();
-    }
-
-    inline dim_t getMyNumComponents() const
-    {
-        return getLastComponent()-getFirstComponent();
-    }
-
-    inline dim_t getMinGlobalComponents() const
-    {
-        return first_component[0];
-    }
-
-    inline dim_t getMaxGlobalComponents() const
-    {
-        return first_component[mpi_info->size];
-    }
-
-    inline dim_t numPositives(const double* x, dim_t block) const
-    {
-        const dim_t my_n = block * getMyNumComponents();
-        dim_t my_out = util::numPositives(my_n, x);
-        dim_t out;
-
-#ifdef ESYS_MPI
-#pragma omp single
-        {
-            MPI_Allreduce(&my_out, &out, 1, MPI_INT, MPI_SUM, mpi_info->comm);
-        }
-#else
-        out = my_out;
-#endif
-        return out;
-    }
-
-    inline double* createRandomVector(dim_t block) const
-    {
-        const index_t n_0 = getFirstComponent() * block;
-        const index_t n_1 = getLastComponent() * block;
-        const index_t n = getGlobalNumComponents() * block;
-        const dim_t my_n = n_1-n_0;
-
-        double* out = new double[my_n];
-
-#pragma omp parallel for schedule(static)
-        for (index_t i=0; i<my_n; ++i) {
-            out[i]=fmod(random_seed*(n_0+i+1), 1.);
-        }
-
-        random_seed = fmod(random_seed * (n+1.7), 1.);
-        return out;
-    }
-
-    // process i has nodes with global indices first_component[i] to
-    // first_component[i+1].
-    index_t* first_component;
-    dim_t reference_counter;
-    const esysUtils::JMPI mpi_info;
-    static double random_seed;
-};
-
-} // namespace paso
-
-#endif // __PASO_DISTRIBUTION_H__
-
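The deleted Distribution kept, per rank, the half-open range of global indices it owns: first_component[rank] up to first_component[rank+1]. Under such a layout the owner of any global index is a simple upper-bound search; a small sketch for reference, not taken from the escript replacement:

    #include <algorithm>
    #include <vector>

    // first_component has size+1 entries; rank r owns [first_component[r], first_component[r+1]).
    int ownerOf(long globalIndex, const std::vector<long>& first_component)
    {
        // first entry strictly greater than globalIndex, then step back one rank
        std::vector<long>::const_iterator it =
            std::upper_bound(first_component.begin(), first_component.end(), globalIndex);
        return static_cast<int>(it - first_component.begin()) - 1;
    }
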
diff --git a/paso/src/FCT_Solver.cpp b/paso/src/FCT_Solver.cpp
index 584245b..9234bb1 100644
--- a/paso/src/FCT_Solver.cpp
+++ b/paso/src/FCT_Solver.cpp
@@ -30,13 +30,18 @@
 
 /****************************************************************************/
 
-#include <iostream>
 #include "FCT_Solver.h"
-#include "Preconditioner.h"
 #include "PasoUtil.h"
+#include "Preconditioner.h"
+
+#include <iostream>
+
+#define MIN3(_arg1_,_arg2_,_arg3_) std::min(_arg1_, std::min(_arg2_,_arg3_))
 
 namespace paso {
 
+static const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
+
 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
     transportproblem(tp),
     omega(0),
@@ -52,8 +57,8 @@ FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
         du = new double[n];
         z = new double[n];
     }
-    u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
-    u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
+    u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize, mpi_info));
+    u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize, mpi_info));
 
     if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
         method = PASO_LINEAR_CRANK_NICOLSON;
@@ -62,8 +67,7 @@ FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
     } else if (options->ode_solver == PASO_BACKWARD_EULER) {
         method = PASO_BACKWARD_EULER;
     } else {
-        Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
-        method = UNKNOWN;
+        throw PasoException("FCT_Solver: unknown integration scheme.");
     }
 }
 
@@ -78,6 +82,7 @@ FCT_Solver::~FCT_Solver()
 // modifies the main diagonal of the iteration matrix to introduce new dt
 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
 {
+    const real_t EPSILON = escript::DataTypes::real_t_eps();
     const_TransportProblem_ptr fctp(transportproblem);
     const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
     const dim_t n = fctp->transport_matrix->getTotalNumRows();
@@ -117,9 +122,10 @@ void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
 }
 
 // entry point for update procedures
-err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance* pp)
+SolverResult FCT_Solver::update(double* u, double* u_old, Options* options,
+                                Performance* pp)
 {
-    err_t err_out = SOLVER_NO_ERROR;
+    SolverResult err_out = NoError;
 
     if (method == PASO_LINEAR_CRANK_NICOLSON) {
         err_out = updateLCN(u, u_old, options, pp);
@@ -128,20 +134,21 @@ err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance
     } else if (method == PASO_BACKWARD_EULER) {
         err_out = updateNL(u, u_old, options, pp);
     } else {
-        err_out = SOLVER_INPUT_ERROR;
+        err_out = InputError;
     }
     return err_out;
 }
 
 /// linear crank-nicolson update
-err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performance* pp)
+SolverResult FCT_Solver::updateLCN(double* u, double* u_old, Options* options,
+                                   Performance* pp)
 {
     dim_t sweep_max, i;
     double const RTOL = options->tolerance;
     const dim_t n = transportproblem->getTotalNumRows();
     SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
     const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
-    err_t errorCode = SOLVER_NO_ERROR;
+    SolverResult errorCode = NoError;
     double norm_u_tilde;
 
     u_old_coupler->startCollect(u_old);
@@ -186,23 +193,23 @@ err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performa
     errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
             ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
             &sweep_max, true);
-    if (errorCode == PRECONDITIONER_NO_ERROR) {
+    if (errorCode == NoError) {
         if (options->verbose)
             std::cout << "FCT_Solver::updateLCN: convergence after "
                 << sweep_max << " Gauss-Seidel steps." << std::endl;
-        errorCode = SOLVER_NO_ERROR;
+        errorCode = NoError;
     } else {
         if (options->verbose)
             std::cout << "FCT_Solver::updateLCN: Gauss-Seidel failed within "
                 << sweep_max << " steps (rel. tolerance " << RTOL << ")."
                 << std::endl;
-        errorCode = SOLVER_MAXITER_REACHED;
+        errorCode = MaxIterReached;
     }
     return errorCode;
 }
 
-err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
-                           Performance* pp)
+SolverResult FCT_Solver::updateNL(double* u, double* u_old, Options* options,
+                                  Performance* pp)
 {
     // number of rates >=critical_rate accepted before divergence is triggered
     const dim_t num_critical_rates_max = 3;
@@ -217,7 +224,7 @@ err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
     const double rtol = options->tolerance;
     const dim_t max_m = options->iter_max;
     dim_t m = 0, num_critical_rates = 0;
-    err_t errorCode = SOLVER_NO_ERROR;
+    SolverResult errorCode = NoError;
     bool converged=false, max_m_reached=false, diverged=false;
     /* //////////////////////////////////////////////////////////////////// */
 
@@ -255,7 +262,7 @@ err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
     // u_old is an initial guess for u
     util::copy(n, u, u_old);
 
-    while (!converged && !diverged && !max_m_reached && Esys_noError()) {
+    while (!converged && !diverged && !max_m_reached) {
         u_coupler->startCollect(u);
         u_coupler->finishCollect();
 
@@ -307,7 +314,7 @@ err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
                 std::cout << "FCT_Solver::updateNL: BiCGStab completed after "
                     << cntIter << " steps (residual = " << tol << ")." << std::endl;
             options->num_iter += cntIter;
-            if (errorCode != SOLVER_NO_ERROR) break;
+            if (errorCode != NoError) break;
         } else {
             // just use the main diagonal of (m/omega - L )
 
@@ -344,19 +351,19 @@ err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
         }
         m++;
     } // end of while loop
-    if (errorCode == SOLVER_NO_ERROR) {
+    if (errorCode == NoError) {
         if (converged) {
             if (options->verbose)
                 std::cout << "FCT_Solver::updateNL: iteration is completed." << std::endl;
-            errorCode = SOLVER_NO_ERROR;
+            errorCode = NoError;
         } else if (diverged) {
             if (options->verbose)
                 std::cout << "FCT_Solver::updateNL: divergence." << std::endl;
-            errorCode = SOLVER_DIVERGENCE;
+            errorCode = Divergence;
         } else if (max_m_reached) {
             if (options->verbose)
                 std::cout << "FCT_Solver::updateNL: maximum number of iteration steps reached." << std::endl;
-            errorCode = SOLVER_MAXITER_REACHED;
+            errorCode = MaxIterReached;
         }
     }
     return errorCode;
@@ -535,41 +542,40 @@ void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
 
 /****************************************************************************/
 
-double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
+double FCT_Solver::getSafeTimeStepSize(const_TransportProblem_ptr fctp)
 {
     double dt_max = LARGE_POSITIVE_FLOAT;
     const dim_t n = fctp->transport_matrix->getTotalNumRows();
 
     // set low order transport operator
-    setLowOrderOperator(fctp);
+    setLowOrderOperator(boost::const_pointer_cast<TransportProblem>(fctp));
 
-    if (Esys_noError()) {
-        // calculate time step size
-        dt_max = LARGE_POSITIVE_FLOAT;
+    // calculate time step size
+    dt_max = LARGE_POSITIVE_FLOAT;
 #pragma omp parallel
-        {
-            double dt_max_loc = LARGE_POSITIVE_FLOAT;
+    {
+        double dt_max_loc = LARGE_POSITIVE_FLOAT;
 #pragma omp for schedule(static)
-            for (dim_t i=0; i<n; ++i) {
-                const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
-                const double m_i = fctp->lumped_mass_matrix[i];
-                if (m_i > 0) {
-                    if (l_ii<0)
-                        dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
-                }
-            }
-            #pragma omp critical
-            {
-                dt_max = std::min(dt_max,dt_max_loc);
+        for (dim_t i=0; i<n; ++i) {
+            const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
+            const double m_i = fctp->lumped_mass_matrix[i];
+            if (m_i > 0) {
+                if (l_ii<0)
+                    dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
             }
         }
+        #pragma omp critical
+        {
+            dt_max = std::min(dt_max,dt_max_loc);
+        }
+    }
 #ifdef ESYS_MPI
-        double dt_max_loc = dt_max;
-        MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
+    double dt_max_loc = dt_max;
+    MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
 #endif
-        if (dt_max < LARGE_POSITIVE_FLOAT)
-            dt_max *= 2.;
-    }
+    if (dt_max < LARGE_POSITIVE_FLOAT)
+        dt_max *= 2.;
+
     return dt_max;
 }
 
@@ -596,7 +602,9 @@ void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
         fc->iteration_matrix.reset(new SystemMatrix(
                   fc->transport_matrix->type, fc->transport_matrix->pattern,
                   fc->transport_matrix->row_block_size,
-                  fc->transport_matrix->col_block_size, true));
+                  fc->transport_matrix->col_block_size, true,
+                  fc->transport_matrix->getRowFunctionSpace(),
+                  fc->transport_matrix->getColumnFunctionSpace()));
     }
 
     const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
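getSafeTimeStepSize() above computes dt_max = 2 * min_i { m_i / (-l_ii) } over rows with positive lumped mass m_i and negative low-order diagonal l_ii, using a per-thread minimum merged in an omp critical section and an MPI_Allreduce(MIN) across ranks. For comparison only, the thread-level part can be expressed with an OpenMP 3.1 min reduction; whether that is preferable depends on the compilers escript must support, and the MPI reduction and the Paso types are omitted here:

    #include <algorithm>
    #include <limits>

    // Hypothetical free-function form of the per-rank computation.
    double safeTimeStep(long n, const double* l_main_diag, const double* lumped_mass)
    {
        double dt_max = std::numeric_limits<double>::max();
    #pragma omp parallel for reduction(min:dt_max) schedule(static)
        for (long i = 0; i < n; ++i) {
            if (lumped_mass[i] > 0. && l_main_diag[i] < 0.)
                dt_max = std::min(dt_max, lumped_mass[i] / (-l_main_diag[i]));
        }
        // double the bound only if some row actually constrained it
        return dt_max < std::numeric_limits<double>::max() ? 2. * dt_max : dt_max;
    }
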
diff --git a/paso/src/FCT_Solver.h b/paso/src/FCT_Solver.h
index db8ffd8..2b0f65a 100644
--- a/paso/src/FCT_Solver.h
+++ b/paso/src/FCT_Solver.h
@@ -30,15 +30,15 @@ struct FCT_Solver
 
     ~FCT_Solver();
 
-    err_t update(double* u, double* u_old, Options* options, Performance* pp);
+    SolverResult update(double* u, double* u_old, Options* options, Performance* pp);
 
-    err_t updateNL(double* u, double* u_old, Options* options, Performance* pp);
+    SolverResult updateNL(double* u, double* u_old, Options* options, Performance* pp);
 
-    err_t updateLCN(double* u, double* u_old, Options* options, Performance* pp);
+    SolverResult updateLCN(double* u, double* u_old, Options* options, Performance* pp);
 
     void initialize(double dt, Options* options, Performance* pp);
 
-    static double getSafeTimeStepSize(TransportProblem_ptr tp);
+    static double getSafeTimeStepSize(const_TransportProblem_ptr tp);
 
     static void setLowOrderOperator(TransportProblem_ptr tp);
 
@@ -56,7 +56,7 @@ struct FCT_Solver
     }
 
     const_TransportProblem_ptr transportproblem;
-    esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
     FCT_FluxLimiter* flux_limiter;
     index_t method;
     double omega;
diff --git a/paso/src/FluxLimiter.cpp b/paso/src/FluxLimiter.cpp
index d60919f..5dd2dfe 100644
--- a/paso/src/FluxLimiter.cpp
+++ b/paso/src/FluxLimiter.cpp
@@ -39,12 +39,14 @@ FCT_FluxLimiter::FCT_FluxLimiter(const_TransportProblem_ptr tp)
     MQ = new double[2*n];
     R = new double[2*n];
 
-    R_coupler.reset(new Coupler(tp->borrowConnector(), 2*blockSize));
-    u_tilde_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
+    R_coupler.reset(new Coupler(tp->borrowConnector(), 2*blockSize, mpi_info));
+    u_tilde_coupler.reset(new Coupler(tp->borrowConnector(), blockSize, mpi_info));
     antidiffusive_fluxes.reset(new SystemMatrix(
                 tp->transport_matrix->type, tp->transport_matrix->pattern,
                 tp->transport_matrix->row_block_size,
-                tp->transport_matrix->col_block_size, true));
+                tp->transport_matrix->col_block_size, true,
+                tp->transport_matrix->getRowFunctionSpace(),
+                tp->transport_matrix->getColumnFunctionSpace()));
     borrowed_lumped_mass_matrix = tp->lumped_mass_matrix;
 }
 
@@ -59,6 +61,7 @@ FCT_FluxLimiter::~FCT_FluxLimiter()
 // and calculates the limiters QP and QN
 void FCT_FluxLimiter::setU_tilde(const double* Mu_tilde)
 {
+    const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
     const dim_t n = getTotalNumRows();
     const_SystemMatrixPattern_ptr pattern(getFluxPattern());
 
diff --git a/paso/src/FluxLimiter.h b/paso/src/FluxLimiter.h
index 06501d3..3316d6c 100644
--- a/paso/src/FluxLimiter.h
+++ b/paso/src/FluxLimiter.h
@@ -43,7 +43,7 @@ struct FCT_FluxLimiter
     void addLimitedFluxes_Complete(double* b);
 
     SystemMatrix_ptr antidiffusive_fluxes;
-    esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
     double dt;
     double* u_tilde;
     double* MQ;  // (M_C* Q_min, M_C* Q_max)
diff --git a/paso/src/Functions.cpp b/paso/src/Functions.cpp
index 3462f90..90623ff 100644
--- a/paso/src/Functions.cpp
+++ b/paso/src/Functions.cpp
@@ -14,28 +14,28 @@
 *
 *****************************************************************************/
 
-
 #include "Functions.h"
 #include "PasoUtil.h"
 #include "Solver.h"
 
 namespace paso {
 
-Function::Function(esysUtils::JMPI& mpiInfo)
+Function::Function(const escript::JMPI& mpiInfo) :
+    mpi_info(mpiInfo)
 {
-    mpi_info = mpiInfo;
 }
 
 Function::~Function()
 {
 }
 
-err_t Function::derivative(double* J0w, const double* w, const double* f0,
+SolverResult Function::derivative(double* J0w, const double* w, const double* f0,
                            const double* x0, double* setoff, Performance* pp)
 {
-    err_t err = SOLVER_NO_ERROR;
+    const real_t EPSILON = escript::DataTypes::real_t_eps();
+    SolverResult err = NoError;
     dim_t i;
-    register double aw;
+    double aw;
     const double epsnew = sqrt(EPSILON);
     double ttt, s=epsnew, local_s, norm_w=0.;
     const dim_t n = getLen();
@@ -74,7 +74,7 @@ err_t Function::derivative(double* J0w, const double* w, const double* f0,
         //printf("s = %e\n",s);
         util::linearCombination(n,setoff,1.,x0,s,w);
         err = call(J0w, setoff, pp);
-        if (err==SOLVER_NO_ERROR) {
+        if (err==NoError) {
             util::update(n,1./s,J0w,-1./s,f0); // J0w = (J0w - f0)/epsnew;
             //for (int i=0;i<n; i++) printf("df[%d]=%e %e\n",i,J0w[i],w[i]);
         }
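Function::derivative() above approximates the directional derivative of F at x0 in direction w by a single forward difference, J0w ~= (F(x0 + s*w) - F(x0)) / s, with a step s on the order of sqrt(machine epsilon); the real implementation additionally scales s using norms of the inputs and runs the norm reduction over MPI. A compressed serial sketch with a hypothetical std::function-based F:

    #include <cmath>
    #include <functional>
    #include <limits>
    #include <vector>

    typedef std::vector<double> Vec;

    // f0 must already hold F(x0); returns the forward-difference approximation of J0*w.
    Vec directionalDerivative(const std::function<Vec(const Vec&)>& F,
                              const Vec& x0, const Vec& w, const Vec& f0)
    {
        const double s = std::sqrt(std::numeric_limits<double>::epsilon());
        Vec xs(x0.size());
        for (std::size_t i = 0; i < x0.size(); ++i)
            xs[i] = x0[i] + s * w[i];          // perturb along w
        Vec J0w = F(xs);
        for (std::size_t i = 0; i < J0w.size(); ++i)
            J0w[i] = (J0w[i] - f0[i]) / s;     // (F(x0+s*w) - F(x0)) / s
        return J0w;
    }
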
diff --git a/paso/src/Functions.h b/paso/src/Functions.h
index e17877f..1bdf864 100644
--- a/paso/src/Functions.h
+++ b/paso/src/Functions.h
@@ -18,6 +18,7 @@
 #ifndef __PASO_FUNCTIONS_H__
 #define __PASO_FUNCTIONS_H__
 
+#include "Paso.h"
 #include "performance.h"
 #include "SystemMatrix.h"
 
@@ -25,21 +26,21 @@ namespace paso {
 
 struct Function
 {
-    Function(esysUtils::JMPI& mpi_info);
+    Function(const escript::JMPI& mpi_info);
     virtual ~Function();
 
     /// sets value=F(arg)
-    virtual err_t call(double* value, const double* arg, Performance* pp) = 0;
+    virtual SolverResult call(double* value, const double* arg, Performance* pp) = 0;
 
     /// numerical calculation of the directional derivative J0w of F at x0 in
     /// the direction w. f0 is the value of F at x0. setoff is workspace
-    err_t derivative(double* J0w, const double* w, const double* f0,
-                     const double* x0, double* setoff, Performance* pp);
+    SolverResult derivative(double* J0w, const double* w, const double* f0,
+                            const double* x0, double* setoff, Performance* pp);
 
     /// returns the length of the vectors used by this function
     virtual dim_t getLen() = 0;
 
-    esysUtils::JMPI mpi_info;
+    const escript::JMPI mpi_info;
 };
 
 struct LinearSystem : public Function
@@ -47,7 +48,7 @@ struct LinearSystem : public Function
     LinearSystem(SystemMatrix_ptr A, double* b, Options* options);
     virtual ~LinearSystem();
 
-    virtual err_t call(double* value, const double* arg, Performance* pp);
+    virtual SolverResult call(double* value, const double* arg, Performance* pp);
 
     virtual dim_t getLen() { return n; }
 
diff --git a/paso/src/GMRES.cpp b/paso/src/GMRES.cpp
index 8754419..07c32fa 100644
--- a/paso/src/GMRES.cpp
+++ b/paso/src/GMRES.cpp
@@ -62,16 +62,21 @@
 #include "Solver.h"
 
 #include <cstring> // memset&memcpy
+
 namespace paso {
 
-err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
-                   double* tolerance, dim_t Length_of_recursion, dim_t restart,
-                   Performance* pp)
+SolverResult Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
+                          double* tolerance, dim_t Length_of_recursion,
+                          dim_t restart, Performance* pp)
 {
     if (Length_of_recursion <= 0) {
-        return SOLVER_INPUT_ERROR;
+        return InputError;
     }
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
     double *AP,**X_PRES,**R_PRES,**P_PRES, *dots, *loc_dots;
     double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
     double R_PRES_dot_AP0,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,breakf0;
@@ -80,7 +85,7 @@ err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     dim_t maxit,Num_iter_global=0,num_iter_restart=0,num_iter;
     dim_t i,z,order, th, local_n , rest, n_start ,n_end;
     bool breakFlag=false, maxIterFlag=false, convergeFlag=false,restartFlag=false;
-    err_t Status=SOLVER_NO_ERROR;
+    SolverResult status = NoError;
 
     // adapt original routine parameters
     const dim_t n = A->getTotalNumRows();
@@ -153,7 +158,7 @@ err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
         /***
          *** apply A to P to get AP
          ***/
-        SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &P_PRES[0][0], PASO_ZERO, &AP[0]);
+        A->MatrixVector_CSR_OFFSET0(PASO_ONE, &P_PRES[0][0], PASO_ZERO, &AP[0]);
         /***
          ***** calculation of the norm of R and the scalar products of
          ***   the residuals and A*P:
@@ -582,9 +587,9 @@ err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
         Norm_of_residual_global=norm_of_residual;
         Num_iter_global=num_iter;
         if (maxIterFlag) {
-               Status = SOLVER_MAXITER_REACHED;
+               status = MaxIterReached;
            } else if (breakFlag) {
-               Status = SOLVER_BREAKDOWN;
+               status = Breakdown;
         }
     for (i=0; i<Length_of_mem; i++) {
         delete[] X_PRES[i];
@@ -603,7 +608,7 @@ err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     delete[] loc_dots;
     *iter=Num_iter_global;
     *tolerance=Norm_of_residual_global;
-    return Status;
+    return status;
 }
 
 } // namespace paso
diff --git a/paso/src/GMRES2.cpp b/paso/src/GMRES2.cpp
index 73b2193..ca136c6 100644
--- a/paso/src/GMRES2.cpp
+++ b/paso/src/GMRES2.cpp
@@ -14,15 +14,16 @@
 *
 *****************************************************************************/
 
-#include <iostream>
 #include "Solver.h"
 #include "PasoUtil.h"
 
+#include <iostream>
+
 namespace paso {
 
-err_t Solver_GMRES2(Function* F, const double* f0, const double* x0,
-                    double* dx, dim_t* iter, double* tolerance,
-                    Performance* pp)
+SolverResult Solver_GMRES2(Function* F, const double* f0, const double* x0,
+                           double* dx, dim_t* iter, double* tolerance,
+                           Performance* pp)
 {
     static double RENORMALIZATION_CONST=0.001;
     const dim_t l=(*iter)+1, iter_max=*iter;
@@ -33,10 +34,10 @@ err_t Solver_GMRES2(Function* F, const double* f0, const double* x0,
     bool breakFlag = false, maxIterFlag = false, convergeFlag = false;
 
     if (n < 0 || iter_max<=0 || l<1 || rel_tol<0) {
-        return SOLVER_INPUT_ERROR;
+        return InputError;
     }
 
-    err_t Status=SOLVER_NO_ERROR;
+    SolverResult status=NoError;
 
     double* h = new double[l*l];
     double** v = new double*[l];
@@ -53,9 +54,9 @@ err_t Solver_GMRES2(Function* F, const double* f0, const double* x0,
     /*
      *  the show begins:
      */
-    normf0 = util::l2(n,f0,F->mpi_info);
+    normf0 = util::l2(n, f0, F->mpi_info);
     k = 0;
-    convergeFlag = (ABS(normf0)<=0);
+    convergeFlag = (std::abs(normf0)<=0);
     if (!convergeFlag) {
         abs_tol = rel_tol*normf0;
         std::cout << "GMRES2 initial residual norm " << normf0
@@ -64,7 +65,7 @@ err_t Solver_GMRES2(Function* F, const double* f0, const double* x0,
         util::zeroes(n, v[0]);
         util::update(n, 1., v[0], -1./normf0, f0); // v = -1./normf0*f0
         g[0] = normf0;
-        while (!breakFlag && !maxIterFlag && !convergeFlag && Status==SOLVER_NO_ERROR) {
+        while (!breakFlag && !maxIterFlag && !convergeFlag && status==NoError) {
             k++;
             v[k]=new double[n];
             /*
@@ -148,7 +149,7 @@ err_t Solver_GMRES2(Function* F, const double* f0, const double* x0,
     delete[] work;
     *iter=k;
     *tolerance=norm_of_residual;
-    return Status;
+    return status;
 }
 
 } // namespace paso
diff --git a/paso/src/GSMPI.cpp.old b/paso/src/GSMPI.cpp.old
deleted file mode 100644
index ce09aea..0000000
--- a/paso/src/GSMPI.cpp.old
+++ /dev/null
@@ -1,605 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/************************************************************************************/
-
-/* Paso: GS preconditioner with reordering                    */
-
-/************************************************************************************/
-
-/* Author: l.gao at uq.edu.au                                    */
-
-/************************************************************************************/
-
-#include "Paso.h"
-#include "SystemMatrix.h"
-#include "Solver.h"
-#include "PasoUtil.h"
-
-#include <stdio.h>
-
-/************************************************************************************/
-
-/* free all memory used by GS                                */
-
-void Solver_GSMPI_free(Solver_GS * in) {
-     if (in!=NULL) {
-        delete[] in->colorOf;
-        SparseMatrix_free(in->factors);
-        delete[] in->diag;
-        delete[] in->main_iptr;
-        Pattern_free(in->pattern);
-        delete in;
-     }
-}
-
-==========================================================================
-/************************************************************************************/
-
-/*   gs->diag saves the matrix of D{-1}
-     This is different from Solver_getGS(), in which, gs->diag
-     is the matrix D.
-*/
-Solver_GS* Solver_getGSMPI(SparseMatrix * A,bool_t verbose) {
-  dim_t n=A->numRows;
-  dim_t n_block=A->row_block_size;
-  dim_t block_size=A->block_size;
-  register index_t i,iptr_main=0,iPtr;
-  double time0=0,time_color=0,time_fac=0;
-  double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;
-
-  /* allocations: */
-/*  printf("n_block= %d, n= %d\n", n_block, n); */
-  Solver_GS* out=new Solver_GS;
-  out->colorOf=new index_t[n];
-  out->diag=new double[ ((size_t) n) * ((size_t) block_size)];
-  /*out->diag=new double[A->len];*/
-  out->main_iptr=new index_t[n];
-  out->pattern=Pattern_getReference(A->pattern);
-  out->factors=SparseMatrix_getReference(A);
-  out->n_block=n_block;
-  out->n=n;
-
-  if ( !(checkPtr(out->colorOf) || checkPtr(out->main_iptr) || checkPtr(out->factors)) ) {
-    time0=timer();
-    Pattern_color(A->pattern,&out->num_colors,out->colorOf);
-    time_color=timer()-time0;
-
-    if (noError()) {
-       time0=timer();
-
-       if (! (checkPtr(out->diag))) {
-             if (n_block==1) {
-                #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
-                for (i = 0; i < A->pattern->numOutput; i++) {
-                   iptr_main=0;
-                   out->diag[i]=1.;
-                   /* find main diagonal */
-                   for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
-                       if (A->pattern->index[iPtr]==i) {
-                           iptr_main=iPtr;
-                           if (ABS(A->val[iPtr]) > 0.) {
-                                out->diag[i]=1./(A->val[iPtr]);
-                           } else {
-                                setError(ZERO_DIVISION_ERROR, "Solver_getGSMPI: non-regular main diagonal block.");
-                           }
-                           break;
-                       }
-                   }
-                   out->main_iptr[i]=iptr_main;
-                }
-             } else if (n_block==2) {
-                #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
-                for (i = 0; i < A->pattern->numOutput; i++) {
-                   out->diag[i*4+0]= 1.;
-                   out->diag[i*4+1]= 0.;
-                   out->diag[i*4+2]= 0.;
-                   out->diag[i*4+3]= 1.;
-                   iptr_main=0;
-                   /* find main diagonal */
-                   for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
-                       if (A->pattern->index[iPtr]==i) {
-                           iptr_main=iPtr;
-                           A11=A->val[iPtr*4];
-                           A21=A->val[iPtr*4+1];
-                           A12=A->val[iPtr*4+2];
-                           A22=A->val[iPtr*4+3];
-                           D = A11*A22-A12*A21;
-                           if (ABS(D)>0.) {
-                                D=1./D;
-                                out->diag[i*4  ]=  A22*D;
-                                out->diag[i*4+1]= -A21*D;
-                                out->diag[i*4+2]= -A12*D;
-                                out->diag[i*4+3]=  A11*D;
-                           } else {
-                                setError(ZERO_DIVISION_ERROR, "Solver_getGSMPI: non-regular main diagonal block.");
-                           }
-                           break;
-                       }
-                   }
-                   out->main_iptr[i]=iptr_main;
-                }
-             } else if (n_block==3) {
-                #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
-                for (i = 0; i < A->pattern->numOutput; i++) {
-                   out->diag[i*9  ]=1.;
-                   out->diag[i*9+1]=0.;
-                   out->diag[i*9+2]=0.;
-                   out->diag[i*9+3]=0.;
-                   out->diag[i*9+4]=1.;
-                   out->diag[i*9+5]=0.;
-                   out->diag[i*9+6]=0.;
-                   out->diag[i*9+7]=0.;
-                   out->diag[i*9+8]=1.;
-                   iptr_main=0;
-                   /* find main diagonal */
-                   for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
-                       if (A->pattern->index[iPtr]==i) {
-                           iptr_main=iPtr;
-                           A11=A->val[iPtr*9  ];
-                           A21=A->val[iPtr*9+1];
-                           A31=A->val[iPtr*9+2];
-                           A12=A->val[iPtr*9+3];
-                           A22=A->val[iPtr*9+4];
-                           A32=A->val[iPtr*9+5];
-                           A13=A->val[iPtr*9+6];
-                           A23=A->val[iPtr*9+7];
-                           A33=A->val[iPtr*9+8];
-                           D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
-                           if (ABS(D)>0.) {
-                                  D=1./D;
-                                  out->diag[i*9  ]= (A22*A33-A23*A32)*D;
-                                  out->diag[i*9+1]= (A31*A23-A21*A33)*D;
-                                  out->diag[i*9+2]= (A21*A32-A31*A22)*D;
-                                  out->diag[i*9+3]= (A13*A32-A12*A33)*D;
-                                  out->diag[i*9+4]= (A11*A33-A31*A13)*D;
-                                  out->diag[i*9+5]= (A12*A31-A11*A32)*D;
-                                  out->diag[i*9+6]= (A12*A23-A13*A22)*D;
-                                  out->diag[i*9+7]= (A13*A21-A11*A23)*D;
-                                  out->diag[i*9+8]= (A11*A22-A12*A21)*D;
-                           } else {
-                                setError(ZERO_DIVISION_ERROR, "Solver_getGSMPI: non-regular main diagonal block.");
-                           }
-                           break;
-                       }
-                   }
-                   out->main_iptr[i]=iptr_main;
-                }
-             }
-       }
-
-       time_fac=timer()-time0;
-     }
-  }
-  if (noError()) {
-      if (verbose) {
-         printf("GS_MPI: %d color used \n",out->num_colors);
-         printf("timing: GS_MPI: coloring/elimination : %e/%e\n",time_color,time_fac);
-     }
-     return out;
-  } else  {
-     Solver_GSMPI_free(out);
-     return NULL;
-  }
-}
-
-void Solver_GS_local(SystemMatrix* A, Solver_GS * gs, double * x, double * b);
-
-/************************************************************************************/
-
-/* Applies MPI versioned GS
-
-     In fact it solves Ax=b in two steps:
-     step 1: among different nodes (MPI ranks), we use block Jacobi
-	   x{k} = x{k-1} + D{-1}(b-A*x{k-1})
-        => D*x{k} = b - (E+F)x{k-1}
-	      where matrix D is (let p be the number of nodes):
-	       --------------------
-	       |A1|  |  | ...  |  |
-	       --------------------
-	       |  |A2|  | ...  |  |
-	       --------------------
-	       |  |  |A3| ...  |  |
-	       --------------------
-               |          ...     |
-	       --------------------
-               |  |  |  | ...  |Ap|
-	       --------------------
-	      and Ai (i \in [1,p]) represents the mainBlock of matrix
-	      A on node i. Matrix (E+F) is represented as the coupleBlock
-              of matrix A on each node (annotated as ACi).
-           Therefore, step 1 can be turned into the following for node i:
-       => Ai * x{k} = b - ACi * x{k-1}
-           where both x{k} and b are the segment of x and b on node i,
-	   and x{k-1} is the old segment values of x on all other nodes.
-
-     step 2: inside node i, we use Gauss-Seidel
-         let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
-         by using symmetric Gauss-Seidel, this can be solved in a forward
-         phase and a backward phase:
-	   forward phase:  x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
-           backward phase: x{m+1} = diag(Ai){-1} (b' - F*{m+1} - E*x{m})
-*/
-
-void Solver_solveGSMPI(SystemMatrix* A, Solver_GS * gs, double * x, double * b) {
-     register dim_t i;
-     dim_t n_block=gs->n_block;
-     dim_t n=gs->n;
-     dim_t sweeps=gs->sweeps;
-
-     /*xi{0} = 0
-       xi{1} = Ai{-1} * bi
-       xi{2} = Ai{-1} * (bi - ACi * xj{1})
-       ...
-       xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
-     #pragma omp parallel for private(i) schedule(static)
-     for (i=0;i<n*n_block;++i) x[i]=0;
-
-     Solver_GS_local(A,gs,x,b);
-
-     if (sweeps > 1) {
-          double *new_b=new double[n*n_block];
-          double *remote_x=NULL;
-
-          while (sweeps > 1) {
-               /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are remote
-                  values of x, which requires MPI communication */
-               #pragma omp parallel for private(i) schedule(static)
-               for (i=0;i<n*n_block;++i) new_b[i]=b[i];
-
-               if (A->col_coupleBlock->pattern->ptr!=NULL){
-                    SystemMatrix_startCollect(A,x);
-                    remote_x=SystemMatrix_finishCollect(A);
-                    SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
-               }
-
-               Solver_GS_local(A,gs,x,new_b);
-               sweeps --;
-          }
-          delete[] new_b;
-     }
-
-     return;
-}
-
-/* Locally solve A'x=b, where A' is the mainBlock of global system matrix A */
-void Solver_GS_local(SystemMatrix* A, Solver_GS * gs, double * x, double * b) {
-     dim_t n_block=gs->n_block;
-     dim_t n=gs->n;
-     double sum0, sum1, sum2, X0, X1, X2;
-     double *val=A->mainBlock->val;
-     double *diag=gs->diag;
-     index_t *ptr=gs->pattern->ptr;
-     index_t *index=gs->pattern->index;
-     dim_t i, j, iptr, xi, ai, xj, aj;
-#ifdef _OPENMP
-     dim_t nt, len, rest, t, istart, iend;
-
-     nt=omp_get_max_threads();
-     len=n/nt;
-     rest=n-len*nt;
-#endif
-     /* TO BE DONE: add handler to deal with the case "n is too small"
-                    to be worth run in threads. */
-
-#ifdef _OPENMP
-     /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are x values
-        computed by other threads in previous sweep */
-     if (nt > 1) {
-     if (n_block == 1){
-         #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
-         for (t=0; t<nt; t++) {
-              istart=len*t+MIN(t,rest);
-              iend=istart+len+(t<rest ? 1:0);
-              for (i=istart; i<iend; i++){
-                   sum0=b[i];
-                   for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
-                        j=index[iptr];
-                        if (j<istart || j>=iend){
-                            sum0 = sum0 - val[iptr] * x[j];
-                        }
-                   }
-                   b[i]=sum0;
-              }
-         }
-     } else if (n_block == 2) {
-         #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
-         for (t=0; t<nt; t++) {
-              istart=len*t+MIN(t,rest);
-              iend=istart+len+(t<rest ? 1:0);
-              for (i=istart; i<iend; i++){
-                   xi=2*i;
-                   sum0=b[xi];
-                   sum1=b[xi+1];
-                   for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
-                        j=index[iptr];
-                        if (j<istart || j>=iend){
-                            xj=2*j;
-                            aj=4*iptr;
-                            X0=x[xj];
-                            X1=x[xj+1];
-                            sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
-                            sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
-                        }
-                   }
-                   b[xi]=sum0;
-                   b[xi+1]=sum1;
-              }
-         }
-     } else if (n_block == 3) {
-         #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
-         for (t=0; t<nt; t++) {
-              istart=len*t+MIN(t,rest);
-              iend=istart+len+(t<rest ? 1:0);
-              for (i=istart; i<iend; i++){
-                   xi=3*i;
-                   sum0=b[xi];
-                   sum1=b[xi+1];
-                   sum2=b[xi+2];
-                   for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
-                        j=index[iptr];
-                        if (j<istart || j>=iend){
-                            xj=3*j;
-                            aj=9*iptr;
-                            X0=x[xj];
-                            X1=x[xj+1];
-                            X2=x[xj+2];
-                            sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
-                            sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
-                            sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
-                        }
-                   }
-                   b[xi]=sum0;
-                   b[xi+1]=sum1;
-                   b[xi+2]=sum2;
-              }
-         }
-     }
-     }
-#endif
-
-     /* step 1: forward iteration
-               x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
-     /* One Gauss-Seidel iteration
-        In case of forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1})
-           => into a loop (without coloring):
-            for i in [0,n-1] do
-               x_i = (1/a_ii) *
-                 (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
-        where the first "\sum" sums up newly updated values of x elements
-        while the second "\sum" sums up previous (old) values of x elements.
-        In case of backward iteration x{k} = D{-1}(b - F*x{k} - E*x{k-1})
-     */
-     if (n_block == 1){
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
-         for (t=0; t<nt; t++) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=istart; i<iend; i++){
-#else
-         for (i=0; i<n; i++) {
-#endif
-              sum0 = b[i];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       sum0 = sum0 - val[iptr] * x[j];
-                   }
-              }
-              x[i] = sum0*diag[i];
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     } else if (n_block == 2) {
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
-         for (t=0; t<nt; t++) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=istart; i<iend; i++){
-#else
-         for (i=0; i<n; i++) {
-#endif
-              xi=2*i;
-              ai=4*i;
-              sum0 = b[xi];
-              sum1 = b[xi+1];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       xj=2*j;
-                       aj=4*iptr;
-                       X0=x[xj];
-                       X1=x[xj+1];
-                       sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
-                       sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
-                   }
-              }
-              x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
-              x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     } else if (n_block == 3) {
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
-         for (t=0; t<nt; t++) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=istart; i<iend; i++){
-#else
-         for (i=0; i<n; i++) {
-#endif
-              xi=3*i;
-              ai=9*i;
-              sum0 = b[xi];
-              sum1 = b[xi+1];
-              sum2 = b[xi+2];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       xj=3*j;
-                       aj=9*iptr;
-                       X0=x[xj];
-                       X1=x[xj+1];
-                       X2=x[xj+2];
-                       sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
-                       sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
-                       sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
-                   }
-              }
-              x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
-              x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
-              x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     }
-
-     /* step 2: backward iteration
-               x{k} = D{-1}(b - F*x{k} - E*x{k-1}) */
-     if (n_block == 1){
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
-         for (t=nt-1; t>=0; t--) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=iend-1; i>=istart; i--){
-#else
-         for (i=n-1; i>=0; i--) {
-#endif
-              sum0 = b[i];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       sum0 = sum0 - val[iptr] * x[j];
-                   }
-              }
-              x[i] = sum0*diag[i];
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     } else if (n_block == 2) {
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
-         for (t=nt-1; t>=0; t--) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=iend-1; i>=istart; i--){
-#else
-         for (i=n-1; i>=0; i--) {
-#endif
-              xi=2*i;
-              ai=4*i;
-              sum0 = b[xi];
-              sum1 = b[xi+1];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       xj=2*j;
-                       aj=4*iptr;
-                       X0=x[xj];
-                       X1=x[xj+1];
-                       sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
-                       sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
-                   }
-              }
-              x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
-              x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     } else if (n_block == 3) {
-#ifdef _OPENMP
-         #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
-         for (t=nt-1; t>=0; t--) {
-           istart=len*t+MIN(t,rest);
-           iend=istart+len+(t<rest ? 1:0);
-           for (i=iend-1; i>=istart; i--){
-#else
-         for (i=n-1; i>=0; i--) {
-#endif
-              xi=3*i;
-              ai=9*i;
-              sum0 = b[xi];
-              sum1 = b[xi+1];
-              sum2 = b[xi+2];
-              for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
-                   j=index[iptr];
-#ifdef _OPENMP
-                   if (j >= istart && j < iend && i != j){
-#else
-                   if (i != j) {
-#endif
-                       xj=3*j;
-                       aj=9*iptr;
-                       X0=x[xj];
-                       X1=x[xj+1];
-                       X2=x[xj+2];
-                       sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
-                       sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
-                       sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
-                   }
-              }
-              x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
-              x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
-              x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
-#ifdef _OPENMP
-           }
-         }
-#else
-         }
-#endif
-     }
-}
-
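For reference, the routine deleted above implements a symmetric block Gauss-Seidel sweep: a forward pass followed by the backward pass labelled "step 2", both working on CSR data with the diagonal blocks already inverted. A minimal serial sketch of what it computes in the scalar case (n_block == 1) is shown below; the signature and names are invented for illustration and are not part of Paso.

/* Minimal serial sketch (not part of the diff) of the sweep the removed code
 * performs for n_block == 1: a forward Gauss-Seidel pass followed by the
 * backward pass labelled "step 2". The CSR arrays and the pre-inverted
 * diagonal `diag` mirror the removed code; names and signature are invented
 * for this sketch only. */
#include <cstddef>

void symmetricGaussSeidelSweep(std::size_t n,
                               const std::size_t* ptr,    // CSR row pointers, n+1 entries
                               const std::size_t* index,  // CSR column indices
                               const double* val,         // CSR values
                               const double* diag,        // 1/a_ii, precomputed
                               const double* b,
                               double* x)                 // holds the current iterate
{
    // forward pass: x_i <- a_ii^{-1} * (b_i - sum_{j != i} a_ij * x_j)
    for (std::size_t i = 0; i < n; ++i) {
        double sum = b[i];
        for (std::size_t ip = ptr[i]; ip < ptr[i + 1]; ++ip) {
            const std::size_t j = index[ip];
            if (j != i)
                sum -= val[ip] * x[j];
        }
        x[i] = diag[i] * sum;
    }
    // backward pass ("step 2"): the same update, sweeping the rows in reverse
    for (std::size_t i = n; i-- > 0;) {
        double sum = b[i];
        for (std::size_t ip = ptr[i]; ip < ptr[i + 1]; ++ip) {
            const std::size_t j = index[ip];
            if (j != i)
                sum -= val[ip] * x[j];
        }
        x[i] = diag[i] * sum;
    }
}

For n_block == 2 or 3 the scalar factor diag[i] becomes the inverted 2x2 or 3x3 diagonal block applied to (sum0, sum1, sum2), which is what the unrolled expressions in the deleted code spell out; under OpenMP the deleted code additionally restricts the coupling to each thread's contiguous row range so the threads can sweep independently.
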
diff --git a/paso/src/ILU.cpp b/paso/src/ILU.cpp
index 0079313..94f6fc2 100644
--- a/paso/src/ILU.cpp
+++ b/paso/src/ILU.cpp
@@ -27,8 +27,8 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "Preconditioner.h"
 #include "PasoUtil.h"
+#include "Preconditioner.h"
 
 namespace paso {
 
@@ -43,128 +43,126 @@ void Solver_ILU_free(Solver_ILU * in)
 /// constructs the incomplete block factorization
 Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
 {
-  const dim_t n=A->numRows;
-  const dim_t n_block=A->row_block_size;
-  const index_t* colorOf = A->pattern->borrowColoringPointer();
-  const dim_t num_colors = A->pattern->getNumColors();
-  const index_t *ptr_main = A->borrowMainDiagonalPointer();
-  register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
-  register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
-  register index_t i,iptr_main,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2, iptr;
-  double time0=0,time_fac=0;
-  /* allocations: */
-  Solver_ILU* out=new Solver_ILU;
-  out->factors=new double[A->len];
+    const dim_t n=A->numRows;
+    const dim_t n_block=A->row_block_size;
+    const index_t* colorOf = A->pattern->borrowColoringPointer();
+    const dim_t num_colors = A->pattern->getNumColors();
+    const index_t *ptr_main = A->borrowMainDiagonalPointer();
+    double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
+    double S11,S12,S13,S21,S22,S23,S31,S32,S33;
+    index_t i,iptr_main,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2, iptr;
+    Solver_ILU* out=new Solver_ILU;
+    out->factors=new double[A->len];
 
-  if ( ! Esys_checkPtr(out->factors)  ) {
+    double time0 = escript::gettime();
 
-       time0=Esys_timer();
+#pragma omp parallel for schedule(static) private(i,iptr,k)
+    for (i = 0; i < n; ++i) {
+        for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
+            for (k=0;k<n_block*n_block;++k)
+                out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
+        }
+    }
 
-       #pragma omp parallel for schedule(static) private(i,iptr,k)
-       for (i = 0; i < n; ++i) {
-               for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
-                     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
-               }
-       }
-       /* start factorization */
-       for (color=0;color<num_colors && Esys_noError();++color) {
-              if (n_block==1) {
-                 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
-                 for (i = 0; i < n; ++i) {
-                    if (colorOf[i]==color) {
-                       for (color2=0;color2<color;++color2) {
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]==color2) {
+    // start factorization
+    for (color=0; color<num_colors; ++color) {
+        if (n_block==1) {
+#pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    for (color2=0;color2<color;++color2) {
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]==color2) {
                                 A11=out->factors[iptr_ik];
                                 /* a_ij=a_ij-a_ik*a_kj */
                                 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
-                                   j=A->pattern->index[iptr_kj];
-                                   if (colorOf[j]>color2) {
-                                      S11=out->factors[iptr_kj];
-                                      for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
-                                         if (j==A->pattern->index[iptr_ij]) {
-                                            out->factors[iptr_ij]-=A11*S11;
-                                            break;
-                                         }
-                                      }
-                                   }
+                                    j=A->pattern->index[iptr_kj];
+                                    if (colorOf[j]>color2) {
+                                        S11=out->factors[iptr_kj];
+                                        for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
+                                            if (j==A->pattern->index[iptr_ij]) {
+                                                out->factors[iptr_ij]-=A11*S11;
+                                                break;
+                                            }
+                                        }
+                                    }
                                 }
-                             }
-                          }
-                       }
-                       iptr_main=ptr_main[i];
-                       D=out->factors[iptr_main];
-                       if (ABS(D)>0.) {
-                          D=1./D;
-                          out->factors[iptr_main]=D;
-                          /* a_ik=a_ii^{-1}*a_ik */
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]>color) {
+                            }
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    D=out->factors[iptr_main];
+                    if (std::abs(D)>0.) {
+                        D=1./D;
+                        out->factors[iptr_main]=D;
+                        /* a_ik=a_ii^{-1}*a_ik */
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]>color) {
                                 A11=out->factors[iptr_ik];
                                 out->factors[iptr_ik]=A11*D;
-                             }
-                          }
-                       } else {
-                            Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
-                       }
+                            }
+                        }
+                    } else {
+                        throw PasoException("Solver_getILU: non-regular main diagonal block.");
                     }
-                 }
-              } else if (n_block==2) {
-                 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
-                 for (i = 0; i < n; ++i) {
-                    if (colorOf[i]==color) {
-                       for (color2=0;color2<color;++color2) {
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]==color2) {
+                }
+            }
+        } else if (n_block==2) {
+#pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    for (color2=0;color2<color;++color2) {
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]==color2) {
                                 A11=out->factors[iptr_ik*4  ];
                                 A21=out->factors[iptr_ik*4+1];
                                 A12=out->factors[iptr_ik*4+2];
                                 A22=out->factors[iptr_ik*4+3];
                                 /* a_ij=a_ij-a_ik*a_kj */
                                 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
-                                   j=A->pattern->index[iptr_kj];
-                                   if (colorOf[j]>color2) {
-                                      S11=out->factors[iptr_kj*4];
-                                      S21=out->factors[iptr_kj*4+1];
-                                      S12=out->factors[iptr_kj*4+2];
-                                      S22=out->factors[iptr_kj*4+3];
-                                      for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
-                                         if (j==A->pattern->index[iptr_ij]) {
-                                            out->factors[4*iptr_ij  ]-=A11*S11+A12*S21;
-                                            out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
-                                            out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
-                                            out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
-                                            break;
-                                         }
-                                      }
-                                   }
+                                    j=A->pattern->index[iptr_kj];
+                                    if (colorOf[j]>color2) {
+                                        S11=out->factors[iptr_kj*4];
+                                        S21=out->factors[iptr_kj*4+1];
+                                        S12=out->factors[iptr_kj*4+2];
+                                        S22=out->factors[iptr_kj*4+3];
+                                        for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
+                                            if (j==A->pattern->index[iptr_ij]) {
+                                                out->factors[4*iptr_ij  ]-=A11*S11+A12*S21;
+                                                out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
+                                                out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
+                                                out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
+                                                break;
+                                            }
+                                        }
+                                    }
                                 }
-                             }
-                          }
-                       }
-                       iptr_main=ptr_main[i];
-                       A11=out->factors[iptr_main*4];
-                       A21=out->factors[iptr_main*4+1];
-                       A12=out->factors[iptr_main*4+2];
-                       A22=out->factors[iptr_main*4+3];
-                       D = A11*A22-A12*A21;
-                       if (ABS(D)>0.) {
-                          D=1./D;
-                          S11= A22*D;
-                          S21=-A21*D;
-                          S12=-A12*D;
-                          S22= A11*D;
-                          out->factors[iptr_main*4]  = S11;
-                          out->factors[iptr_main*4+1]= S21;
-                          out->factors[iptr_main*4+2]= S12;
-                          out->factors[iptr_main*4+3]= S22;
-                          /* a_ik=a_ii^{-1}*a_ik */
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]>color) {
+                            }
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    A11=out->factors[iptr_main*4];
+                    A21=out->factors[iptr_main*4+1];
+                    A12=out->factors[iptr_main*4+2];
+                    A22=out->factors[iptr_main*4+3];
+                    D = A11*A22-A12*A21;
+                    if (std::abs(D)>0.) {
+                        D=1./D;
+                        S11= A22*D;
+                        S21=-A21*D;
+                        S12=-A12*D;
+                        S22= A11*D;
+                        out->factors[iptr_main*4]  = S11;
+                        out->factors[iptr_main*4+1]= S21;
+                        out->factors[iptr_main*4+2]= S12;
+                        out->factors[iptr_main*4+3]= S22;
+                        /* a_ik=a_ii^{-1}*a_ik */
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]>color) {
                                 A11=out->factors[iptr_ik*4  ];
                                 A21=out->factors[iptr_ik*4+1];
                                 A12=out->factors[iptr_ik*4+2];
@@ -173,21 +171,21 @@ Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
                                 out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
                                 out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
                                 out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
-                             }
-                          }
-                       } else {
-                            Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
-                       }
+                            }
+                        }
+                    } else {
+                        throw PasoException("Solver_getILU: non-regular main diagonal block.");
                     }
-                 }
-              } else if (n_block==3) {
-                 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
-                 for (i = 0; i < n; ++i) {
-                    if (colorOf[i]==color) {
-                       for (color2=0;color2<color;++color2) {
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]==color2) {
+                }
+            }
+        } else if (n_block==3) {
+#pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    for (color2=0;color2<color;++color2) {
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]==color2) {
                                 A11=out->factors[iptr_ik*9  ];
                                 A21=out->factors[iptr_ik*9+1];
                                 A31=out->factors[iptr_ik*9+2];
@@ -199,73 +197,73 @@ Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
                                 A33=out->factors[iptr_ik*9+8];
                                 /* a_ij=a_ij-a_ik*a_kj */
                                 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
-                                   j=A->pattern->index[iptr_kj];
-                                   if (colorOf[j]>color2) {
-                                      S11=out->factors[iptr_kj*9  ];
-                                      S21=out->factors[iptr_kj*9+1];
-                                      S31=out->factors[iptr_kj*9+2];
-                                      S12=out->factors[iptr_kj*9+3];
-                                      S22=out->factors[iptr_kj*9+4];
-                                      S32=out->factors[iptr_kj*9+5];
-                                      S13=out->factors[iptr_kj*9+6];
-                                      S23=out->factors[iptr_kj*9+7];
-                                      S33=out->factors[iptr_kj*9+8];
-                                      for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
-                                         if (j==A->pattern->index[iptr_ij]) {
-                                            out->factors[iptr_ij*9  ]-=A11*S11+A12*S21+A13*S31;
-                                            out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
-                                            out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
-                                            out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
-                                            out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
-                                            out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
-                                            out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
-                                            out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
-                                            out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
-                                            break;
-                                         }
-                                      }
-                                   }
+                                    j=A->pattern->index[iptr_kj];
+                                    if (colorOf[j]>color2) {
+                                        S11=out->factors[iptr_kj*9  ];
+                                        S21=out->factors[iptr_kj*9+1];
+                                        S31=out->factors[iptr_kj*9+2];
+                                        S12=out->factors[iptr_kj*9+3];
+                                        S22=out->factors[iptr_kj*9+4];
+                                        S32=out->factors[iptr_kj*9+5];
+                                        S13=out->factors[iptr_kj*9+6];
+                                        S23=out->factors[iptr_kj*9+7];
+                                        S33=out->factors[iptr_kj*9+8];
+                                        for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
+                                            if (j==A->pattern->index[iptr_ij]) {
+                                                out->factors[iptr_ij*9  ]-=A11*S11+A12*S21+A13*S31;
+                                                out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
+                                                out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
+                                                out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
+                                                out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
+                                                out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
+                                                out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
+                                                out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
+                                                out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
+                                                break;
+                                            }
+                                        }
+                                    }
                                 }
-                             }
-                          }
-                       }
-                       iptr_main=ptr_main[i];
-                       A11=out->factors[iptr_main*9  ];
-                       A21=out->factors[iptr_main*9+1];
-                       A31=out->factors[iptr_main*9+2];
-                       A12=out->factors[iptr_main*9+3];
-                       A22=out->factors[iptr_main*9+4];
-                       A32=out->factors[iptr_main*9+5];
-                       A13=out->factors[iptr_main*9+6];
-                       A23=out->factors[iptr_main*9+7];
-                       A33=out->factors[iptr_main*9+8];
-                       D  =  A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
-                       if (ABS(D)>0.) {
-                          D=1./D;
-                          S11=(A22*A33-A23*A32)*D;
-                          S21=(A31*A23-A21*A33)*D;
-                          S31=(A21*A32-A31*A22)*D;
-                          S12=(A13*A32-A12*A33)*D;
-                          S22=(A11*A33-A31*A13)*D;
-                          S32=(A12*A31-A11*A32)*D;
-                          S13=(A12*A23-A13*A22)*D;
-                          S23=(A13*A21-A11*A23)*D;
-                          S33=(A11*A22-A12*A21)*D;
+                            }
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    A11=out->factors[iptr_main*9  ];
+                    A21=out->factors[iptr_main*9+1];
+                    A31=out->factors[iptr_main*9+2];
+                    A12=out->factors[iptr_main*9+3];
+                    A22=out->factors[iptr_main*9+4];
+                    A32=out->factors[iptr_main*9+5];
+                    A13=out->factors[iptr_main*9+6];
+                    A23=out->factors[iptr_main*9+7];
+                    A33=out->factors[iptr_main*9+8];
+                    D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
+                    if (std::abs(D)>0.) {
+                        D=1./D;
+                        S11=(A22*A33-A23*A32)*D;
+                        S21=(A31*A23-A21*A33)*D;
+                        S31=(A21*A32-A31*A22)*D;
+                        S12=(A13*A32-A12*A33)*D;
+                        S22=(A11*A33-A31*A13)*D;
+                        S32=(A12*A31-A11*A32)*D;
+                        S13=(A12*A23-A13*A22)*D;
+                        S23=(A13*A21-A11*A23)*D;
+                        S33=(A11*A22-A12*A21)*D;
 
-                          out->factors[iptr_main*9  ]=S11;
-                          out->factors[iptr_main*9+1]=S21;
-                          out->factors[iptr_main*9+2]=S31;
-                          out->factors[iptr_main*9+3]=S12;
-                          out->factors[iptr_main*9+4]=S22;
-                          out->factors[iptr_main*9+5]=S32;
-                          out->factors[iptr_main*9+6]=S13;
-                          out->factors[iptr_main*9+7]=S23;
-                          out->factors[iptr_main*9+8]=S33;
+                        out->factors[iptr_main*9  ]=S11;
+                        out->factors[iptr_main*9+1]=S21;
+                        out->factors[iptr_main*9+2]=S31;
+                        out->factors[iptr_main*9+3]=S12;
+                        out->factors[iptr_main*9+4]=S22;
+                        out->factors[iptr_main*9+5]=S32;
+                        out->factors[iptr_main*9+6]=S13;
+                        out->factors[iptr_main*9+7]=S23;
+                        out->factors[iptr_main*9+8]=S33;
 
-                          /* a_ik=a_ii^{-1}*a_ik */
-                          for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                             k=A->pattern->index[iptr_ik];
-                             if (colorOf[k]>color) {
+                        /* a_ik=a_ii^{-1}*a_ik */
+                        for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                            k=A->pattern->index[iptr_ik];
+                            if (colorOf[k]>color) {
                                 A11=out->factors[iptr_ik*9  ];
                                 A21=out->factors[iptr_ik*9+1];
                                 A31=out->factors[iptr_ik*9+2];
@@ -284,30 +282,27 @@ Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
                                 out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
                                 out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
                                 out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
-                             }
-                          }
-                       } else {
-                            Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
-                       }
+                            }
+                        }
+                    } else {
+                        throw PasoException("Solver_getILU: non-regular main diagonal block.");
                     }
-                 }
-              } else {
-                 Esys_setError(VALUE_ERROR, "Solver_getILU: block size greater than 3 is not supported.");
-              }
-              #pragma omp barrier
-       }
-       time_fac=Esys_timer()-time0;
-  }
-  if (Esys_noError()) {
-      if (verbose) printf("timing: ILU: coloring/elimination: %e sec\n",time_fac);
-     return out;
-  } else  {
-     Solver_ILU_free(out);
-     return NULL;
-  }
+                }
+            }
+        } else {
+            throw PasoException("Solver_getILU: block size greater than 3 is not supported.");
+        }
+#pragma omp barrier
+    }
+
+    if (verbose) {
+        const double time_fac=escript::gettime()-time0;
+        printf("timing: ILU: coloring/elimination: %e sec\n",time_fac);
+    }
+    return out;
 }
 
-/************************************************************************************/
+/****************************************************************************/
 
 /* Applies ILU precondition b-> x
 
@@ -321,151 +316,152 @@ Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
 void Solver_solveILU(SparseMatrix_ptr A, Solver_ILU* ilu, double* x,
                      const double* b)
 {
-     register dim_t i,k;
-     register index_t color,iptr_ik,iptr_main;
-     register double S1,S2,S3,R1,R2,R3;
-     const dim_t n=A->numRows;
-     const dim_t n_block=A->row_block_size;
-     const index_t* colorOf = A->pattern->borrowColoringPointer();
-     const dim_t num_colors = A->pattern->getNumColors();
-     const index_t *ptr_main = A->borrowMainDiagonalPointer();
+    dim_t i,k;
+    index_t color,iptr_ik,iptr_main;
+    double S1,S2,S3,R1,R2,R3;
+    const dim_t n=A->numRows;
+    const dim_t n_block=A->row_block_size;
+    const index_t* colorOf = A->pattern->borrowColoringPointer();
+    const dim_t num_colors = A->pattern->getNumColors();
+    const index_t *ptr_main = A->borrowMainDiagonalPointer();
 
-     /* copy x into b */
-     #pragma omp parallel for private(i) schedule(static)
-     for (i=0;i<n*n_block;++i) x[i]=b[i];
-     /* forward substitution */
-     for (color=0;color<num_colors;++color) {
-           if (n_block==1) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[i];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]<color) {
-                             R1=x[k];
-                             S1-=ilu->factors[iptr_ik]*R1;
-                          }
-                     }
-                     iptr_main=ptr_main[i];
-                     x[i]=ilu->factors[iptr_main]*S1;
-                   }
-              }
-           } else if (n_block==2) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[2*i];
-                     S2=x[2*i+1];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]<color) {
-                             R1=x[2*k];
-                             R2=x[2*k+1];
-                             S1-=ilu->factors[4*iptr_ik  ]*R1+ilu->factors[4*iptr_ik+2]*R2;
-                             S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
-                          }
-                     }
-                     iptr_main=ptr_main[i];
-                     x[2*i  ]=ilu->factors[4*iptr_main  ]*S1+ilu->factors[4*iptr_main+2]*S2;
-                     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
-                   }
+    /* copy x into b */
+#pragma omp parallel for private(i) schedule(static)
+    for (i=0;i<n*n_block;++i)
+        x[i]=b[i];
 
-              }
-           } else if (n_block==3) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[3*i];
-                     S2=x[3*i+1];
-                     S3=x[3*i+2];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]<color) {
-                             R1=x[3*k];
-                             R2=x[3*k+1];
-                             R3=x[3*k+2];
-                             S1-=ilu->factors[9*iptr_ik  ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
-                             S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
-                             S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
-                          }
-                     }
-                     iptr_main=ptr_main[i];
-                     x[3*i  ]=ilu->factors[9*iptr_main  ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
-                     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
-                     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
-                 }
-              }
-           }
-     }
-     /* backward substitution */
-     for (color=(num_colors)-1;color>-1;--color) {
-           if (n_block==1) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[i];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]>color) {
-                             R1=x[k];
-                             S1-=ilu->factors[iptr_ik]*R1;
-                          }
-                     }
-                     x[i]=S1;
-                   }
-              }
-           } else if (n_block==2) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[2*i];
-                     S2=x[2*i+1];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]>color) {
-                             R1=x[2*k];
-                             R2=x[2*k+1];
-                             S1-=ilu->factors[4*iptr_ik  ]*R1+ilu->factors[4*iptr_ik+2]*R2;
-                             S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
-                          }
-                     }
-                     x[2*i]=S1;
-                     x[2*i+1]=S2;
-                   }
-              }
-           } else if (n_block==3) {
-              #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
-              for (i = 0; i < n; ++i) {
-                   if (colorOf[i]==color) {
-                     /* x_i=x_i-a_ik*x_k */
-                     S1=x[3*i  ];
-                     S2=x[3*i+1];
-                     S3=x[3*i+2];
-                     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
-                          k=A->pattern->index[iptr_ik];
-                          if (colorOf[k]>color) {
-                             R1=x[3*k];
-                             R2=x[3*k+1];
-                             R3=x[3*k+2];
-                             S1-=ilu->factors[9*iptr_ik  ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
-                             S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
-                             S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
-                          }
-                     }
-                     x[3*i]=S1;
-                     x[3*i+1]=S2;
-                     x[3*i+2]=S3;
-                   }
-              }
-         }
-         #pragma omp barrier
-     }
+    /* forward substitution */
+    for (color=0;color<num_colors;++color) {
+        if (n_block==1) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[i];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]<color) {
+                            R1=x[k];
+                            S1-=ilu->factors[iptr_ik]*R1;
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    x[i]=ilu->factors[iptr_main]*S1;
+                }
+            }
+        } else if (n_block==2) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[2*i];
+                    S2=x[2*i+1];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]<color) {
+                            R1=x[2*k];
+                            R2=x[2*k+1];
+                            S1-=ilu->factors[4*iptr_ik  ]*R1+ilu->factors[4*iptr_ik+2]*R2;
+                            S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    x[2*i  ]=ilu->factors[4*iptr_main  ]*S1+ilu->factors[4*iptr_main+2]*S2;
+                    x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
+                }
+            }
+        } else if (n_block==3) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[3*i];
+                    S2=x[3*i+1];
+                    S3=x[3*i+2];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]<color) {
+                            R1=x[3*k];
+                            R2=x[3*k+1];
+                            R3=x[3*k+2];
+                            S1-=ilu->factors[9*iptr_ik  ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
+                            S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
+                            S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
+                        }
+                    }
+                    iptr_main=ptr_main[i];
+                    x[3*i  ]=ilu->factors[9*iptr_main  ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
+                    x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
+                    x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
+                }
+            }
+        }
+    }
+    /* backward substitution */
+    for (color=num_colors-1; color>-1; --color) {
+        if (n_block==1) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[i];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]>color) {
+                            R1=x[k];
+                            S1-=ilu->factors[iptr_ik]*R1;
+                        }
+                    }
+                    x[i]=S1;
+                }
+            }
+        } else if (n_block==2) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[2*i];
+                    S2=x[2*i+1];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]>color) {
+                            R1=x[2*k];
+                            R2=x[2*k+1];
+                            S1-=ilu->factors[4*iptr_ik  ]*R1+ilu->factors[4*iptr_ik+2]*R2;
+                            S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
+                        }
+                    }
+                    x[2*i]=S1;
+                    x[2*i+1]=S2;
+                }
+            }
+        } else if (n_block==3) {
+#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
+            for (i = 0; i < n; ++i) {
+                if (colorOf[i]==color) {
+                    /* x_i=x_i-a_ik*x_k */
+                    S1=x[3*i  ];
+                    S2=x[3*i+1];
+                    S3=x[3*i+2];
+                    for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
+                        k=A->pattern->index[iptr_ik];
+                        if (colorOf[k]>color) {
+                            R1=x[3*k];
+                            R2=x[3*k+1];
+                            R3=x[3*k+2];
+                            S1-=ilu->factors[9*iptr_ik  ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
+                            S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
+                            S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
+                        }
+                    }
+                    x[3*i]=S1;
+                    x[3*i+1]=S2;
+                    x[3*i+2]=S3;
+                }
+            }
+        }
+#pragma omp barrier
+    }
 }
 
 } // namespace paso
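The reworked Solver_solveILU above applies the ILU factors by sweeping over the colour classes of the matrix graph: forward substitution over the colours in ascending order, then backward substitution in descending order, with the main-diagonal factors already inverted. A minimal serial sketch for the scalar case (n_block == 1) follows; the CSRPattern struct, the mainDiag array and the signature are invented for this sketch and are not part of the Paso API.

/* Minimal serial sketch (not part of the diff) of the coloured forward and
 * backward substitution that Solver_solveILU performs for n_block == 1. */
#include <cstddef>
#include <vector>

struct CSRPattern {
    std::vector<std::size_t> ptr;    // row pointers, n+1 entries
    std::vector<std::size_t> index;  // column indices
};

void applyColouredILU(std::size_t n, int numColours,
                      const CSRPattern& pat,
                      const std::vector<int>& colourOf,         // colour of each row
                      const std::vector<std::size_t>& mainDiag, // position of a_ii in each row
                      const std::vector<double>& factors,       // ILU factors, diagonal already inverted
                      const std::vector<double>& b,
                      std::vector<double>& x)
{
    x.assign(b.begin(), b.end());
    // forward substitution: a row of colour c only uses rows of lower colours,
    // so all rows of one colour could be updated in parallel.
    for (int c = 0; c < numColours; ++c) {
        for (std::size_t i = 0; i < n; ++i) {
            if (colourOf[i] != c) continue;
            double s = x[i];
            for (std::size_t ip = pat.ptr[i]; ip < pat.ptr[i + 1]; ++ip) {
                const std::size_t k = pat.index[ip];
                if (colourOf[k] < c)
                    s -= factors[ip] * x[k];
            }
            x[i] = factors[mainDiag[i]] * s;   // multiply by the inverted diagonal
        }
    }
    // backward substitution: now only neighbours of higher colour contribute,
    // and no diagonal scaling is applied.
    for (int c = numColours - 1; c >= 0; --c) {
        for (std::size_t i = 0; i < n; ++i) {
            if (colourOf[i] != c) continue;
            double s = x[i];
            for (std::size_t ip = pat.ptr[i]; ip < pat.ptr[i + 1]; ++ip) {
                const std::size_t k = pat.index[ip];
                if (colourOf[k] > c)
                    s -= factors[ip] * x[k];
            }
            x[i] = s;
        }
    }
}

Because a row of colour c only couples to rows of other colours within each pass, all rows of one colour can be updated independently, which is why the real code places an OpenMP parallel for inside each colour loop; block sizes 2 and 3 replace the scalar products with small dense block operations.
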
diff --git a/paso/src/LocalAMG.cpp b/paso/src/LocalAMG.cpp
index 17c1989..a576efc 100644
--- a/paso/src/LocalAMG.cpp
+++ b/paso/src/LocalAMG.cpp
@@ -25,17 +25,18 @@
 
 /****************************************************************************/
 
-#define SHOW_TIMING false
-#define USE_TRANSPOSE true
-#define SMALL_PANEL true
-
-#include <iostream>
 #include "Paso.h"
-#include "Preconditioner.h"
 #include "Options.h"
 #include "PasoUtil.h"
-#include "UMFPACK.h"
+#include "Preconditioner.h"
 #include "MKL.h"
+#include "UMFPACK.h"
+
+#include <iostream>
+
+#define SHOW_TIMING false
+#define USE_TRANSPOSE true
+#define SMALL_PANEL true
 
 namespace paso {
 
@@ -146,7 +147,7 @@ Preconditioner_LocalAMG* Preconditioner_LocalAMG_alloc(SparseMatrix_ptr A_p,
     /*
          set splitting of unknowns:
     */
-    time0=Esys_timer();
+    time0=escript::gettime();
     if (n_block>1) {
         Preconditioner_LocalAMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau);
     } else {
@@ -158,169 +159,149 @@ Preconditioner_LocalAMG* Preconditioner_LocalAMG_alloc(SparseMatrix_ptr A_p,
     /* in BoomerAMG if interpolation is used FF connectivity is required: */
     if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
         Preconditioner_LocalAMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
-    options->coarsening_selection_time=Esys_timer()-time0 + std::max(0., options->coarsening_selection_time);
+    options->coarsening_selection_time=escript::gettime()-time0 + std::max(0., options->coarsening_selection_time);
 
-    if (Esys_noError()) {
-        #pragma omp parallel for private(i) schedule(static)
-        for (i = 0; i < n; ++i) F_marker[i]=((F_marker[i] ==  PASO_AMG_IN_F) ? PASO_AMG_IN_C : PASO_AMG_IN_F);
-
-        /*
-           count number of unknowns to be eliminated:
-        */
-        n_F=util::cumsum_maskedTrue(n,counter, (int*)F_marker);
-        n_C=n-n_F;
-        if (verbose)
-            std::cout << "Preconditioner: AMG level " << level << ": "
-                << n_F << " unknowns are flagged for elimination. "
-                << n-n_F << " left." << std::endl;
-
-        if ( n_F == 0 ) {  /* This is a nasty case. A direct solver should be used, return NULL */
-           out = NULL;
-        } else {
-           out = new Preconditioner_LocalAMG;
-           out->level = level;
-           out->post_sweeps = options->post_sweeps;
-           out->pre_sweeps  = options->pre_sweeps;
-           out->r = NULL;
-           out->x_C = NULL;
-           out->b_C = NULL;
-           out->AMG_C = NULL;
-           out->Smoother=NULL;
-           mask_C=new index_t[n];
-           rows_in_F=new index_t[n_F];
-           if (Esys_noError()) {
-                out->Smoother = Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
-
-                if (n_C != 0) {
-                    /* if nothing has been removed we have a diagonal
-                     * dominant matrix and we just run a few steps of
-                     * the smoother */
-                    /* allocate helpers :*/
-                    out->x_C = new double[n_block*n_C];
-                    out->b_C = new double[n_block*n_C];
-                    out->r   = new double[n_block*n];
-
-                    if (Esys_noError()) {
-                       /* creates index for F */
-                       #pragma omp parallel private(i)
-                       {
-                          #pragma omp for schedule(static)
-                          for (i = 0; i < n; ++i) {
-                             if (F_marker[i])
-                                 rows_in_F[counter[i]]=i;
-                          }
-                       }
-                       // create mask of C nodes with value >-1 gives new id
-                       i=util::cumsum_maskedFalse(n, counter, (int*)F_marker);
-
-                       #pragma omp parallel for private(i) schedule(static)
-                       for (i = 0; i < n; ++i) {
-                          if  (F_marker[i]) {
-                             mask_C[i]=-1;
-                          } else {
-                             mask_C[i]=counter[i];;
-                          }
-                       }
-                       /*
-                          get Prolongation :
-                       */
-                       time0=Esys_timer();
-                       out->P=Preconditioner_LocalAMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
-                       if (SHOW_TIMING)
-                           std::cout << "timing: level " << level <<
-                               ": getProlongation: " << Esys_timer()-time0
-                               << std::endl;
-                    }
-                    /*
-                       construct Restriction operator as transposed of Prolongation operator:
-                    */
-                    if (Esys_noError()) {
-                       time0=Esys_timer();
-                       out->R = out->P->getTranspose();
-                       if (SHOW_TIMING)
-                           std::cout << "timing: level " << level
-                               << ": SparseMatrix::getTranspose: "
-                               << Esys_timer()-time0 << std::endl;
-                    }
-                    /*
-                    construct coarse level matrix:
-                    */
-                    if (Esys_noError()) {
-                       SparseMatrix_ptr Atemp;
-                       time0=Esys_timer();
-                       if (USE_TRANSPOSE)
-                         Atemp = SparseMatrix_MatrixMatrixTranspose(A_p,out->P,out->R);
-                       else
-                         Atemp = SparseMatrix_MatrixMatrix(A_p,out->P);
-                       A_C=SparseMatrix_MatrixMatrix(out->R, Atemp);
-                       if (SHOW_TIMING)
-                           std::cout << "timing: level " << level
-                               << ": construct coarse matrix: "
-                               << Esys_timer()-time0 << std::endl;
-                    }
+    #pragma omp parallel for private(i) schedule(static)
+    for (i = 0; i < n; ++i) F_marker[i]=((F_marker[i] ==  PASO_AMG_IN_F) ? PASO_AMG_IN_C : PASO_AMG_IN_F);
 
-                    /*
-                       construct coarser level:
-                    */
-                    if ( Esys_noError()) {
-                       out->AMG_C=Preconditioner_LocalAMG_alloc(A_C,level+1,options);
-                    }
-                    if ( Esys_noError()) {
-                        if ( out->AMG_C == NULL ) {
-                          out->reordering = options->reordering;
-                          out->refinements = options->coarse_matrix_refinements;
-                          // no coarse level matrix has been constructed.
-                          // Use direct solver
-#ifdef MKL
-                          out->A_C = A_C->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1);
-                          A_C.reset();
-                          out->A_C->solver_package = PASO_MKL;
-                          if (verbose)
-                              std::cout << "Preconditioner: AMG: use MKL "
-                                  << "direct solver on the coarsest level "
-                                  << "(number of unknowns = "
-                                  << n_C*n_block << ")." << std::endl;
-#elif defined USE_UMFPACK
-                          out->A_C = A_C->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC);
-                          A_C.reset();
-                          out->A_C->solver_package = PASO_UMFPACK;
-                          if (verbose)
-                              std::cout << "Preconditioner: AMG: use "
-                                  << "UMFPACK direct solver on the "
-                                  << "coarsest level (number of unknowns = "
-                                  << n_C*n_block << ")." << std::endl;
+    /*
+       count number of unknowns to be eliminated:
+    */
+    n_F=util::cumsum_maskedTrue(n,counter, (int*)F_marker);
+    n_C=n-n_F;
+    if (verbose)
+        std::cout << "Preconditioner: AMG level " << level << ": "
+            << n_F << " unknowns are flagged for elimination. "
+            << n-n_F << " left." << std::endl;
+
+    if ( n_F == 0 ) {  /* This is a nasty case. A direct solver should be used, return NULL */
+        out = NULL;
+    } else {
+        out = new Preconditioner_LocalAMG;
+        out->level = level;
+        out->post_sweeps = options->post_sweeps;
+        out->pre_sweeps  = options->pre_sweeps;
+        out->r = NULL;
+        out->x_C = NULL;
+        out->b_C = NULL;
+        out->AMG_C = NULL;
+        out->Smoother=NULL;
+        mask_C=new index_t[n];
+        rows_in_F=new index_t[n_F];
+        out->Smoother = Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
+
+        if (n_C != 0) {
+            /* if nothing has been removed we have a diagonal
+             * dominant matrix and we just run a few steps of
+             * the smoother */
+            /* allocate helpers :*/
+            out->x_C = new double[n_block*n_C];
+            out->b_C = new double[n_block*n_C];
+            out->r   = new double[n_block*n];
+
+            /* creates index for F */
+#pragma omp parallel private(i)
+            {
+#pragma omp for schedule(static)
+                for (i = 0; i < n; ++i) {
+                    if (F_marker[i])
+                        rows_in_F[counter[i]]=i;
+                }
+            }
+            // create mask of C nodes with value >-1 gives new id
+            i=util::cumsum_maskedFalse(n, counter, (int*)F_marker);
+
+#pragma omp parallel for private(i) schedule(static)
+            for (i = 0; i < n; ++i) {
+                if (F_marker[i]) {
+                    mask_C[i]=-1;
+                } else {
+                    mask_C[i]=counter[i];
+                }
+            }
+            /*
+              get Prolongation :
+            */
+            time0=escript::gettime();
+            out->P=Preconditioner_LocalAMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
+            if (SHOW_TIMING)
+                std::cout << "timing: level " << level <<
+                   ": getProlongation: " << escript::gettime()-time0
+                   << std::endl;
+            /*
+               construct Restriction operator as transposed of Prolongation operator:
+            */
+            time0=escript::gettime();
+            out->R = out->P->getTranspose();
+            if (SHOW_TIMING)
+                std::cout << "timing: level " << level
+                    << ": SparseMatrix::getTranspose: "
+                    << escript::gettime()-time0 << std::endl;
+            /*
+            construct coarse level matrix:
+            */
+            SparseMatrix_ptr Atemp;
+            time0=escript::gettime();
+            if (USE_TRANSPOSE)
+                Atemp = SparseMatrix_MatrixMatrixTranspose(A_p,out->P,out->R);
+            else
+                Atemp = SparseMatrix_MatrixMatrix(A_p,out->P);
+            A_C=SparseMatrix_MatrixMatrix(out->R, Atemp);
+            if (SHOW_TIMING)
+                std::cout << "timing: level " << level
+                       << ": construct coarse matrix: "
+                       << escript::gettime()-time0 << std::endl;
+
+            /*
+               construct coarser level:
+            */
+            out->AMG_C=Preconditioner_LocalAMG_alloc(A_C,level+1,options);
+            if ( out->AMG_C == NULL ) {
+                out->reordering = options->reordering;
+                out->refinements = options->coarse_matrix_refinements;
+                // no coarse level matrix has been constructed.
+                // Use direct solver
+#ifdef ESYS_HAVE_MKL
+                out->A_C = A_C->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1);
+                A_C.reset();
+                out->A_C->solver_package = PASO_MKL;
+                if (verbose)
+                    std::cout << "Preconditioner: AMG: use MKL "
+                      << "direct solver on the coarsest level "
+                      << "(number of unknowns = "
+                      << n_C*n_block << ")." << std::endl;
+#elif defined ESYS_HAVE_UMFPACK
+                out->A_C = A_C->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC);
+                A_C.reset();
+                out->A_C->solver_package = PASO_UMFPACK;
+                if (verbose)
+                    std::cout << "Preconditioner: AMG: use "
+                      << "UMFPACK direct solver on the "
+                      << "coarsest level (number of unknowns = "
+                      << n_C*n_block << ")." << std::endl;
 #else
-                          out->A_C = A_C;
-                          out->A_C->solver_p = Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
-                          out->A_C->solver_package = PASO_SMOOTHER;
-                          if (verbose)
-                              std::cout << "Preconditioner: AMG: use "
-                                  << "smoother on the coarsest level "
-                                  << "(number of unknowns = "
-                                  << n_C*n_block << ")." << std::endl;
+                out->A_C = A_C;
+                out->A_C->solver_p = Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
+                out->A_C->solver_package = PASO_SMOOTHER;
+                if (verbose)
+                    std::cout << "Preconditioner: AMG: use "
+                      << "smoother on the coarsest level "
+                      << "(number of unknowns = "
+                      << n_C*n_block << ")." << std::endl;
 #endif
-                        } else {
-                            // finally we set some helpers for the solver step
-                            out->A_C = A_C;
-                        }
-                    }
-                }
+            } else {
+                // finally we set some helpers for the solver step
+                out->A_C = A_C;
             }
-            delete[] mask_C;
-            delete[] rows_in_F;
         }
+        delete[] mask_C;
+        delete[] rows_in_F;
     }
     delete[] counter;
     delete[] F_marker;
     delete[] degree_S;
     delete[] S;
-
-    if (Esys_noError()) {
-        return out;
-    } else  {
-        Preconditioner_LocalAMG_free(out);
-        return NULL;
-    }
+    return out;
 }
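
For reference, the coarse operator assembled in this routine is the Galerkin triple product A_C = R*A*P, with R obtained by transposing P (hence the SparseMatrix::getTranspose timing above) and an optional transpose-based kernel selected by USE_TRANSPOSE. A minimal dense sketch of the same product, using plain row-major std::vector storage instead of Paso's SparseMatrix, for orientation only:

#include <cstddef>
#include <vector>

// Dense illustration of the Galerkin coarse-grid operator A_C = R * A * P.
// Row-major storage; R is nC x n, A is n x n, P is n x nC. Paso performs the
// equivalent product on CSR matrices.
using Mat = std::vector<double>;

Mat matmul(const Mat& X, const Mat& Y, std::size_t rows, std::size_t inner,
           std::size_t cols)
{
    Mat Z(rows * cols, 0.0);
    for (std::size_t i = 0; i < rows; ++i)
        for (std::size_t k = 0; k < inner; ++k)
            for (std::size_t j = 0; j < cols; ++j)
                Z[i*cols + j] += X[i*inner + k] * Y[k*cols + j];
    return Z;
}

Mat galerkinProduct(const Mat& R, const Mat& A, const Mat& P,
                    std::size_t nC, std::size_t n)
{
    Mat AP = matmul(A, P, n, n, nC);    // A*P   (n x nC)
    return matmul(R, AP, nC, n, nC);    // R*(A*P)  (nC x nC)
}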
 
 
@@ -334,22 +315,22 @@ void Preconditioner_LocalAMG_solve(SparseMatrix_ptr A,
     const dim_t pre_sweeps=amg->pre_sweeps;
 
     // presmoothing
-    time0=Esys_timer();
+    time0=escript::gettime();
     Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, false);
-    time0=Esys_timer()-time0;
+    time0=escript::gettime()-time0;
     if (SHOW_TIMING)
         std::cout << "timing: level " << amg->level << ": Presmoothing: "
             << time0 << std::endl;
     // end of presmoothing
 
-    time0=Esys_timer();
+    time0=escript::gettime();
     util::copy(n, amg->r, b);                            /*  r <- b */
     SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
     SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
-    time0=Esys_timer()-time0;
+    time0=escript::gettime()-time0;
     /* coarse level solve */
     if (amg->AMG_C == NULL) {
-            time0=Esys_timer();
+            time0=escript::gettime();
             /*  A_C is the coarsest level */
             switch (amg->A_C->solver_package) {
                case (PASO_MKL):
@@ -364,19 +345,19 @@ void Preconditioner_LocalAMG_solve(SparseMatrix_ptr A,
             }
             if (SHOW_TIMING)
                 std::cout << "timing: level " << amg->level
-                    << ": DIRECT SOLVER: " << Esys_timer()-time0 << std::endl;
+                    << ": DIRECT SOLVER: " << escript::gettime()-time0 << std::endl;
     } else {
             Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
     }
-    time0=time0+Esys_timer();
+    time0=time0+escript::gettime();
     SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
 
     /*postsmoothing*/
 
     /*solve Ax=b with initial guess x */
-    time0=Esys_timer();
+    time0=escript::gettime();
     Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, true);
-    time0=Esys_timer()-time0;
+    time0=escript::gettime()-time0;
     if (SHOW_TIMING)
          std::cout << "timing: level " << amg->level << ": Postsmoothing: "
              << time0 << std::endl;
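
The solve routine above is one multiplicative two-level step: presmooth, form and restrict the residual, solve (or recurse) on the coarse level, prolongate the correction, postsmooth. A compact sketch of that ordering, with the grid-transfer operations passed as callables (placeholders, not the Paso smoother or matrix types):

#include <cstddef>
#include <functional>
#include <vector>

// Sketch of one multiplicative two-level step in the order used by
// Preconditioner_LocalAMG_solve. All callables are stand-ins.
using Vec = std::vector<double>;

void twoLevelStep(const std::function<void(Vec&, const Vec&)>& smooth,
                  const std::function<Vec(const Vec&, const Vec&)>& residual, // b - A*x
                  const std::function<Vec(const Vec&)>& restrictOp,           // R*r
                  const std::function<Vec(const Vec&)>& coarseSolve,          // direct solve or recursion
                  const std::function<Vec(const Vec&)>& prolongOp,            // P*x_C
                  Vec& x, const Vec& b)
{
    smooth(x, b);                             // presmoothing sweeps
    Vec r = residual(x, b);                   // r = b - A x
    Vec xC = coarseSolve(restrictOp(r));      // b_C = R r;  x_C = solve(A_C, b_C)
    Vec corr = prolongOp(xC);                 // interpolate the coarse correction
    for (std::size_t i = 0; i < x.size(); ++i)
        x[i] += corr[i];                      // x = x + P x_C
    smooth(x, b);                             // postsmoothing with the updated guess
}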
@@ -401,11 +382,11 @@ void Preconditioner_LocalAMG_setStrongConnections(SparseMatrix_ptr A,
 #pragma omp parallel for private(i,iptr) schedule(static)
     for (i=0;i<n;++i) {
         double max_offdiagonal = 0.;
-        register double sum_row=0;
-        register double main_row=0;
+        double sum_row=0;
+        double main_row=0;
         #pragma ivdep
         for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-            register index_t j=A->pattern->index[iptr];
+            index_t j=A->pattern->index[iptr];
             const double fnorm=std::abs(A->val[iptr]);
 
             if(j != i) {
@@ -417,11 +398,11 @@ void Preconditioner_LocalAMG_setStrongConnections(SparseMatrix_ptr A,
         }
         {
             const double threshold = theta*max_offdiagonal;
-            register dim_t kdeg=0;
+            dim_t kdeg=0;
             if (tau*main_row < sum_row) { /* no diagonal dominance */
                 #pragma ivdep
                 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-                    register index_t j=A->pattern->index[iptr];
+                    index_t j=A->pattern->index[iptr];
                     if (std::abs(A->val[iptr])>threshold && i!=j) {
                         S[A->pattern->ptr[i]+kdeg] = j;
                         kdeg++;
@@ -463,11 +444,11 @@ void Preconditioner_LocalAMG_setStrongConnections_Block(SparseMatrix_ptr A,
          for (i=0;i<n;++i) {
 
             double max_offdiagonal = 0.;
-            register double sum_row=0;
-            register double main_row=0;
+            double sum_row=0;
+            double main_row=0;
 
             for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-               register index_t j=A->pattern->index[iptr];
+               index_t j=A->pattern->index[iptr];
                double fnorm=0;
                #pragma ivdep
                for(bi=0;bi<n_block*n_block;++bi) {
@@ -486,11 +467,11 @@ void Preconditioner_LocalAMG_setStrongConnections_Block(SparseMatrix_ptr A,
             }
             {
                const double threshold = theta*max_offdiagonal;
-               register dim_t kdeg=0;
+               dim_t kdeg=0;
                if (tau*main_row < sum_row) { /* no diagonal dominance */
                   #pragma ivdep
                   for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
-                     register index_t j=A->pattern->index[iptr];
+                     index_t j=A->pattern->index[iptr];
                      if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
                         S[A->pattern->ptr[i]+kdeg] = j;
                         kdeg++;
@@ -512,7 +493,7 @@ void Preconditioner_LocalAMG_RungeStuebenSearch(dim_t n,
     bool* notInPanel=NULL;
     index_t *lambda=NULL, *ST=NULL, *panel=NULL, lambda_max, lambda_k;
     dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
-    register index_t j, itmp;
+    index_t j, itmp;
 
     // make sure that the return of util::arg_max is not pointing to nirvana
     if (n<=0)
@@ -528,160 +509,99 @@ void Preconditioner_LocalAMG_RungeStuebenSearch(dim_t n,
         panel=new index_t[n];
     }
 
-    if (Esys_noError() ) {
-        /* initialize split_marker: */
-        /* Those unknowns which are not influenced go into F, the rest is available for F or C */
-        #pragma omp parallel for private(i) schedule(static)
-        for (i=0;i<n;++i) {
-            degree_ST[i]=0;
-            if (degree_S[i]>0) {
-                lambda[i]=0;
-                split_marker[i]=PASO_AMG_UNDECIDED;
-            } else {
-                split_marker[i]=PASO_AMG_IN_F;
-                lambda[i]=-1;
-            }
+    /* initialize split_marker: */
+    /* Those unknowns which are not influenced go into F, the rest is available for F or C */
+    #pragma omp parallel for private(i) schedule(static)
+    for (i=0;i<n;++i) {
+        degree_ST[i]=0;
+        if (degree_S[i]>0) {
+            lambda[i]=0;
+            split_marker[i]=PASO_AMG_UNDECIDED;
+        } else {
+            split_marker[i]=PASO_AMG_IN_F;
+            lambda[i]=-1;
         }
-        /* create transpose :*/
-        for (i=0;i<n;++i) {
-            for (p=0; p<degree_S[i]; ++p) {
-               j=S[offset_S[i]+p];
-               ST[offset_S[j]+degree_ST[j]]=i;
-               degree_ST[j]++;
-            }
+    }
+    /* create transpose :*/
+    for (i=0;i<n;++i) {
+        for (p=0; p<degree_S[i]; ++p) {
+           j=S[offset_S[i]+p];
+           ST[offset_S[j]+degree_ST[j]]=i;
+           degree_ST[j]++;
         }
+    }
 
-        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
-        #pragma omp parallel for private(i, j, p, itmp) schedule(static)
-        for (i=0;i<n;++i) {
-            if (split_marker[i]==PASO_AMG_UNDECIDED) {
-                itmp=lambda[i];
-                for (p=0; p<degree_ST[i]; ++p) {
-                    j=ST[offset_S[i]+p];
-                    if (split_marker[j]==PASO_AMG_UNDECIDED) {
-                        itmp++;
-                    } else {  /* at this point there are no C points */
-                        itmp+=2;
-                    }
+    /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
+    #pragma omp parallel for private(i, j, p, itmp) schedule(static)
+    for (i=0;i<n;++i) {
+        if (split_marker[i]==PASO_AMG_UNDECIDED) {
+            itmp=lambda[i];
+            for (p=0; p<degree_ST[i]; ++p) {
+                j=ST[offset_S[i]+p];
+                if (split_marker[j]==PASO_AMG_UNDECIDED) {
+                    itmp++;
+                } else {  /* at this point there are no C points */
+                    itmp+=2;
                 }
-                lambda[i]=itmp;
             }
+            lambda[i]=itmp;
         }
-        if (usePanel && !SMALL_PANEL) {
-            #pragma omp parallel for private(i) schedule(static)
-            for (i=0;i<n;++i)
-                notInPanel[i]=true;
-        }
-
-        // start search
-        i=util::arg_max(n,lambda);
-        while (lambda[i] > -1) { // is there any undecided unknown?
-            if (SMALL_PANEL) {
-                do {
-                    len_panel=0;
-                    // the unknown i is moved to C
-                    split_marker[i]=PASO_AMG_IN_C;
-                    lambda[i]=-1; // lambda from unavailable unknowns is set to -1
-
-                    // all undecided unknowns strongly coupled to i are moved to F
-                    for (p=0; p<degree_ST[i]; ++p) {
-                        j=ST[offset_S[i]+p];
-
-                        if (split_marker[j]==PASO_AMG_UNDECIDED) {
-                            split_marker[j]=PASO_AMG_IN_F;
-                            lambda[j]=-1;
-                            for (q=0; q<degree_S[j]; ++q) {
-                                k=S[offset_S[j]+q];
-                                if (split_marker[k]==PASO_AMG_UNDECIDED) {
-                                    lambda[k]++;
-                                    panel[len_panel]=k;
-                                    len_panel++;
-                                }
-                            }
-                        }
-                    }
-                    for (p=0; p<degree_S[i]; ++p) {
-                        j=S[offset_S[i]+p];
-                        if (split_marker[j]==PASO_AMG_UNDECIDED) {
-                            lambda[j]--;
-                            panel[len_panel]=j;
-                            len_panel++;
-                        }
-                    }
+    }
+    if (usePanel && !SMALL_PANEL) {
+        #pragma omp parallel for private(i) schedule(static)
+        for (i=0;i<n;++i)
+            notInPanel[i]=true;
+    }
 
-                    lambda_max=-1;
-                    for (q=0; q<len_panel; q++) {
-                        k = panel[q];
-                        j = lambda[k];
-                        if (lambda_max < j) {
-                            lambda_max = j;
-                            i = k;
-                        }
-                    }
-                } while (len_panel>0);
-            } else if (usePanel) {
+    // start search
+    i=util::arg_max(n,lambda);
+    while (lambda[i] > -1) { // is there any undecided unknown?
+        if (SMALL_PANEL) {
+            do {
                 len_panel=0;
-                do {
-                    // the unknown i is moved to C
-                    split_marker[i]=PASO_AMG_IN_C;
-                    lambda[i]=-1; // lambda from unavailable unknowns is set to -1
-
-                    // all undecided unknowns strongly coupled to i are moved to F
-                    for (p=0; p<degree_ST[i]; ++p) {
-                        j=ST[offset_S[i]+p];
-                        if (split_marker[j]==PASO_AMG_UNDECIDED) {
-                            split_marker[j]=PASO_AMG_IN_F;
-                            lambda[j]=-1;
-                            for (q=0; q<degree_S[j]; ++q) {
-                                k=S[offset_S[j]+q];
-                                if (split_marker[k]==PASO_AMG_UNDECIDED) {
-                                    lambda[k]++;
-                                    if (notInPanel[k]) {
-                                        notInPanel[k]=false;
-                                        panel[len_panel]=k;
-                                        len_panel++;
-                                    }
-                                } // the unknown i is moved to C
-                                split_marker[i]=PASO_AMG_IN_C;
-                                lambda[i]=-1; // lambda from unavailable unknowns is set to -1
-                            }
-                        }
-                    }
-                    for (p=0; p<degree_S[i]; ++p) {
-                        j=S[offset_S[i]+p];
-                        if (split_marker[j]==PASO_AMG_UNDECIDED) {
-                            lambda[j]--;
-                            if (notInPanel[j]) {
-                                notInPanel[j]=false;
-                                panel[len_panel]=j;
+                // the unknown i is moved to C
+                split_marker[i]=PASO_AMG_IN_C;
+                lambda[i]=-1; // lambda from unavailable unknowns is set to -1
+
+                // all undecided unknowns strongly coupled to i are moved to F
+                for (p=0; p<degree_ST[i]; ++p) {
+                    j=ST[offset_S[i]+p];
+
+                    if (split_marker[j]==PASO_AMG_UNDECIDED) {
+                        split_marker[j]=PASO_AMG_IN_F;
+                        lambda[j]=-1;
+                        for (q=0; q<degree_S[j]; ++q) {
+                            k=S[offset_S[j]+q];
+                            if (split_marker[k]==PASO_AMG_UNDECIDED) {
+                                lambda[k]++;
+                                panel[len_panel]=k;
                                 len_panel++;
                             }
                         }
                     }
+                }
+                for (p=0; p<degree_S[i]; ++p) {
+                    j=S[offset_S[i]+p];
+                    if (split_marker[j]==PASO_AMG_UNDECIDED) {
+                        lambda[j]--;
+                        panel[len_panel]=j;
+                        len_panel++;
+                    }
+                }
 
-                    // consolidate panel
-                    // remove lambda[q]=-1
-                    lambda_max=-1;
-                    i=-1;
-                    len_panel_new=0;
-                    for (q=0; q<len_panel; q++) {
-                        k=panel[q];
-                        lambda_k=lambda[k];
-                        if (split_marker[k]==PASO_AMG_UNDECIDED) {
-                            panel[len_panel_new]=k;
-                            len_panel_new++;
-
-                            if (lambda_max == lambda_k) {
-                                if (k<i) i=k;
-                            } else if (lambda_max < lambda_k) {
-                                lambda_max =lambda_k;
-                                i=k;
-                            }
-                        }
+                lambda_max=-1;
+                for (q=0; q<len_panel; q++) {
+                    k = panel[q];
+                    j = lambda[k];
+                    if (lambda_max < j) {
+                        lambda_max = j;
+                        i = k;
                     }
-                    len_panel=len_panel_new;
-                } while (len_panel>0);
-            } else {
+                }
+            } while (len_panel>0);
+        } else if (usePanel) {
+            len_panel=0;
+            do {
                 // the unknown i is moved to C
                 split_marker[i]=PASO_AMG_IN_C;
                 lambda[i]=-1; // lambda from unavailable unknowns is set to -1
@@ -692,22 +612,81 @@ void Preconditioner_LocalAMG_RungeStuebenSearch(dim_t n,
                     if (split_marker[j]==PASO_AMG_UNDECIDED) {
                         split_marker[j]=PASO_AMG_IN_F;
                         lambda[j]=-1;
-
                         for (q=0; q<degree_S[j]; ++q) {
                             k=S[offset_S[j]+q];
-                            if (split_marker[k]==PASO_AMG_UNDECIDED)
+                            if (split_marker[k]==PASO_AMG_UNDECIDED) {
                                 lambda[k]++;
+                                if (notInPanel[k]) {
+                                    notInPanel[k]=false;
+                                    panel[len_panel]=k;
+                                    len_panel++;
+                                }
+                            } // the unknown i is moved to C
+                            split_marker[i]=PASO_AMG_IN_C;
+                            lambda[i]=-1; // lambda from unavailable unknowns is set to -1
                         }
                     }
                 }
                 for (p=0; p<degree_S[i]; ++p) {
                     j=S[offset_S[i]+p];
-                    if (split_marker[j]==PASO_AMG_UNDECIDED)
+                    if (split_marker[j]==PASO_AMG_UNDECIDED) {
                         lambda[j]--;
+                        if (notInPanel[j]) {
+                            notInPanel[j]=false;
+                            panel[len_panel]=j;
+                            len_panel++;
+                        }
+                    }
+                }
+
+                // consolidate panel
+                // remove lambda[q]=-1
+                lambda_max=-1;
+                i=-1;
+                len_panel_new=0;
+                for (q=0; q<len_panel; q++) {
+                    k=panel[q];
+                    lambda_k=lambda[k];
+                    if (split_marker[k]==PASO_AMG_UNDECIDED) {
+                        panel[len_panel_new]=k;
+                        len_panel_new++;
+
+                        if (lambda_max == lambda_k) {
+                            if (k<i) i=k;
+                        } else if (lambda_max < lambda_k) {
+                            lambda_max =lambda_k;
+                            i=k;
+                        }
+                    }
+                }
+                len_panel=len_panel_new;
+            } while (len_panel>0);
+        } else {
+            // the unknown i is moved to C
+            split_marker[i]=PASO_AMG_IN_C;
+            lambda[i]=-1; // lambda from unavailable unknowns is set to -1
+
+            // all undecided unknowns strongly coupled to i are moved to F
+            for (p=0; p<degree_ST[i]; ++p) {
+                j=ST[offset_S[i]+p];
+                if (split_marker[j]==PASO_AMG_UNDECIDED) {
+                    split_marker[j]=PASO_AMG_IN_F;
+                    lambda[j]=-1;
+
+                    for (q=0; q<degree_S[j]; ++q) {
+                        k=S[offset_S[j]+q];
+                        if (split_marker[k]==PASO_AMG_UNDECIDED)
+                            lambda[k]++;
+                    }
                 }
             }
-            i=util::arg_max(n,lambda);
+            for (p=0; p<degree_S[i]; ++p) {
+                j=S[offset_S[i]+p];
+                if (split_marker[j]==PASO_AMG_UNDECIDED)
+                    lambda[j]--;
+            }
         }
+        i=util::arg_max(n,lambda);
     }
     delete[] lambda;
     delete[] ST;
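
The search above is the classical Ruge-Stueben splitting: lambda[i] counts the undecided unknowns influenced by i plus twice the F unknowns influenced by i, the heaviest undecided unknown becomes a C point, everything it strongly influences becomes F, and the weights of their remaining influences are bumped. A simplified, panel-free sketch of that greedy loop on plain adjacency lists (an illustration, not the Paso data layout):

#include <algorithm>
#include <vector>

// Simplified Ruge-Stueben C/F splitting. S[i] holds the unknowns that
// strongly influence i; ST is its transpose (who i influences). The panel
// optimisation and the FF-connectivity pass are omitted.
enum Marker { Undecided, InC, InF };

std::vector<Marker> cfSplit(const std::vector<std::vector<int> >& S)
{
    const int n = static_cast<int>(S.size());
    std::vector<Marker> mark(n, Undecided);
    if (n == 0) return mark;                       // nothing to split

    std::vector<std::vector<int> > ST(n);
    for (int i = 0; i < n; ++i)
        for (int j : S[i]) ST[j].push_back(i);

    std::vector<int> lambda(n);
    for (int i = 0; i < n; ++i) {
        if (S[i].empty()) { mark[i] = InF; lambda[i] = -1; }  // uninfluenced -> F
        else {
            int w = 0;
            for (int k : ST[i]) w += (S[k].empty() ? 2 : 1);  // F unknowns count twice
            lambda[i] = w;
        }
    }

    while (true) {
        int i = static_cast<int>(std::max_element(lambda.begin(), lambda.end())
                                 - lambda.begin());
        if (lambda[i] < 0) break;           // no undecided unknowns left
        mark[i] = InC; lambda[i] = -1;      // heaviest unknown becomes a C point
        for (int j : ST[i]) {               // unknowns strongly influenced by i -> F
            if (mark[j] != Undecided) continue;
            mark[j] = InF; lambda[j] = -1;
            for (int k : S[j])              // their influences become more attractive
                if (mark[k] == Undecided) ++lambda[k];
        }
        for (int j : S[i])                  // i no longer counts for its influences
            if (mark[j] == Undecided) --lambda[j];
    }
    return mark;
}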
@@ -730,15 +709,15 @@ void Preconditioner_LocalAMG_enforceFFConnectivity(dim_t n,
     for (i=0; i<n; ++i) {
         if (split_marker[i]==PASO_AMG_IN_F && degree_S[i]>0) {
             for (p=0; p<degree_S[i]; ++p) {
-                register index_t j=S[offset_S[i]+p];
+                index_t j=S[offset_S[i]+p];
                 if ( (split_marker[j]==PASO_AMG_IN_F)  && (degree_S[j]>0) )  {
                     // i and j are now two F nodes which are strongly connected
                     // is there a C node they share ?
-                    register index_t sharing=-1;
+                    index_t sharing=-1;
                     for (q=0; q<degree_S[i]; ++q) {
                         index_t k=S[offset_S[i]+q];
                         if (split_marker[k]==PASO_AMG_IN_C) {
-                            register index_t* where_k = (index_t*)bsearch(
+                            index_t* where_k = (index_t*)bsearch(
                                     &k, &(S[offset_S[j]]), degree_S[j],
                                     sizeof(index_t), util::comparIndex);
                             if (where_k != NULL) {
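
enforceFFConnectivity, whose tail is shown above, promotes one of two strongly connected F points to C whenever they do not share a common C interpolation point. The core test, here on sorted index vectors with std::binary_search standing in for the bsearch call:

#include <algorithm>
#include <vector>

// Do two F points i and j (strongly connected to each other) share at least
// one strong C neighbour? Si/Sj are their sorted strong-connection lists and
// isC marks the current C points. Sketch of the test used in
// Preconditioner_LocalAMG_enforceFFConnectivity.
bool shareCPoint(const std::vector<int>& Si, const std::vector<int>& Sj,
                 const std::vector<bool>& isC)
{
    for (int k : Si)
        if (isC[k] && std::binary_search(Sj.begin(), Sj.end(), k))
            return true;        // k interpolates both i and j
    return false;               // caller would promote one of the two to C
}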
diff --git a/paso/src/LocalAMG_Prolongation.cpp b/paso/src/LocalAMG_Prolongation.cpp
index 18be821..4db931a 100644
--- a/paso/src/LocalAMG_Prolongation.cpp
+++ b/paso/src/LocalAMG_Prolongation.cpp
@@ -26,9 +26,10 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "SparseMatrix.h"
+#include "Options.h"
 #include "PasoUtil.h"
 #include "Preconditioner.h"
+#include "SparseMatrix.h"
 
 namespace paso {
 
@@ -101,45 +102,30 @@ SparseMatrix_ptr Preconditioner_LocalAMG_getProlongation(SparseMatrix_ptr A_p,
         }
     }
     Pattern_ptr outpattern;
-    if (Esys_noError()) {
-        outpattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, n, n_C,
-                                           ptr, index));
-    } else {
-        delete[] ptr;
-        delete[] index;
-    }
+    outpattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, n, n_C, ptr, index));
     /* now we need to create a matrix and fill it */
     SparseMatrix_ptr out;
-    if (Esys_noError()) {
-        out.reset(new SparseMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK,
-                    outpattern, n_block, n_block, false));
-    }
+    out.reset(new SparseMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK,
+                               outpattern, n_block, n_block, false));
 
-    if (Esys_noError()) {
-        if ( (interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) || (interpolation_method == PASO_CLASSIC_INTERPOLATION) ) {
-            if (n_block == 1) {
-                Preconditioner_LocalAMG_setClassicProlongation(
-                        out, A_p, offset_S, degree_S, S, counter_C);
-            } else {
-                Preconditioner_LocalAMG_setClassicProlongation_Block(
-                        out, A_p, offset_S, degree_S, S, counter_C);
-            }
+    if ( (interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) || (interpolation_method == PASO_CLASSIC_INTERPOLATION) ) {
+        if (n_block == 1) {
+            Preconditioner_LocalAMG_setClassicProlongation(
+                    out, A_p, offset_S, degree_S, S, counter_C);
         } else {
-            if (n_block == 1) {
-                Preconditioner_LocalAMG_setDirectProlongation(
-                        out, A_p, counter_C);
-            } else {
-                Preconditioner_LocalAMG_setDirectProlongation_Block(
-                        out, A_p, counter_C);
-            }
+            Preconditioner_LocalAMG_setClassicProlongation_Block(
+                    out, A_p, offset_S, degree_S, S, counter_C);
         }
-    }
-    if (Esys_noError()) {
-        return out;
     } else {
-        out.reset();
-        return out;
+        if (n_block == 1) {
+            Preconditioner_LocalAMG_setDirectProlongation(
+                    out, A_p, counter_C);
+        } else {
+            Preconditioner_LocalAMG_setDirectProlongation_Block(
+                    out, A_p, counter_C);
+        }
     }
+    return out;
 }
 
 /*
@@ -162,8 +148,8 @@ void Preconditioner_LocalAMG_setDirectProlongation(SparseMatrix_ptr P_p,
 {
    dim_t i;
    const dim_t n =A_p->numRows;
-   register double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp;
-   register index_t iPtr, j, offset;
+   double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp;
+   index_t iPtr, j, offset;
    index_t *where_p, *start_p;
 
    #pragma omp parallel for private(A_ii, offset, where_p, start_p, i, alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos,iPtr,j, A_ij , rtmp)  schedule(static)
@@ -246,8 +232,8 @@ void Preconditioner_LocalAMG_setDirectProlongation_Block(SparseMatrix_ptr P_p,
    const dim_t row_block=A_p->row_block_size;
    const dim_t A_block = A_p->block_size;
    double *alpha, *beta, *sum_all_neg, *sum_all_pos, *sum_strong_neg, *sum_strong_pos, *A_ii;
-   register double A_ij, rtmp;
-   register index_t iPtr, j, offset, ib;
+   double A_ij, rtmp;
+   index_t iPtr, j, offset, ib;
    index_t *where_p, *start_p;
 
    #pragma omp parallel private(ib, rtmp, A_ii, offset, where_p, start_p, i, alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos,iPtr,j, A_ij )
@@ -409,7 +395,7 @@ void Preconditioner_LocalAMG_setClassicProlongation(SparseMatrix_ptr P_p,
                         if  (counter_C[j]>=0)  { /* j is an interpolation point : add A_ij into P */
                                const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p,degree_P_i, sizeof(index_t), util::comparIndex);
                                if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_LocalAMG_setClassicProlongation: Interpolation point is missing.");
+                                   throw PasoException("Preconditioner_LocalAMG_setClassicProlongation: Interpolation point is missing.");
                                } else {
                                     const index_t offset = P_p->pattern->ptr[i]+ (index_t)(where_p-start_p);
                                     P_p->val[offset]+=A_ij;
@@ -509,7 +495,7 @@ void Preconditioner_LocalAMG_setClassicProlongation_Block(SparseMatrix_ptr P_p,
                         if  (counter_C[j]>=0)  { /* j is an interpolation point : add A_ij into P */
                                const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p,degree_P_i, sizeof(index_t), util::comparIndex);
                                if (where_p == NULL)  {
-                                       Esys_setError(SYSTEM_ERROR, "Preconditioner_LocalAMG_setClassicProlongation_Block: Interpolation point is missing.");
+                                   throw PasoException("Preconditioner_LocalAMG_setClassicProlongation_Block: Interpolation point is missing.");
                                } else {
                                     const index_t offset = P_p->pattern->ptr[i]+ (index_t)(where_p-start_p);
                                     for (ib=0; ib<row_block; ib++) P_p->val[offset*row_block+ib] +=A_ij[(row_block+1)*ib];
@@ -555,7 +541,7 @@ void Preconditioner_LocalAMG_setClassicProlongation_Block(SparseMatrix_ptr P_p,
                  }
               }  /* i has been processed, now we need to do some rescaling */
               for (ib=0; ib<row_block; ib++) {
-                   register double a2=a[ib];
+                   double a2=a[ib];
                    if (std::abs(a2)>0.) {
                         a2=-1./a2;
                         for (iPtr=P_p->pattern->ptr[i]; iPtr<P_p->pattern->ptr[i + 1]; ++iPtr) {
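
For orientation, the direct-prolongation kernels above follow textbook direct interpolation: negative and positive couplings are scaled separately by alpha and beta so that the full row sum is carried by the strong C couplings, and each strong C neighbour j receives the weight -alpha*a_ij/a_ii (or -beta*a_ij/a_ii). A standalone sketch of those weights for a single fine row, as an illustration of the scheme rather than the exact Paso kernel:

#include <cstddef>
#include <vector>

// Direct interpolation weights for one fine row i. 'col' and 'val' hold the
// off-diagonal entries of the row, 'isStrongC' marks strong C-point columns,
// a_ii is the diagonal. Illustrative only.
std::vector<double> directWeights(const std::vector<int>& col,
                                  const std::vector<double>& val,
                                  const std::vector<bool>& isStrongC,
                                  double a_ii)
{
    double sumAllNeg = 0, sumAllPos = 0, sumStrongNeg = 0, sumStrongPos = 0;
    for (std::size_t p = 0; p < val.size(); ++p) {
        (val[p] < 0 ? sumAllNeg : sumAllPos) += val[p];
        if (isStrongC[col[p]])
            (val[p] < 0 ? sumStrongNeg : sumStrongPos) += val[p];
    }
    const double alpha = (sumStrongNeg < 0) ? sumAllNeg / sumStrongNeg : 0.0;
    const double beta  = (sumStrongPos > 0) ? sumAllPos / sumStrongPos : 0.0;

    std::vector<double> w(val.size(), 0.0);
    for (std::size_t p = 0; p < val.size(); ++p)
        if (isStrongC[col[p]])
            w[p] = -(val[p] < 0 ? alpha : beta) * val[p] / a_ii;
    return w;
}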
diff --git a/paso/src/MINRES.cpp b/paso/src/MINRES.cpp
index 23e36a3..d8032d8 100644
--- a/paso/src/MINRES.cpp
+++ b/paso/src/MINRES.cpp
@@ -17,10 +17,9 @@
 
 /* MINRES iterations */
 
-#include "SystemMatrix.h"
-#include "Paso.h"
 #include "Solver.h"
 #include "PasoUtil.h"
+#include "SystemMatrix.h"
 
 namespace paso {
 
@@ -48,21 +47,21 @@ namespace paso {
 *
 *  INFO    (output) INT
 *
-*          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
-*          = SOLVER_MAXITER_REACHED
-*          = SOLVER_INPUT_ERROR Illegal parameter:
-*          = SOLVER_MEMORY_ERROR :
-*          = SOLVER_NEGATIVE_NORM_ERROR :
+*          = NoError: Successful exit. Iterated approximate solution returned.
+*          = MaxIterReached
+*          = InputError: Illegal parameter
+*          = MemoryError
+*          = NegativeNormError
 *
 *  ==============================================================
 */
 
-err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
-                    dim_t* iter, double* tolerance, Performance* pp)
+SolverResult Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
+                           dim_t* iter, double* tolerance, Performance* pp)
 {
     const dim_t maxit = *iter;
     if (maxit <= 0) {
-        return SOLVER_INPUT_ERROR;
+        return InputError;
     }
 
     double delta,gamma=0.,gamma_old=0.,eta=0.,dp0=0., c=1.0,c_old=1.0;
@@ -72,7 +71,7 @@ err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
     dim_t num_iter = 0;
     const dim_t n = A->getTotalNumRows();
     bool convergeFlag=false;
-    err_t status = SOLVER_NO_ERROR;
+    SolverResult status = NoError;
 
     double* ZNEW = new double[n];
     double* Z = new double[n];
@@ -89,8 +88,8 @@ err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
     dp = util::innerProduct(n, R ,Z,A->mpi_info);
     dp0 = dp;
     if (dp < 0) {
-        status = SOLVER_NEGATIVE_NORM_ERROR;
-    } else if (ABS(dp) <= 0) {
+        status = NegativeNormError;
+    } else if (std::abs(dp) <= 0) {
         // happy break down
         convergeFlag = true;
     } else {
@@ -103,12 +102,12 @@ err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
         tol=(*tolerance)*norm_scal;
     }
 
-    while (!convergeFlag && status == SOLVER_NO_ERROR) {
+    while (!convergeFlag && status == NoError) {
         //  z <- z / gamma
         util::scale(n, Z, 1./gamma);
 
         //  Az <- A*z
-        SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, Z, PASO_ZERO, AZ);
+        A->MatrixVector_CSR_OFFSET0(PASO_ONE, Z, PASO_ZERO, AZ);
 
         //  delta <- Az'.z
         delta = util::innerProduct(n, AZ, Z, A->mpi_info);
@@ -128,11 +127,11 @@ err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
 
         dp = util::innerProduct(n, R, ZNEW, A->mpi_info);
         if (dp < 0.) {
-            status = SOLVER_NEGATIVE_NORM_ERROR;
-        } else if (ABS(dp) == 0.) {
+            status = NegativeNormError;
+        } else if (std::abs(dp) == 0.) {
             // happy break down
             convergeFlag = true;
-        } else if (ABS(dp) > 0.e-13 * ABS(dp0)) {
+        } else if (std::abs(dp) > 0.e-13 * std::abs(dp0)) {
             //  gamma <- sqrt(r'*z)
             gamma_old = gamma;
             gamma = sqrt(dp);
@@ -169,12 +168,12 @@ err_t Solver_MINRES(SystemMatrix_ptr A, double* R, double* X,
             eta = - s * eta;
             convergeFlag = rnorm_prec <= tol;
         } else {
-            status = SOLVER_BREAKDOWN;
+            status = Breakdown;
         }
         util::copy(n, Z, ZNEW);
         ++num_iter;
         if (!convergeFlag && num_iter >= maxit)
-            status = SOLVER_MAXITER_REACHED;
+            status = MaxIterReached;
     }
     delete[] Z;
     delete[] ZNEW;
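
The dp tests in the loop above guard the preconditioned inner product <r, z> = <r, M^-1 r>, which must stay non-negative for a symmetric positive definite preconditioner: a negative value becomes NegativeNormError, an exact zero is the happy breakdown, and otherwise gamma = sqrt(dp) feeds the next Lanczos step. A scalar sketch of that classification:

#include <cmath>
#include <cstddef>
#include <vector>

// Classify the preconditioned inner product dp = <r, z> the way the MINRES
// loop above does. The enum mirrors a subset of paso::SolverResult but is
// local to this sketch.
enum class Check { Breakdown, NegativeNorm, Continue };

Check classify(const std::vector<double>& r, const std::vector<double>& z,
               double& gamma)
{
    double dp = 0.0;
    for (std::size_t i = 0; i < r.size(); ++i)
        dp += r[i] * z[i];                     // <r, M^{-1} r> for z = M^{-1} r
    if (dp < 0.0) return Check::NegativeNorm;  // preconditioner not SPD
    if (dp == 0.0) return Check::Breakdown;    // happy breakdown: nothing left to solve
    gamma = std::sqrt(dp);                     // next Lanczos scaling factor
    return Check::Continue;
}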
diff --git a/paso/src/MKL.cpp b/paso/src/MKL.cpp
index b7dfa23..4925afb 100644
--- a/paso/src/MKL.cpp
+++ b/paso/src/MKL.cpp
@@ -28,12 +28,13 @@
 
 #include "MKL.h"
 #include "Options.h"
+#include "PasoException.h"
 
 namespace paso {
 
 void MKL_free(SparseMatrix* A)
 {
-#ifdef MKL
+#ifdef ESYS_HAVE_MKL
     if (A && A->solver_p && A->solver_package==PASO_MKL) {
         ES_MKL_INT mtype = MKL_MTYPE_REAL_UNSYM;
         ES_MKL_INT n = A->numRows;
@@ -57,8 +58,7 @@ void MKL_free(SparseMatrix* A)
         delete[] A->solver_p;
         A->solver_p=NULL;
         if (error != MKL_ERROR_NO)
-            Esys_setError(SYSTEM_ERROR,
-                          "memory release in MKL library failed.");
+            throw PasoException("Memory release in MKL library failed.");
     }
 #endif
 }
@@ -66,17 +66,15 @@ void MKL_free(SparseMatrix* A)
 void MKL_solve(SparseMatrix_ptr A, double* out, double* in, index_t reordering,
                dim_t numRefinements, bool verbose)
 {
-#ifdef MKL
+#ifdef ESYS_HAVE_MKL
     if (! (A->type & (MATRIX_FORMAT_OFFSET1 + MATRIX_FORMAT_BLK1)) ) {
-        Esys_setError(TYPE_ERROR, "Paso: MKL requires CSR format with index offset 1 and block size 1.");
-        return;
+        throw PasoException("Paso: MKL requires CSR format with index offset 1 and block size 1.");
     }
 
     // MKL uses 'long long int' in 64-bit version, escript 'long'. Make sure
     // they are compatible
     if (sizeof(ES_MKL_INT) != sizeof(index_t)) {
-        Esys_setError(TYPE_ERROR, "Paso: MKL index type is not compatible with this escript build. Check compile options.");
-        return;
+        throw PasoException("Paso: MKL index type is not compatible with this escript build. Check compile options.");
     }
 
     ES_MKL_INT* ptr = reinterpret_cast<ES_MKL_INT*>(A->pattern->ptr);
@@ -132,14 +130,14 @@ void MKL_solve(SparseMatrix_ptr A, double* out, double* in, index_t reordering,
         A->solver_package = PASO_MKL;
         // symbolic factorization
         phase = MKL_PHASE_SYMBOLIC_FACTORIZATION;
-        time0 = Esys_timer();
+        time0 = escript::gettime();
         ES_PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, A->val,
                    ptr, index, &idum, &nrhs, iparm, &msglvl, in, out, &error);
         if (error != MKL_ERROR_NO) {
              if (verbose)
                  printf("MKL: symbolic factorization failed.\n");
-             Esys_setError(VALUE_ERROR,"symbolic factorization in MKL library failed.");
              MKL_free(A.get());
+             throw PasoException("symbolic factorization in MKL library failed.");
         } else {
             // LDU factorization
             phase = MKL_PHASE_FACTORIZATION;
@@ -148,31 +146,29 @@ void MKL_solve(SparseMatrix_ptr A, double* out, double* in, index_t reordering,
             if (error != MKL_ERROR_NO) {
                 if (verbose)
                     printf("MKL: LDU factorization failed.\n");
-                Esys_setError(ZERO_DIVISION_ERROR, "factorization in MKL library failed. Most likely the matrix is singular.");
                 MKL_free(A.get());
+                throw PasoException("factorization in MKL library failed. Most likely the matrix is singular.");
            }
            if (verbose)
-               printf("MKL: LDU factorization completed (time = %e).\n", Esys_timer()-time0);
+               printf("MKL: LDU factorization completed (time = %e).\n", escript::gettime()-time0);
         }
     }
     // forward backward substitution
-    if (Esys_noError())  {
-        time0 = Esys_timer();
-        phase = MKL_PHASE_SOLVE;
-        ES_PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, A->val,
-                   ptr, index, &idum, &nrhs, iparm, &msglvl, in, out, &error);
-        if (verbose) printf("MKL: solve completed.\n");
-        if (error != MKL_ERROR_NO) {
-            if (verbose)
-                printf("MKL: forward/backward substitution failed.\n");
-            Esys_setError(ZERO_DIVISION_ERROR, "forward/backward substitution in MKL library failed. Most likely the matrix is singular.");
-        } else {
-            if (verbose)
-                printf("MKL: forward/backward substitution completed (time = %e).\n", Esys_timer()-time0);
-        }
+    time0 = escript::gettime();
+    phase = MKL_PHASE_SOLVE;
+    ES_PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, A->val,
+               ptr, index, &idum, &nrhs, iparm, &msglvl, in, out, &error);
+    if (verbose) printf("MKL: solve completed.\n");
+    if (error != MKL_ERROR_NO) {
+        if (verbose)
+            printf("MKL: forward/backward substitution failed.\n");
+        throw PasoException("forward/backward substitution in MKL library failed. Most likely the matrix is singular.");
+    } else {
+        if (verbose)
+            printf("MKL: forward/backward substitution completed (time = %e).\n", escript::gettime()-time0);
     }
 #else
-    Esys_setError(SYSTEM_ERROR, "Paso: MKL is not available.");
+    throw PasoException("Paso: MKL is not available.");
 #endif
 }
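
With Esys_setError gone, MKL_solve now releases the PARDISO data before throwing, so a failed factorisation phase cannot leave a stale handle behind. A generic sketch of that release-then-throw pattern (Handle and releaseHandle are placeholders, not the MKL interface):

#include <stdexcept>
#include <string>

// Generic "release resources, then report" pattern used after the switch
// from error flags to exceptions. Handle stands in for the PARDISO data
// owned by the SparseMatrix; these are not real MKL calls.
struct Handle { void* data = nullptr; };

void releaseHandle(Handle& h) { h.data = nullptr; }

void checkPhase(int error, Handle& h, const std::string& phase)
{
    if (error != 0) {                 // mirrors 'error != MKL_ERROR_NO'
        releaseHandle(h);             // free factorisation data first
        throw std::runtime_error(phase + " in MKL library failed.");
    }
}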
 
diff --git a/paso/src/MKL.h b/paso/src/MKL.h
index 1aa49c6..ef36ac6 100644
--- a/paso/src/MKL.h
+++ b/paso/src/MKL.h
@@ -41,7 +41,7 @@ namespace paso {
 #define ES_MKL_INT MKL_INT
 #endif
 
-#ifdef MKL
+#ifdef ESYS_HAVE_MKL
 #include <mkl_pardiso.h>
 #endif
 
diff --git a/paso/src/MergedSolver.cpp b/paso/src/MergedSolver.cpp
index 8fb69cd..3acd2ef 100644
--- a/paso/src/MergedSolver.cpp
+++ b/paso/src/MergedSolver.cpp
@@ -26,10 +26,11 @@
 /****************************************************************************/
 
 #include "MergedSolver.h"
-#include "Preconditioner.h"
+#include "Options.h"
 #include "PasoUtil.h"
-#include "UMFPACK.h"
+#include "Preconditioner.h"
 #include "MKL.h"
+#include "UMFPACK.h"
 
 namespace paso {
 
@@ -39,7 +40,7 @@ MergedSolver::MergedSolver(const_SystemMatrix_ptr M, const Options* options)
     const index_t size = M->mpi_info->size;
     const dim_t global_n = M->getGlobalNumRows();
     const dim_t n_block = M->mainBlock->row_block_size;
-    const dim_t* dist = M->pattern->input_distribution->first_component;
+    const std::vector<index_t> dist(M->pattern->input_distribution->first_component);
 
     SparseMatrix_ptr M_temp(M->mergeSystemMatrix());
 
@@ -64,10 +65,10 @@ MergedSolver::MergedSolver(const_SystemMatrix_ptr M, const Options* options)
     }
 
     if (rank == 0) {
-#ifdef MKL
+#ifdef ESYS_HAVE_MKL
         A = M_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1);
         A->solver_package = PASO_MKL;
-#elif defined USE_UMFPACK
+#elif defined ESYS_HAVE_UMFPACK
         A = M_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC);
         A->solver_package = PASO_UMFPACK;
 #else
diff --git a/paso/src/MergedSolver.h b/paso/src/MergedSolver.h
index 55cdc57..4978b22 100644
--- a/paso/src/MergedSolver.h
+++ b/paso/src/MergedSolver.h
@@ -28,6 +28,7 @@
 #ifndef __PASO_MERGEDSOLVER_H__
 #define __PASO_MERGEDSOLVER_H__
 
+#include "Paso.h"
 #include "SystemMatrix.h"
 
 namespace paso {
@@ -39,7 +40,7 @@ struct MergedSolver
 
     void solve(double* local_x, const double* local_b);
 
-    esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
     SparseMatrix_ptr A;
     double* x;
     double* b;
diff --git a/paso/src/NewtonGMRES.cpp b/paso/src/NewtonGMRES.cpp
index c91aad0..fdc2b5b 100644
--- a/paso/src/NewtonGMRES.cpp
+++ b/paso/src/NewtonGMRES.cpp
@@ -31,21 +31,24 @@
 *
 */
 
-#include <iostream>
-#include "Solver.h"
+#include "Paso.h"
+#include "Options.h"
 #include "PasoUtil.h"
+#include "Solver.h"
+
+#include <iostream>
 
 namespace paso {
 
-err_t Solver_NewtonGMRES(Function* F, double* x, Options* options,
-                         Performance* pp)
+SolverResult Solver_NewtonGMRES(Function* F, double* x, Options* options,
+                                Performance* pp)
 {
     const double inner_tolerance_safety=.9;
     dim_t gmres_iter;
     double stop_tol, norm2_f,norm2_fo, normsup_f,reduction_f, gmres_tol, rtmp, quad_tolerance;
     bool convergeFlag=false, maxIterFlag=false, breakFlag=false;
     double *f=NULL, *step=NULL;
-    err_t Status=SOLVER_NO_ERROR;
+    SolverResult status=NoError;
     const bool debug = options->verbose;
     const dim_t n = F->getLen();
     dim_t iteration_count = 0;
@@ -80,7 +83,7 @@ err_t Solver_NewtonGMRES(Function* F, double* x, Options* options,
      */
     stop_tol=atol + rtol*normsup_f;
     if (stop_tol<=0) {
-        Status=SOLVER_INPUT_ERROR;
+        status=InputError;
         if (debug)
             std::cout << "NewtonGMRES: zero tolerance given." << std::endl;
     } else {
@@ -115,13 +118,13 @@ err_t Solver_NewtonGMRES(Function* F, double* x, Options* options,
             // call GMRES to get increment
             gmres_iter=lmaxit;
             gmres_tol=inner_tolerance;
-            Status = Solver_GMRES2(F, f, x, step, &gmres_iter, &gmres_tol, pp);
+            status = Solver_GMRES2(F, f, x, step, &gmres_iter, &gmres_tol, pp);
             inner_tolerance=std::max(inner_tolerance, gmres_tol/norm2_f);
             std::cout << "NewtonGMRES: actual rel. inner tolerance = "
                 << inner_tolerance << std::endl;
             iteration_count+=gmres_iter;
-            if ((Status==SOLVER_NO_ERROR) || (Status==SOLVER_MAXITER_REACHED)) {
-                Status=SOLVER_NO_ERROR;
+            if ((status==NoError) || (status==MaxIterReached)) {
+                status=NoError;
                 // update x
                 norm2_fo=norm2_f;
                 util::update(n,1.,x,1.,step);
@@ -161,14 +164,14 @@ err_t Solver_NewtonGMRES(Function* F, double* x, Options* options,
                 std::cout << "NewtonGMRES: maximum number of iteration steps "
                     << maxit << " reached." << std::endl;
         }
-        if (breakFlag) Status = SOLVER_BREAKDOWN;
-        if (maxIterFlag) Status = SOLVER_MAXITER_REACHED;
+        if (breakFlag) status = Breakdown;
+        if (maxIterFlag) status = MaxIterReached;
     }
     delete[] f;
     delete[] step;
     if (debug)
-        std::cout << "NewtonGMRES: STATUS return = " << Status << std::endl;
-    return Status;
+        std::cout << "NewtonGMRES: STATUS return = " << status << std::endl;
+    return status;
 }
 
 } // namespace paso
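
The driver above stops once the residual norm drops below stop_tol = atol + rtol*||f(x_0)|| and rejects a non-positive tolerance as InputError. The same criterion on a scalar Newton iteration, with an exact step standing in for the GMRES inner solve:

#include <cmath>
#include <iostream>
#include <stdexcept>

// Scalar illustration of the stopping rule stop_tol = atol + rtol*|f(x0)|
// used by Solver_NewtonGMRES; the Jacobian-free GMRES inner solve is
// replaced by an exact Newton step here.
int main()
{
    auto f  = [](double x) { return x * x - 2.0; };
    auto df = [](double x) { return 2.0 * x; };

    double x = 1.0;
    const double atol = 1e-12, rtol = 1e-8;
    const double stop_tol = atol + rtol * std::abs(f(x));
    if (stop_tol <= 0.0)
        throw std::runtime_error("NewtonGMRES: zero tolerance given.");

    int iter = 0;
    while (std::abs(f(x)) > stop_tol && iter < 50) {
        x -= f(x) / df(x);      // the 'step' from the (here exact) linear solve
        ++iter;
    }
    std::cout << "x = " << x << " after " << iter << " iterations\n";
    return 0;
}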
diff --git a/paso/src/Options.cpp b/paso/src/Options.cpp
index 3cd1053..dd414f4 100644
--- a/paso/src/Options.cpp
+++ b/paso/src/Options.cpp
@@ -14,22 +14,60 @@
 *
 *****************************************************************************/
 
+#include "Paso.h"
+#include "Options.h"
+#include "PasoException.h"
 
-/****************************************************************************/
+#include <escript/SolverOptions.h>
 
-/*   Paso: solver options */
+#include <boost/python/extract.hpp>
+#include <iostream>
+#include <sstream>
 
-/****************************************************************************/
+namespace bp = boost::python;
 
-/*   Copyrights by ACcESS Australia, 2003,2004 */
-/*   author: l.gross at uq.edu.au */
+namespace paso {
 
-/****************************************************************************/
-#include <iostream>
-#include "Paso.h"
-#include "Options.h"
+Options::Options(const bp::object& options)
+{
+    escript::SolverBuddy sb = bp::extract<escript::SolverBuddy>(options);
 
-namespace paso {
+    setDefaults();
+    method = mapEscriptOption(sb.getSolverMethod());
+    package = mapEscriptOption(sb.getPackage());
+    verbose = sb.isVerbose();
+    symmetric = sb.isSymmetric();
+    tolerance = sb.getTolerance();
+    absolute_tolerance = sb.getAbsoluteTolerance();
+    inner_tolerance = sb.getInnerTolerance();
+    adapt_inner_tolerance = sb.adaptInnerTolerance();
+    reordering = mapEscriptOption(sb.getReordering());
+    preconditioner = mapEscriptOption(sb.getPreconditioner());
+    ode_solver = mapEscriptOption(sb.getODESolver());
+    iter_max = sb.getIterMax();
+    inner_iter_max = sb.getInnerIterMax();
+    drop_tolerance = sb.getDropTolerance();
+    drop_storage = sb.getDropStorage();
+    truncation = sb.getTruncation();
+    restart = sb._getRestartForC();
+    sweeps = sb.getNumSweeps();
+    pre_sweeps = sb.getNumPreSweeps();
+    post_sweeps = sb.getNumPostSweeps();
+    level_max = sb.getLevelMax();
+    min_coarse_matrix_size = sb.getMinCoarseMatrixSize();
+    coarsening_threshold = sb.getCoarseningThreshold();
+    accept_failed_convergence = sb.acceptConvergenceFailure();
+    coarsening_method = mapEscriptOption(sb.getCoarsening());
+    smoother = mapEscriptOption(sb.getSmoother());
+    relaxation_factor = sb.getRelaxationFactor();
+    use_local_preconditioner = sb.useLocalPreconditioner();
+    min_coarse_sparsity = sb.getMinCoarseMatrixSparsity();
+    refinements = sb.getNumRefinements();
+    coarse_matrix_refinements = sb.getNumCoarseMatrixRefinements();
+    usePanel = sb.usePanel();
+    interpolation_method = sb.getAMGInterpolation();
+    diagonal_dominance_threshold = sb.getDiagonalDominanceThreshold();
+}
 
 void Options::setDefaults()
 {
@@ -201,10 +239,6 @@ const char* Options::name(int key)
             return "RILU";
        case PASO_DEFAULT_REORDERING:
             return "DEFAULT_REORDERING";
-       case PASO_SUPER_LU:
-            return "SUPER_LU";
-       case PASO_PASTIX:
-            return "PASTIX";
        case PASO_YAIR_SHAPIRA_COARSENING:
             return "YAIR_SHAPIRA_COARSENING";
        case PASO_RUGE_STUEBEN_COARSENING:
@@ -237,7 +271,7 @@ const char* Options::name(int key)
 }
 
 int Options::getSolver(int solver, int pack, bool symmetry,
-                       const esysUtils::JMPI& mpi_info)
+                       const escript::JMPI& mpi_info)
 {
     int out = PASO_DEFAULT;
     // PASO //
@@ -322,13 +356,13 @@ int Options::getSolver(int solver, int pack, bool symmetry,
     } else if (pack==PASO_UMFPACK) {
         out=PASO_DIRECT;
     } else {
-        Esys_setError(VALUE_ERROR, "Options::getSolver: Unidentified package.");
+        throw PasoException("Options::getSolver: Unidentified package.");
     }
     return out;
 }
 
 int Options::getPackage(int solver, int pack, bool symmetry,
-                        const esysUtils::JMPI& mpi_info)
+                        const escript::JMPI& mpi_info)
 {
     int out = PASO_PASO;
 
@@ -337,18 +371,16 @@ int Options::getPackage(int solver, int pack, bool symmetry,
             if (solver == PASO_DIRECT) {
                 // these packages require CSC which is not supported with MPI
                 if (mpi_info->size == 1) {
-#if defined MKL
+#ifdef ESYS_HAVE_MKL
                     out = PASO_MKL;
-#elif defined USE_UMFPACK
+#elif defined ESYS_HAVE_UMFPACK
                     out = PASO_UMFPACK;
-#elif defined PASTIX
-                    out = PASO_PASTIX
 #endif
                 } else{
-#if defined MKL
-                    Esys_setError(VALUE_ERROR, "MKL does not currently support MPI");
-#elif defined USE_UMFPACK
-                    Esys_setError(VALUE_ERROR, "UMFPACK does not currently support MPI");
+#ifdef ESYS_HAVE_MKL
+                    throw PasoException("MKL does not currently support MPI");
+#elif defined ESYS_HAVE_UMFPACK
+                    throw PasoException("UMFPACK does not currently support MPI");
 #endif
                 }
             }
@@ -359,16 +391,143 @@ int Options::getPackage(int solver, int pack, bool symmetry,
 
         case PASO_MKL:
         case PASO_UMFPACK:
-        case PASO_PASTIX:
         case PASO_TRILINOS:
             out = pack;
             break;
 
         default:
-            Esys_setError(VALUE_ERROR, "Options::getPackage: Unidentified package.");
+            throw PasoException("Options::getPackage: Unidentified package.");
     }
     return out;
 }
 
+int Options::mapEscriptOption(int escriptOption)
+{
+    switch (escriptOption) {
+        case escript::SO_DEFAULT:
+            return PASO_DEFAULT;
+
+        case escript::SO_PACKAGE_MKL:
+            return PASO_MKL;
+        case escript::SO_PACKAGE_PASO:
+            return PASO_PASO;
+        case escript::SO_PACKAGE_TRILINOS:
+            return PASO_TRILINOS;
+        case escript::SO_PACKAGE_UMFPACK:
+            return PASO_UMFPACK;
+
+        case escript::SO_METHOD_BICGSTAB:
+            return PASO_BICGSTAB;
+        case escript::SO_METHOD_CGS:
+            return PASO_CGS;
+        case escript::SO_METHOD_CHOLEVSKY:
+            return PASO_CHOLEVSKY;
+        case escript::SO_METHOD_CR:
+            return PASO_CR;
+        case escript::SO_METHOD_DIRECT:
+            return PASO_DIRECT;
+        case escript::SO_METHOD_GMRES:
+            return PASO_GMRES;
+        case escript::SO_METHOD_ITERATIVE:
+            return PASO_ITERATIVE;
+        case escript::SO_METHOD_MINRES:
+            return PASO_MINRES;
+        case escript::SO_METHOD_NONLINEAR_GMRES:
+            return PASO_NONLINEAR_GMRES;
+        case escript::SO_METHOD_PCG:
+            return PASO_PCG;
+        case escript::SO_METHOD_PRES20:
+            return PASO_PRES20;
+        case escript::SO_METHOD_TFQMR:
+            return PASO_TFQMR;
+
+        case escript::SO_PRECONDITIONER_AMG:
+            return PASO_AMG;
+        case escript::SO_PRECONDITIONER_AMLI:
+            return PASO_AMLI;
+        case escript::SO_PRECONDITIONER_BOOMERAMG:
+            return PASO_BOOMERAMG;
+        case escript::SO_PRECONDITIONER_GAUSS_SEIDEL:
+            return PASO_GAUSS_SEIDEL;
+        case escript::SO_PRECONDITIONER_ILU0:
+            return PASO_ILU0;
+        case escript::SO_PRECONDITIONER_ILUT:
+            return PASO_ILUT;
+        case escript::SO_PRECONDITIONER_JACOBI:
+            return PASO_JACOBI;
+        case escript::SO_PRECONDITIONER_NONE:
+            return PASO_NO_PRECONDITIONER;
+        case escript::SO_PRECONDITIONER_REC_ILU:
+            return PASO_REC_ILU;
+        case escript::SO_PRECONDITIONER_RILU:
+            return PASO_RILU;
+
+        case escript::SO_ODESOLVER_BACKWARD_EULER:         
+            return PASO_BACKWARD_EULER;
+        case escript::SO_ODESOLVER_CRANK_NICOLSON:
+            return PASO_CRANK_NICOLSON;
+        case escript::SO_ODESOLVER_LINEAR_CRANK_NICOLSON:
+            return PASO_LINEAR_CRANK_NICOLSON;
+
+        case escript::SO_INTERPOLATION_CLASSIC:
+            return PASO_CLASSIC_INTERPOLATION;
+        case escript::SO_INTERPOLATION_CLASSIC_WITH_FF_COUPLING:
+            return PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING;
+        case escript::SO_INTERPOLATION_DIRECT:
+            return PASO_DIRECT_INTERPOLATION;
+
+        case escript::SO_COARSENING_AGGREGATION:
+            return PASO_AGGREGATION_COARSENING;
+        case escript::SO_COARSENING_CIJP:
+            return PASO_CIJP_COARSENING;
+        case escript::SO_COARSENING_CIJP_FIXED_RANDOM:
+            return PASO_CIJP_FIXED_RANDOM_COARSENING;
+        case escript::SO_COARSENING_FALGOUT:
+            return PASO_FALGOUT_COARSENING;
+        case escript::SO_COARSENING_HMIS:
+            return PASO_HMIS_COARSENING;
+        case escript::SO_COARSENING_PMIS:
+            return PASO_PMIS_COARSENING;
+        case escript::SO_COARSENING_RUGE_STUEBEN:
+            return PASO_RUGE_STUEBEN_COARSENING;
+        case escript::SO_COARSENING_STANDARD:
+            return PASO_STANDARD_COARSENING;   
+        case escript::SO_COARSENING_YAIR_SHAPIRA:
+            return PASO_YAIR_SHAPIRA_COARSENING;
+
+        case escript::SO_REORDERING_DEFAULT:
+            return PASO_DEFAULT_REORDERING;
+        case escript::SO_REORDERING_MINIMUM_FILL_IN:
+            return PASO_MINIMUM_FILL_IN;
+        case escript::SO_REORDERING_NESTED_DISSECTION:
+            return PASO_NESTED_DISSECTION;
+        case escript::SO_REORDERING_NONE:
+            return PASO_NO_REORDERING;
+
+        default:
+            std::stringstream temp;
+            temp << "Error - Cannot map option value "<< escriptOption
+                 << " onto Paso";
+            throw PasoException(temp.str());
+    }
+}
+
+void Options::updateEscriptDiagnostics(bp::object& options) const
+{
+#define SET(__key__,__val__,__type__) options.attr("_updateDiagnostics")(__key__,(__type__)__val__)
+   SET("num_iter", num_iter, int);
+   SET("num_level", num_level, int);
+   SET("num_inner_iter", num_inner_iter, int);
+   SET("time", time, double);
+   SET("set_up_time", set_up_time, double);
+   SET("net_time", net_time, double);
+   SET("residual_norm", residual_norm, double);
+   SET("converged", converged, bool);
+   SET("time_step_backtracking_used", time_step_backtracking_used, bool);
+   SET("coarse_level_sparsity", coarse_level_sparsity, double);
+   SET("num_coarse_unknowns", num_coarse_unknowns, int);
+#undef SET
+}
+
 } // namespace paso
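
Options::getPackage now throws instead of setting an error flag when a direct solver is requested but no serial direct package fits the MPI configuration. A condensed sketch of that decision, with booleans standing in for the ESYS_HAVE_MKL / ESYS_HAVE_UMFPACK compile switches:

#include <stdexcept>

enum class Package { Paso, MKL, UMFPACK };

// Condensed version of the direct-solver branch of Options::getPackage:
// MKL is preferred, then UMFPACK, and both are refused under MPI because
// they require a matrix format that is not supported with MPI.
Package pickDirectPackage(bool haveMKL, bool haveUMFPACK, int mpiSize)
{
    if (mpiSize == 1) {
        if (haveMKL) return Package::MKL;
        if (haveUMFPACK) return Package::UMFPACK;
        return Package::Paso;                  // fall back to Paso's own solvers
    }
    if (haveMKL)
        throw std::runtime_error("MKL does not currently support MPI");
    if (haveUMFPACK)
        throw std::runtime_error("UMFPACK does not currently support MPI");
    return Package::Paso;
}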
 
diff --git a/paso/src/Options.h b/paso/src/Options.h
index 405f5a7..b5ca1f0 100644
--- a/paso/src/Options.h
+++ b/paso/src/Options.h
@@ -29,7 +29,9 @@
 #ifndef __PASO_OPTIONS_H__
 #define __PASO_OPTIONS_H__
 
-#include "esysUtils/types.h"
+#include "Paso.h"
+
+#include <boost/python/object.hpp>
 
 // valid solver options
 #define PASO_DEFAULT 0
@@ -61,8 +63,6 @@
 #define PASO_GS PASO_GAUSS_SEIDEL
 #define PASO_RILU 29
 #define PASO_DEFAULT_REORDERING 30
-#define PASO_SUPER_LU 31
-#define PASO_PASTIX 32
 #define PASO_YAIR_SHAPIRA_COARSENING 33
 #define PASO_RUGE_STUEBEN_COARSENING 34
 #define PASO_AGGREGATION_COARSENING 35
@@ -86,11 +86,13 @@
 
 namespace paso {
 
-PASO_DLL_API
 struct Options
 {
     Options() { setDefaults(); }
 
+    /// constructor that fills values from an escript SolverBuddy instance
+    Options(const boost::python::object& options);
+
     /// sets the default values for solver options
     void setDefaults();
 
@@ -100,14 +102,20 @@ struct Options
     /// prints diagnostic data
     void showDiagnostics() const;
 
+    /// updates SolverBuddy diagnostics from this
+    void updateEscriptDiagnostics(boost::python::object& options) const;
+
+    /// returns the corresponding paso option code for an escript option code
+    static int mapEscriptOption(int escriptOption);
+
     static const char* name(int key);
 
     static int getPackage(int solver, int package, bool symmetry,
-                          const esysUtils::JMPI& mpi_info);
+                          const escript::JMPI& mpi_info);
 
     /// returns the solver to be used with given combination
     static int getSolver(int solver, int package, bool symmetry,
-                         const esysUtils::JMPI& mpi_info);
+                         const escript::JMPI& mpi_info);
 
     int method;
     int package;
diff --git a/paso/src/PCG.cpp b/paso/src/PCG.cpp
index 165aebe..49c6e90 100644
--- a/paso/src/PCG.cpp
+++ b/paso/src/PCG.cpp
@@ -49,9 +49,8 @@
 *  ==============================================================
 */
 
-#include "SystemMatrix.h"
-#include "Paso.h"
 #include "Solver.h"
+#include "SystemMatrix.h"
 
 namespace paso {
 
@@ -60,14 +59,14 @@ namespace paso {
 #define USE_DYNAMIC_SCHEDULING
 #endif
 
-err_t Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
-                 double* tolerance, Performance* pp)
+SolverResult Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
+                        double* tolerance, Performance* pp)
 {
     dim_t maxit,num_iter_global, len,rest, np, ipp;
-    register double ss,ss1;
+    double ss,ss1;
     dim_t i0, istart, iend;
     bool breakFlag=false, maxIterFlag=false, convergeFlag=false;
-    err_t status = SOLVER_NO_ERROR;
+    SolverResult status = NoError;
     const dim_t n = A->getTotalNumRows();
     double *resid = tolerance;
     double tau_old,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
@@ -75,7 +74,7 @@ err_t Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     double loc_sum[2], sum[2];
 #endif
     double norm_of_residual=0,norm_of_residual_global;
-    register double d;
+    double d;
 
 #ifdef USE_DYNAMIC_SCHEDULING
     dim_t chunk_size=-1;
@@ -89,7 +88,11 @@ err_t Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     n_chunks=n/chunk_size;
     if (n_chunks*chunk_size<n) n_chunks+=1;
 #else
+#ifdef _OPENMP
     np=omp_get_max_threads();
+#else
+    np=1;
+#endif
     len=n/np;
     rest=n-len*np;
 #endif
@@ -217,7 +220,7 @@ err_t Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
         // v = A*p
         Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
         Performance_startMonitor(pp, PERFORMANCE_MVM);
-        SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, p,PASO_ZERO,v);
+        A->MatrixVector_CSR_OFFSET0(PASO_ONE, p, PASO_ZERO, v);
         Performance_stopMonitor(pp, PERFORMANCE_MVM);
         Performance_startMonitor(pp, PERFORMANCE_SOLVER);
 
@@ -348,9 +351,9 @@ err_t Solver_PCG(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     num_iter_global = num_iter;
     norm_of_residual_global = norm_of_residual;
     if (maxIterFlag) {
-        status = SOLVER_MAXITER_REACHED;
+        status = MaxIterReached;
     } else if (breakFlag) {
-        status = SOLVER_BREAKDOWN;
+        status = Breakdown;
     }
     Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
     delete[] rs;
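
When dynamic scheduling is disabled, PCG splits the n unknowns into np contiguous blocks from len = n/np and rest = n - len*np, and the new guard lets np default to 1 when OpenMP is absent. One common way to derive per-thread ranges from those two quantities (an illustration of the blocking, not code lifted from PCG.cpp):

#include <algorithm>
#include <iostream>
#include <utility>

// Balanced contiguous partition of n rows over np threads: the first 'rest'
// threads get len+1 rows, the remaining ones get len rows.
std::pair<long, long> threadRange(long n, long np, long ipp)
{
    const long len = n / np;
    const long rest = n - len * np;
    const long istart = ipp * len + std::min(ipp, rest);
    const long iend = istart + len + (ipp < rest ? 1 : 0);
    return std::make_pair(istart, iend);
}

int main()
{
    for (long t = 0; t < 3; ++t) {
        std::pair<long, long> r = threadRange(10, 3, t);
        std::cout << "thread " << t << ": [" << r.first << ", "
                  << r.second << ")\n";
    }
    return 0;
}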
diff --git a/paso/src/Paso.h b/paso/src/Paso.h
index f907d62..ad8dfab 100644
--- a/paso/src/Paso.h
+++ b/paso/src/Paso.h
@@ -29,32 +29,38 @@
 #ifndef __PASO_H__
 #define __PASO_H__
 
-#include <cfloat>
-#include <esysUtils/error.h>
-#include <esysUtils/Esys_MPI.h>
-#include <esysUtils/index.h>
-#include <esysUtils/maths.h>
+#include <escript/index.h>
+#include <escript/DataTypes.h>
+#include <escript/EsysMPI.h>
 
 #include <boost/enable_shared_from_this.hpp>
 #include <boost/shared_ptr.hpp>
 
+namespace paso {
+
+// return codes used by the solvers
+enum SolverResult {
+    NoError = 0,
+    MaxIterReached,
+    InputError,
+    MemoryError,
+    Breakdown,
+    NegativeNormError,
+    Divergence
+};
+
+using escript::DataTypes::dim_t;
+using escript::DataTypes::index_t;
+using escript::DataTypes::real_t;
+
+}
+
 #define PASO_DLL_API
-#ifdef _WIN32
-#   ifndef PASO_STATIC_LIB
-#      undef PASO_DLL_API
-#      ifdef PASO_EXPORTS
-#         define PASO_DLL_API __declspec(dllexport)
-#      else
-#         define PASO_DLL_API __declspec(dllimport)
-#      endif
-#   endif
-#endif
 
 #define MATRIX_FORMAT_DEFAULT 1
 #define MATRIX_FORMAT_CSC 2
 #define MATRIX_FORMAT_BLK1 4
 #define MATRIX_FORMAT_OFFSET1 8
-#define MATRIX_FORMAT_TRILINOS_CRS 16
 #define MATRIX_FORMAT_DIAGONAL_BLOCK 32
 
 #define PASO_ONE (double)(1.0)
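
The old SOLVER_* integer codes used with err_t are replaced by the SolverResult enum in namespace paso shown above. A minimal sketch of how calling code might map the new codes to messages (the enum values are copied from the hunk above; describe() is a hypothetical helper, not part of the patch):

    #include <string>

    enum SolverResult {            // copied from the new paso/src/Paso.h
        NoError = 0,
        MaxIterReached,
        InputError,
        MemoryError,
        Breakdown,
        NegativeNormError,
        Divergence
    };

    std::string describe(SolverResult res)
    {
        switch (res) {
            case NoError:           return "solver converged";
            case MaxIterReached:    return "maximum number of iterations reached";
            case InputError:        return "invalid input";
            case MemoryError:       return "memory allocation failed";
            case Breakdown:         return "solver breakdown";
            case NegativeNormError: return "negative norm encountered";
            case Divergence:        return "solver diverged";
        }
        return "unknown solver result";
    }
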
diff --git a/escriptcore/test/DataAlgorithmAdapterTestCase.h b/paso/src/PasoException.h
similarity index 54%
copy from escriptcore/test/DataAlgorithmAdapterTestCase.h
copy to paso/src/PasoException.h
index 400d723..81882ee 100644
--- a/escriptcore/test/DataAlgorithmAdapterTestCase.h
+++ b/paso/src/PasoException.h
@@ -14,24 +14,32 @@
 *
 *****************************************************************************/
 
+#ifndef __PASO_EXCEPTION_H__
+#define __PASO_EXCEPTION_H__
 
-#if !defined DataAlgorithmAdapterTestCase_20040715_H
-#define DataAlgorithmAdapterTestCase_20040715_H
+#include <escript/EsysException.h>
 
-#include <cppunit/TestFixture.h>
-#include <cppunit/TestSuite.h>
+namespace paso {
 
-#define REL_TOL ((double)1.e-10)
+/**
+  \brief
+  PasoException exception class.
 
-class DataAlgorithmAdapterTestCase : public CppUnit::TestFixture
+  Description:
+  Exception class used by the Paso library.
+  It derives from escript::EsysException and carries an error message.
+*/
+class PasoException : public escript::EsysException
 {
 public:
-  void testAll();
-  void testAlgorithm();
-  void testDpAlgorithm();
-
-  static CppUnit::TestSuite* suite();
+    PasoException(const std::string& str) : EsysException(str) {}
+    virtual ~PasoException() throw() {}
 };
 
-#endif
+void checkPasoError(); 
+
+
+} // end of namespace
+
+#endif // __PASO_EXCEPTION_H__
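
With PasoException in place, error reporting in paso moves from the old Esys_setError()/Esys_noError() calls (removed throughout the hunks below) to ordinary C++ exceptions. A sketch of the intended usage, assuming the header installs as paso/PasoException.h and that escript::EsysException exposes the usual std::exception::what() interface:

    #include <paso/PasoException.h>   // assumed install path of the new header
    #include <iostream>

    static void requireSquare(int rows, int cols)
    {
        // Conditions that previously set an error flag now throw.
        if (rows != cols)
            throw paso::PasoException("pattern must be square");
    }

    int main()
    {
        try {
            requireSquare(3, 4);
        } catch (const paso::PasoException& e) {
            std::cerr << e.what() << std::endl;
        }
        return 0;
    }
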
 
diff --git a/paso/src/PasoUtil.cpp b/paso/src/PasoUtil.cpp
index e23a30e..bfa8f68 100644
--- a/paso/src/PasoUtil.cpp
+++ b/paso/src/PasoUtil.cpp
@@ -14,16 +14,7 @@
 *
 *****************************************************************************/
 
-
-/****************************************************************************/
-
-/*   Some utility routines: */
-
-/****************************************************************************/
-
-/*   Copyrights by ACcESS Australia, 2003,2004,2005 */
-
-/****************************************************************************/
+// Some utility routines
 
 #include "PasoUtil.h"
 
@@ -36,64 +27,54 @@ int comparIndex(const void* index1, const void* index2)
     return *(index_t*)index1 - *(index_t*)index2;
 }
 
-bool isAny(dim_t N, index_t* array, index_t value)
+bool isAny(dim_t N, const index_t* array, index_t value)
 {
-    bool out=false;
-    register bool out2;
-    dim_t i;
-
-    #pragma omp parallel private(i, out2)
-    {
-        out2=false;
-        #pragma omp for schedule(static)
-        for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
-        #pragma omp critical
-        {
-            out = out || out2;
-        }
-        /*  this is how this should look like but gcc 4.4 seems to have
-         *  a problem with this:
-        #pragma omp parallel for private(i) schedule(static) reduction(||:out)
-        for (i=0;i<N;i++) out = out || (array[i]==value);
-        */
-    }
+    bool out = false;
+#pragma omp parallel for reduction(||:out)
+    for (index_t i = 0; i < N; i++)
+        out = out || (array[i]==value);
     return out;
 }
 
-dim_t numPositives(dim_t N, const double *x)
+dim_t numPositives(dim_t N, const double* x, escript::JMPI mpiInfo)
 {
-    dim_t out=0;
-    register dim_t out2;
-    dim_t i;
+    dim_t myOut = 0;
+    dim_t out = 0;
 
-    #pragma omp parallel private(i, out2)
+#pragma omp parallel
     {
-        out2=0;
-        #pragma omp for schedule(static)
-        for (i=0;i<N;i++) {
-            if ( x[i] > 0 ) out2++;
+        dim_t localOut = 0;
+#pragma omp for schedule(static)
+        for (index_t i = 0; i < N; i++) {
+            if (x[i] > 0)
+                localOut++;
         }
-        #pragma omp critical
+#pragma omp critical
         {
-            out = out + out2;
+            myOut = myOut + localOut;
         }
     }
+#ifdef ESYS_MPI
+    MPI_Allreduce(&myOut, &out, 1, MPI_DIM_T, MPI_SUM, mpiInfo->comm);
+#else
+    out = myOut;
+#endif
     return out;
 }
 
 index_t iMax(dim_t N, const index_t* array)
 {
-    index_t out=INDEX_T_MIN;
-    index_t out2;
-    dim_t i;
-    if (N>0) {
-        #pragma omp parallel private(i, out2)
-        {
-            out2=INDEX_T_MIN;
-            #pragma omp for schedule(static)
-            for (i=0;i<N;i++) out2 = std::max(out2, array[i]);
+    const index_t INDEX_T_MIN = escript::DataTypes::index_t_min();
+    index_t out = INDEX_T_MIN;
 
-            #pragma omp critical
+    if (N > 0) {
+#pragma omp parallel
+        {
+            index_t out2 = INDEX_T_MIN;
+#pragma omp for schedule(static)
+            for (index_t i = 0; i < N; i++)
+                out2 = std::max(out2, array[i]);
+#pragma omp critical
             {
                 out = std::max(out, out2);
             }
@@ -104,47 +85,52 @@ index_t iMax(dim_t N, const index_t* array)
 
 index_t cumsum(dim_t N, index_t* array)
 {
-    index_t out=0,tmp;
+    index_t out = 0, tmp;
     dim_t i;
-    index_t *partial_sums=NULL,sum;
-    const int num_threads=omp_get_max_threads();
-    int thread_num;
+#ifdef _OPENMP
+    const int num_threads = omp_get_max_threads();
+#else
+    const int num_threads = 1;
+#endif
 
-    if (num_threads>1) {
-        partial_sums=new index_t[num_threads];
-        #pragma omp parallel private(sum,thread_num ,i,tmp)
+    if (num_threads > 1) {
+#ifdef _OPENMP
+        index_t* partial_sums = new index_t[num_threads];
+#pragma omp parallel private(i,tmp)
         {
-            sum=0;
-            thread_num=omp_get_thread_num();
-            #pragma omp for schedule(static)
-            for (i=0;i<N;++i) sum+=array[i];
+            index_t sum = 0;
+            const int thread_num = omp_get_thread_num();
+#pragma omp for schedule(static)
+            for (i = 0; i < N; ++i)
+                sum += array[i];
 
             partial_sums[thread_num]=sum;
-            #pragma omp barrier
-            #pragma omp master
+#pragma omp barrier
+#pragma omp master
             {
                 out=0;
-                for (i=0;i<num_threads;++i) {
-                    tmp=out;
-                    out+=partial_sums[i];
-                    partial_sums[i]=tmp;
+                for (i = 0; i < num_threads; ++i) {
+                    tmp = out;
+                    out += partial_sums[i];
+                    partial_sums[i] = tmp;
                 }
             }
-            #pragma omp barrier
-            sum=partial_sums[thread_num];
-            #pragma omp for schedule(static)
-            for (i=0;i<N;++i) {
-                tmp=sum;
-                sum+=array[i];
-                array[i]=tmp;
+#pragma omp barrier
+            sum = partial_sums[thread_num];
+#pragma omp for schedule(static)
+            for (i = 0; i < N; ++i) {
+                tmp = sum;
+                sum += array[i];
+                array[i] = tmp;
             }
         }
         delete[] partial_sums;
+#endif // _OPENMP
     } else {
-        for (i=0;i<N;++i) {
-            tmp=out;
-            out+=array[i];
-            array[i]=tmp;
+        for (i = 0; i < N; ++i) {
+            tmp = out;
+            out += array[i];
+            array[i] = tmp;
         }
     }
     return out;
@@ -152,18 +138,21 @@ index_t cumsum(dim_t N, index_t* array)
 
 index_t cumsum_maskedTrue(dim_t N, index_t* array, int* mask)
 {
-    index_t out=0,tmp;
+    index_t out=0;
     dim_t i;
-    index_t *partial_sums=NULL,sum;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
-    int thread_num;
+#else
+    const int num_threads=1;
+#endif
 
     if (num_threads>1) {
-        partial_sums=new index_t[num_threads];
-        #pragma omp parallel private(sum,i,thread_num,tmp)
+#ifdef _OPENMP
+        index_t* partial_sums=new index_t[num_threads];
+        #pragma omp parallel private(i)
         {
-            sum=0;
-            thread_num=omp_get_thread_num();
+            index_t sum=0, tmp;
+            const int thread_num = omp_get_thread_num();
             #pragma omp for schedule(static)
             for (i=0;i<N;++i) {
                 if (mask[i]) {
@@ -198,6 +187,7 @@ index_t cumsum_maskedTrue(dim_t N, index_t* array, int* mask)
             }
         }
         delete[] partial_sums;
+#endif
     } else { /* num_threads=1 */
         for (i=0;i<N;++i) {
             if (mask[i]) {
@@ -213,18 +203,21 @@ index_t cumsum_maskedTrue(dim_t N, index_t* array, int* mask)
 
 index_t cumsum_maskedFalse(dim_t N, index_t* array, int* mask)
 {
-    index_t out=0,tmp=0;
+    index_t out=0;
     dim_t i;
-    index_t *partial_sums=NULL,sum;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
-    int thread_num=0;
+#else
+    const int num_threads=1;
+#endif
 
     if (num_threads>1) {
-        partial_sums=new index_t[num_threads];
-        #pragma omp parallel private(sum,i,thread_num,tmp)
+#ifdef _OPENMP
+        index_t* partial_sums=new index_t[num_threads];
+        #pragma omp parallel private(i)
         {
-            sum=0;
-            thread_num=omp_get_thread_num();
+            index_t sum = 0, tmp = 0;
+            const int thread_num=omp_get_thread_num();
             #pragma omp for schedule(static)
             for (i=0;i<N;++i) {
                 if (! mask[i]) {
@@ -259,6 +252,7 @@ index_t cumsum_maskedFalse(dim_t N, index_t* array, int* mask)
             }
         }
         delete[] partial_sums;
+#endif
     } else {
         for (i=0;i<N;++i) {
             if (! mask[i]) {
@@ -279,13 +273,17 @@ index_t arg_max(dim_t n, dim_t* lambda)
     index_t argmax=-1;
     index_t lmax=-1;
     index_t li=-1;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
     if (n>0) {
         max=lambda[0];
         argmax=0;
-        if (num_threads>1) {
-         #pragma omp parallel private(i,lmax,li)
+        if (num_threads > 1) {
+#pragma omp parallel private(i,lmax,li)
             {
                 lmax=max;
                 li=argmax;
@@ -321,7 +319,11 @@ index_t arg_max(dim_t n, dim_t* lambda)
 void zeroes(dim_t n, double* x)
 {
     dim_t i,local_n,rest,n_start,n_end,q;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
     for (i=0;i<num_threads;++i) {
@@ -337,7 +339,11 @@ void zeroes(dim_t n, double* x)
 void update(dim_t n, double a, double* x, double b, const double* y)
 {
     dim_t i,local_n,rest,n_start,n_end,q;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
     for (i=0;i<num_threads;++i) {
@@ -383,7 +389,11 @@ void linearCombination(dim_t n, double* z, double a, const double* x,
                        double b, const double* y)
 {
     dim_t i,local_n,rest,n_start,n_end,q;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
     for (i=0;i<num_threads;++i) {
@@ -415,12 +425,16 @@ void linearCombination(dim_t n, double* z, double a, const double* x,
     }
 }
 
-double innerProduct(const dim_t n,const double* x, const double* y,
-                    esysUtils::JMPI& mpiinfo)
+double innerProduct(dim_t n, const double* x, const double* y,
+                    escript::JMPI mpiinfo)
 {
     dim_t i,local_n,rest,n_start,n_end,q;
     double my_out=0, local_out=0., out=0.;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
     for (i=0; i<num_threads; ++i) {
@@ -449,11 +463,15 @@ double innerProduct(const dim_t n,const double* x, const double* y,
     return out;
 }
 
-double lsup(dim_t n, const double* x, const esysUtils::JMPI&  mpiinfo)
+double lsup(dim_t n, const double* x, escript::JMPI mpiinfo)
 {
     dim_t i,local_n,rest,n_start,n_end,q;
     double my_out=0., local_out=0., out=0.;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
     for (i=0; i<num_threads; ++i) {
@@ -475,16 +493,19 @@ double lsup(dim_t n, const double* x, const esysUtils::JMPI&  mpiinfo)
         MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
     }
 #else
-    out=my_out;
+    out = my_out;
 #endif
-
     return out;
 }
 
-double l2(dim_t n, const double* x, const esysUtils::JMPI&  mpiinfo)
+double l2(dim_t n, const double* x, escript::JMPI mpiinfo)
 {
-    double my_out=0, out=0.;
+    double my_out = 0, out = 0.;
+#ifdef _OPENMP
     const int num_threads=omp_get_max_threads();
+#else
+    const int num_threads=1;
+#endif
 
 #pragma omp parallel for
     for (dim_t i=0; i<num_threads; ++i) {
@@ -506,7 +527,7 @@ double l2(dim_t n, const double* x, const esysUtils::JMPI&  mpiinfo)
         MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
     }
 #else
-    out=my_out;
+    out = my_out;
 #endif
 
     return sqrt(out);
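
The isAny() rewrite above drops the manual critical-section workaround (which dated from a gcc 4.4 limitation) in favour of OpenMP's built-in ||-reduction. A self-contained sketch of the same idiom, independent of the paso types:

    // Returns true if value occurs anywhere in array[0..N-1].
    // Compiles and runs serially if OpenMP is disabled.
    bool containsValue(long N, const int* array, int value)
    {
        bool out = false;
    #pragma omp parallel for reduction(||:out)
        for (long i = 0; i < N; i++)
            out = out || (array[i] == value);
        return out;
    }

cumsum() keeps its exclusive prefix-sum semantics: on return, array[i] holds the sum of the original entries 0..i-1 and the function returns the overall total; only the thread-count handling and variable scoping changed.
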
diff --git a/paso/src/PasoUtil.h b/paso/src/PasoUtil.h
index 54d3ebf..a24b336 100644
--- a/paso/src/PasoUtil.h
+++ b/paso/src/PasoUtil.h
@@ -57,13 +57,13 @@ index_t iMax(dim_t N, const index_t* array);
 
 /// returns the inner product of global arrays x and y
 double innerProduct(dim_t N, const double* x, const double* y,
-                    esysUtils::JMPI& mpiinfo);
+                    escript::JMPI mpiInfo);
 
 /// returns true if array contains value
-bool isAny(dim_t N, index_t* array, index_t value);
+bool isAny(dim_t N, const index_t* array, index_t value);
 
 /// returns the global L2 norm of x
-double l2(dim_t N, const double* x, const esysUtils::JMPI& mpiinfo);
+double l2(dim_t N, const double* x, escript::JMPI mpiInfo);
 
 /// Performs an update of the form z = a*x+b*y  where y and x are long vectors.
 /// If a=0, x is not used; if b=0, y is not used.
@@ -71,10 +71,10 @@ void linearCombination(dim_t N, double* z, double a, const double* x, double b,
                        const double* y);
 
 /// returns the global Lsup of x
-double lsup(dim_t N, const double* x, const esysUtils::JMPI& mpiinfo);
+double lsup(dim_t N, const double* x, escript::JMPI mpiInfo);
 
 /// returns the number of positive values in x
-dim_t numPositives(dim_t N, const double* x);
+dim_t numPositives(dim_t N, const double* x, escript::JMPI mpiInfo);
 
 /// Performs an update of the form x = a*x+b*y  where y and x are long vectors.
 /// If b=0, y is not used.
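
numPositives() now takes an escript::JMPI argument and returns a global count: the per-rank tally is combined with MPI_Allreduce when ESYS_MPI is defined, as shown in the PasoUtil.cpp hunk above. The reduction itself, sketched with plain MPI types and no paso dependencies:

    #include <mpi.h>

    // Count entries > 0 on this rank, then sum the counts across all ranks.
    long countPositivesGlobal(long N, const double* x, MPI_Comm comm)
    {
        long local = 0;
        for (long i = 0; i < N; ++i)
            if (x[i] > 0)
                ++local;

        long global = 0;
        MPI_Allreduce(&local, &global, 1, MPI_LONG, MPI_SUM, comm);
        return global;
    }
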
diff --git a/paso/src/Pattern.cpp b/paso/src/Pattern.cpp
index bdda7d9..1f6c471 100644
--- a/paso/src/Pattern.cpp
+++ b/paso/src/Pattern.cpp
@@ -26,10 +26,12 @@
 /****************************************************************************/
 
 #include "Pattern.h"
+#include "PasoException.h"
 #include "PasoUtil.h"
+
 #include <boost/scoped_array.hpp>
 
-using esysUtils::IndexList;
+using escript::IndexList;
 
 namespace paso {
 
@@ -47,7 +49,6 @@ Pattern::Pattern(int ntype, dim_t numOut, dim_t numIn, index_t* inPtr,
 {
     const index_t index_offset = (ntype & MATRIX_FORMAT_OFFSET1 ? 1:0);
     index_t min_index = index_offset, max_index = index_offset-1;
-    Esys_resetError();
 
     if (inPtr!=NULL && idx != NULL) {
 #pragma omp parallel
@@ -81,7 +82,7 @@ Pattern::Pattern(int ntype, dim_t numOut, dim_t numIn, index_t* inPtr,
       } // parallel section
 
       if (min_index < index_offset || max_index >= numIn+index_offset) {
-          Esys_setError(TYPE_ERROR, "Pattern: Pattern index out of range.");
+          throw PasoException("Pattern: Pattern index out of range.");
       }
       len = ptr[numOutput] - index_offset;
     }
@@ -128,11 +129,6 @@ Pattern_ptr Pattern::fromIndexListArray(dim_t n0, dim_t n,
     Pattern_ptr out(new Pattern(MATRIX_FORMAT_DEFAULT, n-n0,
                                 range_max+index_offset, ptr, index));
 
-    if (!Esys_noError()) {
-        delete[] ptr;
-        delete[] index;
-        out.reset();
-    }
     return out;
 }
 
@@ -178,7 +174,7 @@ index_t* Pattern::borrowColoringPointer()
             mis_marker[i] = -1;
         }
 
-        while (util::isAny(n, coloring, -1) && Esys_noError()) {
+        while (util::isAny(n, coloring, -1)) {
             mis(mis_marker);
 
 #pragma omp parallel for schedule(static)
@@ -201,7 +197,6 @@ Pattern_ptr Pattern::getSubpattern(dim_t newNumRows, dim_t newNumCols,
                                    const index_t* new_col_index) const
 {
     const index_t index_offset=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
-    Esys_resetError();
 
     index_t* newPtr = new index_t[newNumRows+1];
 #pragma omp parallel
@@ -244,10 +239,6 @@ Pattern_ptr Pattern::getSubpattern(dim_t newNumRows, dim_t newNumCols,
     }
     // create return value
     Pattern_ptr out(new Pattern(type, newNumRows, newNumCols, newPtr, newIndex));
-    if (!Esys_noError()) {
-        delete[] newIndex;
-        delete[] newPtr;
-    }
     return out;
 }
 
@@ -258,7 +249,6 @@ Pattern_ptr Pattern::unrollBlocks(int newType, dim_t output_block_size,
     const index_t index_offset_in=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
     const index_t index_offset_out=(newType & MATRIX_FORMAT_OFFSET1 ? 1:0);
 
-    Esys_resetError();
     if (output_block_size == 1 && input_block_size == 1 &&
           (type & MATRIX_FORMAT_OFFSET1) == (newType & MATRIX_FORMAT_OFFSET1)) {
         out = shared_from_this();
@@ -309,10 +299,6 @@ Pattern_ptr Pattern::unrollBlocks(int newType, dim_t output_block_size,
             }
         } // parallel section
         out.reset(new Pattern(newType, new_numOutput, new_numInput, newPtr, newIndex));
-        if (!Esys_noError()) {
-            delete[] newIndex;
-            delete[] newPtr;
-        }
     }
     return out;
 }
diff --git a/paso/src/Pattern.h b/paso/src/Pattern.h
index b0468ed..5be155a 100644
--- a/paso/src/Pattern.h
+++ b/paso/src/Pattern.h
@@ -29,7 +29,8 @@
 #define __PASO_PATTERN_H__
 
 #include "Paso.h"
-#include <esysUtils/IndexList.h>
+
+#include <escript/IndexList.h>
 
 namespace paso {
 
@@ -37,7 +38,6 @@ struct Pattern;
 typedef boost::shared_ptr<Pattern> Pattern_ptr;
 typedef boost::shared_ptr<const Pattern> const_Pattern_ptr;
 
-PASO_DLL_API
 struct Pattern : boost::enable_shared_from_this<Pattern>
 {
     Pattern(int type, dim_t numOutput, dim_t numInput, index_t* ptr,
@@ -64,7 +64,7 @@ struct Pattern : boost::enable_shared_from_this<Pattern>
     index_t* borrowMainDiagonalPointer();
 
     static Pattern_ptr fromIndexListArray(dim_t n0, dim_t n,
-            const esysUtils::IndexList* index_list_array,
+            const escript::IndexList* index_list_array,
             index_t range_min, index_t range_max, index_t index_offset);
 
     index_t* borrowColoringPointer();
diff --git a/paso/src/Pattern_mis.cpp b/paso/src/Pattern_mis.cpp
index 5f897f8..9cd495a 100644
--- a/paso/src/Pattern_mis.cpp
+++ b/paso/src/Pattern_mis.cpp
@@ -32,6 +32,7 @@
 /****************************************************************************/
 
 #include "Pattern.h"
+#include "PasoException.h"
 #include "PasoUtil.h"
 
 namespace paso {
@@ -48,8 +49,7 @@ void Pattern::mis(index_t* mis_marker) const
 {
     const index_t index_offset=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
     if (numOutput != numInput) {
-        Esys_setError(VALUE_ERROR, "Pattern::mis: pattern must be square.");
-        return;
+        throw PasoException("Pattern::mis: pattern must be square.");
     }
 
     const dim_t n = numOutput;
diff --git a/paso/src/Pattern_reduceBandwidth.cpp b/paso/src/Pattern_reduceBandwidth.cpp
index 2f01674..f4c0c66 100644
--- a/paso/src/Pattern_reduceBandwidth.cpp
+++ b/paso/src/Pattern_reduceBandwidth.cpp
@@ -34,6 +34,7 @@
 /****************************************************************************/
 
 #include "Pattern.h"
+#include "PasoException.h"
 
 namespace paso {
 
@@ -125,16 +126,8 @@ bool dropTree(index_t root, const Pattern* pattern, index_t* AssignedLevel,
                 const index_t itest = pattern->index[j];
                 if (AssignedLevel[itest] < 0) {
 #ifdef BOUNDS_CHECK
-                    if (itest < 0 || itest >= N) {
-                        printf("BOUNDS_CHECK %s %d itest=%d\n",
-                                __FILE__, __LINE__, itest);
-                        exit(1);
-                    }
-                    if (level_top < 0 || level_top >= N) {
-                        printf("BOUNDS_CHECK %s %d level_top=%d\n",
-                                __FILE__, __LINE__, level_top);
-                        exit(1);
-                    }
+                    ESYS_ASSERT(itest >= 0 && itest < N, "BOUNDS_CHECK: itest=" << itest << ", N=" << N);
+                    ESYS_ASSERT(level_top >= 0 && level_top < N, "BOUNDS_CHECK: level_top=" << level_top << ", N=" << N);
 #endif
                     AssignedLevel[itest] = nlvls;
                     VerticesInTree[level_top] = itest;
@@ -151,8 +144,7 @@ bool dropTree(index_t root, const Pattern* pattern, index_t* AssignedLevel,
 void Pattern::reduceBandwidth(index_t* oldToNew)
 {
     if (numOutput != numInput) {
-        Esys_setError(VALUE_ERROR, "Pattern::reduceBandwidth: pattern needs to be for a square matrix.");
-        return;
+        throw PasoException("Pattern::reduceBandwidth: pattern needs to be for a square matrix.");
     } else if (numOutput == 0) {
         return;
     }
@@ -184,7 +176,7 @@ void Pattern::reduceBandwidth(index_t* oldToNew)
     // create an ordering with increasing degree
     qsort(degAndIdx, (size_t)N, sizeof(DegreeAndIdx), comparDegreeAndIdx);
     index_t root = degAndIdx[0].idx;
-    dim_t numLabledVertices = 0;
+    dim_t numLabeledVertices = 0;
 
     while (root >= 0) {
         dim_t max_LevelWidth = N+1;
@@ -193,13 +185,10 @@ void Pattern::reduceBandwidth(index_t* oldToNew)
                         &numLevels, firstVertexInLevel, max_LevelWidth, N)) {
             // find new maximum level width
             max_LevelWidth=0;
-            for (i=0; i<numLevels; ++i) {
 #ifdef BOUNDS_CHECK
-                if (i >= N+1) {
-                    printf("BOUNDS_CHECK %s %d i=%d N=%d\n", __FILE__,
-                            __LINE__, i, N); exit(1);
-                }
+            ESYS_ASSERT(numLevels <= N, "BOUNDS_CHECK: numLevels=" << numLevels << ", N=" << N);
 #endif
+            for (i = 0; i < numLevels; ++i) {
                 max_LevelWidth=std::max(max_LevelWidth, firstVertexInLevel[i+1]-firstVertexInLevel[i]);
             }
             // find a vertex in the last level which has minimum degree
@@ -215,21 +204,27 @@ void Pattern::reduceBandwidth(index_t* oldToNew)
             }
             // save the vertices in the current tree
             numVerticesInTree=firstVertexInLevel[numLevels];
-            for (i=0;i<firstVertexInLevel[numLevels];++i) {
 #ifdef BOUNDS_CHECK
-                if (numLabledVertices+i < 0 || numLabledVertices+i >= N) { printf("BOUNDS_CHECK %s %d i=%d numLabeledVertices=%d root=%d N=%d firstVertexInLevel[numLevels]=%d\n", __FILE__, __LINE__, i, numLabledVertices, root, N, firstVertexInLevel[numLevels]); exit(1); }
+            ESYS_ASSERT(numLabeledVertices+firstVertexInLevel[numLevels] <= N,
+                    "BOUNDS_CHECK: numLabeledVertices=" << numLabeledVertices
+                    << ", root=" << root << ", N=" << N
+                    << ", first[numLevels]=" << firstVertexInLevel[numLevels]);
 #endif
-                oldLabel[numLabledVertices+i]=VerticesInTree[i];
+            for (i = 0; i < firstVertexInLevel[numLevels]; ++i) {
+                oldLabel[numLabeledVertices+i]=VerticesInTree[i];
             }
         }
-        // now the vertices in the current tree
-        for (i=0; i<numVerticesInTree; ++i) {
 #ifdef BOUNDS_CHECK
-            if (numLabledVertices+i < 0 || numLabledVertices+i >= N) { printf("BOUNDS_CHECK %s %d i=%d numLabeledVertices=%d root=%d N=%d\n", __FILE__, __LINE__, i, numLabledVertices, root, N); exit(1); }
+            ESYS_ASSERT(numLabeledVertices+numVerticesInTree <= N,
+                    "BOUNDS_CHECK: numLabeledVertices=" << numLabeledVertices
+                    << ", root=" << root << ", N=" << N
+                    << ", numVerticesInTree=" << numVerticesInTree);
 #endif
-            oldToNew[oldLabel[numLabledVertices+i]]=numLabledVertices+i;
+        // now the vertices in the current tree
+        for (i=0; i<numVerticesInTree; ++i) {
+            oldToNew[oldLabel[numLabeledVertices+i]]=numLabeledVertices+i;
         }
-        numLabledVertices+=numVerticesInTree;
+        numLabeledVertices+=numVerticesInTree;
         // new search for a vertex which is not labeled yet
         root=-1;
         for (i=0; i<N; ++i) {
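
The BOUNDS_CHECK diagnostics in this file are condensed from printf()/exit() pairs into single stream-style ESYS_ASSERT statements, and they are hoisted out of the loops because the tested bound no longer depends on the loop index. A stand-alone sketch of that shape, using a hypothetical local macro so the snippet does not rely on escript internals:

    #include <cstdlib>
    #include <iostream>
    #include <sstream>

    // Hypothetical stand-in for a stream-style assert; the real ESYS_ASSERT
    // may report or abort differently.
    #define CHECK(cond, msg)                                        \
        do {                                                        \
            if (!(cond)) {                                          \
                std::ostringstream oss;                             \
                oss << msg;                                         \
                std::cerr << "check failed: " << oss.str() << "\n"; \
                std::abort();                                       \
            }                                                       \
        } while (0)

    void copyLabels(int* oldLabel, const int* tree, int numLabeled, int count, int N)
    {
        // One loop-invariant check instead of a test on every iteration.
        CHECK(numLabeled + count <= N, "numLabeled=" << numLabeled << ", N=" << N);
        for (int i = 0; i < count; ++i)
            oldLabel[numLabeled + i] = tree[i];
    }
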
diff --git a/paso/src/Preconditioner.cpp b/paso/src/Preconditioner.cpp
index e8b2506..5097571 100644
--- a/paso/src/Preconditioner.cpp
+++ b/paso/src/Preconditioner.cpp
@@ -25,10 +25,10 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
-#include "SystemMatrix.h"
-#include "PasoUtil.h"
 #include "Preconditioner.h"
+#include "Options.h"
+#include "PasoUtil.h"
+#include "SystemMatrix.h"
 
 namespace paso {
 
@@ -47,7 +47,6 @@ void Preconditioner_free(Preconditioner* in)
 Preconditioner* Preconditioner_alloc(SystemMatrix_ptr A, Options* options)
 {
     Preconditioner* prec = new Preconditioner;
-    prec->type=UNKNOWN;
     prec->jacobi=NULL;
     prec->gs=NULL;
     prec->amg=NULL;
@@ -88,7 +87,7 @@ Preconditioner* Preconditioner_alloc(SystemMatrix_ptr A, Options* options)
         case PASO_BOOMERAMG:
         case PASO_AMLI:
         case PASO_AMG:
-            prec->amg = Preconditioner_AMG_Root_alloc(A, options);
+            prec->amg = Preconditioner_AMG_Root_alloc(boost::const_pointer_cast<SystemMatrix>(A), options);
             prec->type = PASO_AMG;
             break;
 
@@ -97,14 +96,12 @@ Preconditioner* Preconditioner_alloc(SystemMatrix_ptr A, Options* options)
                 printf("Preconditioner: ILU preconditioner is used.\n");
             prec->ilu = Solver_getILU(A->mainBlock, options->verbose);
             prec->type = PASO_ILU0;
-            Esys_MPIInfo_noError(A->mpi_info);
             break;
 
         case PASO_RILU:
             if (options->verbose)
                 printf("Preconditioner: RILU preconditioner is used.\n");
             prec->rilu = Solver_getRILU(A->mainBlock,options->verbose);
-            Esys_MPIInfo_noError(A->mpi_info);
             prec->type=PASO_RILU;
             break;
 
@@ -114,10 +111,6 @@ Preconditioner* Preconditioner_alloc(SystemMatrix_ptr A, Options* options)
             prec->type=PASO_NO_PRECONDITIONER;
             break;
     }
-    if (!Esys_noError()) {
-        Preconditioner_free(prec);
-        return NULL;
-    }
     return prec;
 }
 
diff --git a/paso/src/Preconditioner.h b/paso/src/Preconditioner.h
index 748ab55..5ed018c 100644
--- a/paso/src/Preconditioner.h
+++ b/paso/src/Preconditioner.h
@@ -17,16 +17,9 @@
 #ifndef __PASO_PRECONDITIONER_H__
 #define __PASO_PRECONDITIONER_H__
 
-#include "SystemMatrix.h"
+#include "Paso.h"
 #include "BOOMERAMG.h"
-
-#define PRECONDITIONER_NO_ERROR 0
-#define PRECONDITIONER_MAXITER_REACHED 1
-#define PRECONDITIONER_INPUT_ERROR -1
-#define PRECONDITIONER_MEMORY_ERROR -9
-#define PRECONDITIONER_BREAKDOWN -10
-#define PRECONDITIONER_NEGATIVE_NORM_ERROR -11
-#define PRECONDITIONER_DIVERGENCE -12
+#include "SystemMatrix.h"
 
 namespace paso {
 
@@ -94,9 +87,9 @@ void Preconditioner_LocalSmoother_solve(SparseMatrix_ptr A,
         Preconditioner_LocalSmoother* gs, double* x, const double* b,
         dim_t sweeps, bool x_is_initial);
 
-err_t Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
-        Preconditioner_Smoother* gs, double* x, const double* b,
-        double atol, dim_t* sweeps, bool x_is_initial);
+SolverResult Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
+                    Preconditioner_Smoother* gs, double* x, const double* b,
+                    double atol, dim_t* sweeps, bool x_is_initial);
 
 void Preconditioner_LocalSmoother_Sweep(SparseMatrix_ptr A,
         Preconditioner_LocalSmoother* gs, double* x);
@@ -208,7 +201,7 @@ void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
                         const index_t* offset_S, const index_t* S,
                         const dim_t* degree_ST, const index_t* offset_ST,
                         const index_t* ST, const_Connector_ptr col_connector,
-                        const_Distribution_ptr col_dist);
+                        escript::const_Distribution_ptr col_dist);
 
 SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P);
 
diff --git a/paso/src/RILU.cpp b/paso/src/RILU.cpp
index 33f007e..77762e2 100644
--- a/paso/src/RILU.cpp
+++ b/paso/src/RILU.cpp
@@ -26,9 +26,9 @@
 /****************************************************************************/
 
 #include "Paso.h"
-#include "Preconditioner.h"
-#include "PasoUtil.h"
 #include "BlockOps.h"
+#include "PasoUtil.h"
+#include "Preconditioner.h"
 
 namespace paso {
 
@@ -96,188 +96,171 @@ Solver_RILU* Solver_getRILU(SparseMatrix_ptr A_p, bool verbose)
     out->b_C=NULL;
 
     /* identify independent set of rows/columns */
-    time0=Esys_timer();
+    time0=escript::gettime();
     #pragma omp parallel for private(i) schedule(static)
     for (i=0;i<n;++i) mis_marker[i]=-1;
     A_p->pattern->mis(mis_marker);
-    /*time2=Esys_timer()-time0;*/
-    if (Esys_noError()) {
-        #pragma omp parallel for private(i) schedule(static)
-        for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
-        out->n=n;
-        out->n_block=n_block;
-        out->n_F=util::cumsum(n,counter);
-        out->mask_F=new index_t[n];
-        out->rows_in_F=new index_t[out->n_F];
-        out->inv_A_FF=new double[n_block*n_block*out->n_F];
-        out->A_FF_pivot=NULL; /* later use for block size>3 */
+    /*time2=escript::gettime()-time0;*/
+    #pragma omp parallel for private(i) schedule(static)
+    for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
+    out->n=n;
+    out->n_block=n_block;
+    out->n_F=util::cumsum(n,counter);
+    out->mask_F=new index_t[n];
+    out->rows_in_F=new index_t[out->n_F];
+    out->inv_A_FF=new double[n_block*n_block*out->n_F];
+    out->A_FF_pivot=NULL; /* later use for block size>3 */
 #pragma omp parallel
-        {
-          /* creates an index for F from mask */
-          #pragma omp for private(i) schedule(static)
-          for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
-          #pragma omp for private(i) schedule(static)
-          for (i = 0; i < n; ++i) {
-             if  (mis_marker[i]) {
-                    out->rows_in_F[counter[i]]=i;
-                    out->mask_F[i]=counter[i];
-             } else {
-                    out->mask_F[i]=-1;
-             }
-          }
-          #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
-          for (i = 0; i < out->n_F; i++) {
-            /* find main diagonal */
-            iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
-            index=&(A_p->pattern->index[iPtr]);
-            where_p=(index_t*)bsearch(&out->rows_in_F[i],
-                                    index,
-                                    A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
-                                    sizeof(index_t),
-                                    util::comparIndex);
-            if (where_p==NULL) {
-                Esys_setError(VALUE_ERROR, "Solver_getRILU: main diagonal element missing.");
-            } else {
-                iPtr+=(index_t)(where_p-index);
-                /* get inverse of A_FF block: */
-                if (n_block==1) {
-                   if (ABS(A_p->val[iPtr])>0.) {
-                        out->inv_A_FF[i]=1./A_p->val[iPtr];
-                   } else {
-                        Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
-                   }
-                } else if (n_block==2) {
-                   A11=A_p->val[iPtr*4];
-                   A21=A_p->val[iPtr*4+1];
-                   A12=A_p->val[iPtr*4+2];
-                   A22=A_p->val[iPtr*4+3];
-                   D = A11*A22-A12*A21;
-                   if (ABS(D) > 0 ){
-                        D=1./D;
-                        out->inv_A_FF[i*4]= A22*D;
-                        out->inv_A_FF[i*4+1]=-A21*D;
-                        out->inv_A_FF[i*4+2]=-A12*D;
-                        out->inv_A_FF[i*4+3]= A11*D;
-                   } else {
-                        Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
-                   }
-                } else if (n_block==3) {
-                   A11=A_p->val[iPtr*9  ];
-                   A21=A_p->val[iPtr*9+1];
-                   A31=A_p->val[iPtr*9+2];
-                   A12=A_p->val[iPtr*9+3];
-                   A22=A_p->val[iPtr*9+4];
-                   A32=A_p->val[iPtr*9+5];
-                   A13=A_p->val[iPtr*9+6];
-                   A23=A_p->val[iPtr*9+7];
-                   A33=A_p->val[iPtr*9+8];
-                   D  =  A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
-                   if (ABS(D) > 0 ){
-                        D=1./D;
-                        out->inv_A_FF[i*9  ]=(A22*A33-A23*A32)*D;
-                        out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
-                        out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
-                        out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
-                        out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
-                        out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
-                        out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
-                        out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
-                        out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
-                   } else {
-                        Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
-                   }
+    {
+      /* creates an index for F from mask */
+      #pragma omp for private(i) schedule(static)
+      for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
+      #pragma omp for private(i) schedule(static)
+      for (i = 0; i < n; ++i) {
+         if  (mis_marker[i]) {
+                out->rows_in_F[counter[i]]=i;
+                out->mask_F[i]=counter[i];
+         } else {
+                out->mask_F[i]=-1;
+         }
+      }
+      #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
+      for (i = 0; i < out->n_F; i++) {
+        /* find main diagonal */
+        iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
+        index=&(A_p->pattern->index[iPtr]);
+        where_p=(index_t*)bsearch(&out->rows_in_F[i],
+                                index,
+                                A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
+                                sizeof(index_t),
+                                util::comparIndex);
+        if (where_p==NULL) {
+            throw PasoException("Solver_getRILU: main diagonal element missing.");
+        } else {
+            iPtr+=(index_t)(where_p-index);
+            /* get inverse of A_FF block: */
+            if (n_block==1) {
+               if (std::abs(A_p->val[iPtr])>0.) {
+                    out->inv_A_FF[i]=1./A_p->val[iPtr];
+               } else {
+                    throw PasoException("Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
                }
-            }
-          }
-        } /* end parallel region */
+            } else if (n_block==2) {
+               A11=A_p->val[iPtr*4];
+               A21=A_p->val[iPtr*4+1];
+               A12=A_p->val[iPtr*4+2];
+               A22=A_p->val[iPtr*4+3];
+               D = A11*A22-A12*A21;
+               if (std::abs(D) > 0 ){
+                    D=1./D;
+                    out->inv_A_FF[i*4]= A22*D;
+                    out->inv_A_FF[i*4+1]=-A21*D;
+                    out->inv_A_FF[i*4+2]=-A12*D;
+                    out->inv_A_FF[i*4+3]= A11*D;
+               } else {
+                    throw PasoException("Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
+               }
+            } else if (n_block==3) {
+               A11=A_p->val[iPtr*9  ];
+               A21=A_p->val[iPtr*9+1];
+               A31=A_p->val[iPtr*9+2];
+               A12=A_p->val[iPtr*9+3];
+               A22=A_p->val[iPtr*9+4];
+               A32=A_p->val[iPtr*9+5];
+               A13=A_p->val[iPtr*9+6];
+               A23=A_p->val[iPtr*9+7];
+               A33=A_p->val[iPtr*9+8];
+               D  =  A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
+               if (std::abs(D) > 0) {
+                    D=1./D;
+                    out->inv_A_FF[i*9  ]=(A22*A33-A23*A32)*D;
+                    out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
+                    out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
+                    out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
+                    out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
+                    out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
+                    out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
+                    out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
+                    out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
+               } else {
+                    throw PasoException("Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
+               }
+           }
+        }
+      }
+    } /* end parallel region */
 
-        if (Esys_noError()) {
-            // if there are no nodes in the coarse level there is no more
-            // work to do
-            out->n_C=n-out->n_F;
-            if (out->n_C > 0) {
-                out->rows_in_C = new index_t[out->n_C];
-                out->mask_C = new index_t[n];
-                /* creates an index for C from mask */
-                #pragma omp parallel for private(i) schedule(static)
-                for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
-                util::cumsum(n,counter);
-                #pragma omp parallel
-                {
-                      #pragma omp for private(i) schedule(static)
-                      for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
-                      #pragma omp for private(i) schedule(static)
-                      for (i = 0; i < n; ++i) {
-                         if  (! mis_marker[i]) {
-                            out->rows_in_C[counter[i]]=i;
-                            out->mask_C[i]=counter[i];
-                         } else {
-                            out->mask_C[i]=-1;
-                         }
-                      }
-                } /* end parallel region */
-                /* get A_CF block: */
-                out->A_CF=A_p->getSubmatrix(out->n_C, out->n_F, out->rows_in_C, out->mask_F);
-                if (Esys_noError()) {
-                    /* get A_FC block: */
-                    out->A_FC=A_p->getSubmatrix(out->n_F, out->n_C, out->rows_in_F, out->mask_C);
-                }
-                /* get A_FF block: */
-                if (Esys_noError()) {
-                    schur = A_p->getSubmatrix(out->n_C, out->n_C, out->rows_in_C, out->mask_C);
-                }
-                time0=Esys_timer()-time0;
-                if (Esys_noError()) {
-                    time1=Esys_timer();
-                    /* update A_CC block to get Schur complement and then apply RILU to it */
-                    Solver_updateIncompleteSchurComplement(schur, out->A_CF, out->inv_A_FF, out->A_FF_pivot, out->A_FC);
-                    time1=Esys_timer()-time1;
-                    out->RILU_of_Schur = Solver_getRILU(schur, verbose);
-                    schur.reset();
-                }
-                /* allocate work arrays for RILU application */
-                if (Esys_noError()) {
-                    out->x_F=new double[n_block*out->n_F];
-                    out->b_F=new double[n_block*out->n_F];
-                    out->x_C=new double[n_block*out->n_C];
-                    out->b_C=new double[n_block*out->n_C];
-                    #pragma omp parallel
-                    {
+    // if there are no nodes in the coarse level there is no more
+    // work to do
+    out->n_C=n-out->n_F;
+    if (out->n_C > 0) {
+        out->rows_in_C = new index_t[out->n_C];
+        out->mask_C = new index_t[n];
+        /* creates an index for C from mask */
+        #pragma omp parallel for private(i) schedule(static)
+        for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
+        util::cumsum(n,counter);
+        #pragma omp parallel
+        {
+              #pragma omp for private(i) schedule(static)
+              for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
+              #pragma omp for private(i) schedule(static)
+              for (i = 0; i < n; ++i) {
+                 if  (! mis_marker[i]) {
+                    out->rows_in_C[counter[i]]=i;
+                    out->mask_C[i]=counter[i];
+                 } else {
+                    out->mask_C[i]=-1;
+                 }
+              }
+        } /* end parallel region */
+        /* get A_CF block: */
+        out->A_CF=A_p->getSubmatrix(out->n_C, out->n_F, out->rows_in_C, out->mask_F);
+        /* get A_FC block: */
+        out->A_FC=A_p->getSubmatrix(out->n_F, out->n_C, out->rows_in_F, out->mask_C);
+        /* get A_FF block: */
+        schur = A_p->getSubmatrix(out->n_C, out->n_C, out->rows_in_C, out->mask_C);
+        time0=escript::gettime()-time0;
+        time1=escript::gettime();
+        /* update A_CC block to get Schur complement and then apply RILU to it */
+        Solver_updateIncompleteSchurComplement(schur, out->A_CF, out->inv_A_FF, out->A_FF_pivot, out->A_FC);
+        time1=escript::gettime()-time1;
+        out->RILU_of_Schur = Solver_getRILU(schur, verbose);
+        schur.reset();
+        /* allocate work arrays for RILU application */
+        out->x_F=new double[n_block*out->n_F];
+        out->b_F=new double[n_block*out->n_F];
+        out->x_C=new double[n_block*out->n_C];
+        out->b_C=new double[n_block*out->n_C];
+#pragma omp parallel
+        {
 #pragma omp for private(i,k) schedule(static)
-                        for (i = 0; i < out->n_F; ++i) {
-                            for (k=0; k<n_block;++k) {
-                                out->x_F[i*n_block+k]=0.;
-                                out->b_F[i*n_block+k]=0.;
-                            }
-                        }
+            for (i = 0; i < out->n_F; ++i) {
+                for (k=0; k<n_block;++k) {
+                    out->x_F[i*n_block+k]=0.;
+                    out->b_F[i*n_block+k]=0.;
+                }
+            }
 #pragma omp for private(i,k) schedule(static)
-                        for (i = 0; i < out->n_C; ++i) {
-                            for (k=0; k<n_block;++k) {
-                                out->x_C[i*n_block+k]=0.;
-                                out->b_C[i*n_block+k]=0.;
-                            }
-                        }
-                    } // end parallel region
+            for (i = 0; i < out->n_C; ++i) {
+                for (k=0; k<n_block;++k) {
+                    out->x_C[i*n_block+k]=0.;
+                    out->b_C[i*n_block+k]=0.;
                 }
             }
-        }
+        } // end parallel region
     }
     delete[] mis_marker;
     delete[] counter;
-    if (Esys_noError()) {
-        //if (verbose) {
-        //    printf("RILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
-        //    if (out->n_C>0) {
-        //        printf("timing: RILU: MIS/reordering/elimination : %e/%e/%e\n",time2,time0,time1);
-        //    } else {
-        //        printf("timing: RILU: MIS: %e\n",time2);
-        //    }
-        //}
-        return out;
-    } else  {
-        Solver_RILU_free(out);
-        return NULL;
-    }
+    //if (verbose) {
+    //    printf("RILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
+    //    if (out->n_C>0) {
+    //        printf("timing: RILU: MIS/reordering/elimination : %e/%e/%e\n",time2,time0,time1);
+    //    } else {
+    //        printf("timing: RILU: MIS: %e\n",time2);
+    //    }
+    //}
+    return out;
 }
 
 /****************************************************************************/
diff --git a/paso/src/ReactiveSolver.cpp b/paso/src/ReactiveSolver.cpp
index db5a40f..d86bd8e 100644
--- a/paso/src/ReactiveSolver.cpp
+++ b/paso/src/ReactiveSolver.cpp
@@ -37,8 +37,17 @@
 
 namespace paso {
 
-err_t ReactiveSolver::solve(double* u, double* u_old, const double* source,
-                            Options* options, Performance* pp)
+static const real_t EPSILON = escript::DataTypes::real_t_eps();
+
+// exp(h)-1 ~ h + h**2/2 for abs(h) <  PASO_RT_EXP_LIM_MIN
+static const real_t PASO_RT_EXP_LIM_MIN = sqrt(EPSILON);
+
+// it is assumed that exp(h) with  h>PASO_RT_EXP_LIM_MAX is not reliable
+static const real_t PASO_RT_EXP_LIM_MAX = log(1./sqrt(EPSILON));
+
+SolverResult ReactiveSolver::solve(double* u, double* u_old,
+                                   const double* source, Options* options,
+                                   Performance* pp)
 {
     const double EXP_LIM_MIN = PASO_RT_EXP_LIM_MIN;
     const double EXP_LIM_MAX = PASO_RT_EXP_LIM_MAX;
@@ -74,14 +83,15 @@ err_t ReactiveSolver::solve(double* u, double* u_old, const double* source,
     MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, tp->mpi_info->comm);
 #endif
     if (fail > 0) {
-        return SOLVER_DIVERGENCE;
+        return Divergence;
     } else {
-        return SOLVER_NO_ERROR;
+        return NoError;
     }
 }
 
 double ReactiveSolver::getSafeTimeStepSize(const_TransportProblem_ptr tp)
 {
+    const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
     const double EXP_LIM_MAX = PASO_RT_EXP_LIM_MAX;
     const dim_t n = tp->transport_matrix->getTotalNumRows();
     double dt_max = LARGE_POSITIVE_FLOAT;
@@ -96,12 +106,12 @@ double ReactiveSolver::getSafeTimeStepSize(const_TransportProblem_ptr tp)
             const double m_i = tp->lumped_mass_matrix[i];
             if (m_i > 0) { // no constraint
                 if (d_ii > 0)
-                    dt_max_loc = MIN(dt_max_loc, m_i/d_ii);
+                    dt_max_loc = std::min(dt_max_loc, m_i/d_ii);
             }
         }
         #pragma omp critical
         {
-            dt_max = MIN(dt_max, dt_max_loc);
+            dt_max = std::min(dt_max, dt_max_loc);
         }
     }
 #ifdef ESYS_MPI
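
The PASO_RT_EXP_LIM_* macros formerly defined in ReactiveSolver.h become file-scope constants computed from machine epsilon (via escript::DataTypes::real_t_eps(), assumed here to return the epsilon of the configured real type). The same two limits, sketched with std::numeric_limits only:

    #include <cmath>
    #include <limits>

    // For |h| < EXP_LIM_MIN, exp(h)-1 is well approximated by h + h*h/2.
    static const double EXP_LIM_MIN =
        std::sqrt(std::numeric_limits<double>::epsilon());

    // exp(h) for h > EXP_LIM_MAX is treated as unreliable.
    static const double EXP_LIM_MAX =
        std::log(1.0 / std::sqrt(std::numeric_limits<double>::epsilon()));
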
diff --git a/paso/src/ReactiveSolver.h b/paso/src/ReactiveSolver.h
index 6b2f446..4c76cbd 100644
--- a/paso/src/ReactiveSolver.h
+++ b/paso/src/ReactiveSolver.h
@@ -18,18 +18,12 @@
 #ifndef __PASO_REACTIVESOLVER_H__
 #define __PASO_REACTIVESOLVER_H__
 
-#include "performance.h"
 #include "Transport.h"
 
-// exp(h)-1 ~ h + h**2/2 for abs(h) <  PASO_RT_EXP_LIM_MIN
-#define PASO_RT_EXP_LIM_MIN sqrt(EPSILON)
-
-// it is assumed that exp(h) with  h>PASO_RT_EXP_LIM_MAX is not reliable
-#define PASO_RT_EXP_LIM_MAX log(1./sqrt(EPSILON))
-
 namespace paso {
 
-PASO_DLL_API
+struct Performance;
+
 struct ReactiveSolver
 {
     ReactiveSolver(const_TransportProblem_ptr _tp) : tp(_tp) {}
@@ -40,7 +34,7 @@ struct ReactiveSolver
         dt = _dt;
     }
 
-    err_t solve(double* u, double* u_old, const double* source,
+    SolverResult solve(double* u, double* u_old, const double* source,
                 Options* options, Performance* pp);
 
     static double getSafeTimeStepSize(const_TransportProblem_ptr tp);
diff --git a/paso/src/SConscript b/paso/src/SConscript
index 43b4efd..e1717c1 100644
--- a/paso/src/SConscript
+++ b/paso/src/SConscript
@@ -15,9 +15,8 @@
 ##############################################################################
 
 Import('*')
-local_env = env.Clone()
 
-lib_name = 'paso'
+module_name = 'paso'
 
 sources = """
     AMG.cpp
@@ -27,7 +26,6 @@ sources = """
     AMG_Root.cpp
     BiCGStab.cpp
     Coupler.cpp
-    Distribution.cpp
     FCT_Solver.cpp
     FluxLimiter.cpp
     Functions.cpp
@@ -83,7 +81,6 @@ headers = """
     BlockOps.h
     BOOMERAMG.h
     Coupler.h
-    Distribution.h
     FCT_Solver.h
     FluxLimiter.h
     Functions.h
@@ -92,6 +89,7 @@ headers = """
     mmio.h
     Options.h
     Paso.h
+    PasoException.h
     PasoUtil.h
     Pattern.h
     performance.h
@@ -106,24 +104,33 @@ headers = """
     UMFPACK.h
 """.split()
 
-# And just for blocktimer cancer.....
-local_env.Prepend(LIBS = ['esysUtils'])
-if IS_WINDOWS:
-    local_env.Append(CPPDEFINES = ['PASO_EXPORTS'])
+local_env = env.Clone()
 
-if local_env['build_shared']:
-    lib = local_env.SharedLibrary(lib_name, sources)
-else:
-    lib = local_env.StaticLibrary(lib_name, sources)
+# collect dependencies for other modules
+pasolibs = env['escript_libs']
+if env['uselapack']:
+    pasolibs += env['lapack_libs']
+if env['mkl']:
+    pasolibs += env['mkl_libs']
+if env['umfpack']:
+    pasolibs += env['umfpack_libs']
+if env['boomeramg']:
+    pasolibs += env['boomeramg_libs']
+if env['papi']:
+    pasolibs += env['papi_libs']
 
-env.Alias('build_paso_lib', lib)
+local_env.PrependUnique(LIBS = pasolibs)
+env['paso_libs'] = [module_name] + pasolibs
 
-include_path = Dir('paso', local_env['incinstall'])
-solvers_include_path = Dir('Solvers', include_path)
+if IS_WINDOWS:
+    local_env.Append(CPPDEFINES = ['PASO_EXPORTS'])
 
+include_path = Dir(module_name, local_env['incinstall'])
 hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_paso_headers', hdr_inst)
 
+lib = local_env.SharedLibrary(module_name, sources)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_paso_lib', lib_inst)
+
+build = env.Alias('build_paso', [hdr_inst, lib])
+env.Alias('install_paso', [build, lib_inst])
 
diff --git a/paso/src/SchurComplement.cpp b/paso/src/SchurComplement.cpp
index 6f8dd69..c657038 100644
--- a/paso/src/SchurComplement.cpp
+++ b/paso/src/SchurComplement.cpp
@@ -15,24 +15,13 @@
 *****************************************************************************/
 
 
-/****************************************************************************/
-
 /* Paso: updates A_CC <- ACC-ACF AFF^{-1} AFC                 */
-
 /* no check of consistency of matrices !!!!                   */
 
-/****************************************************************************/
-
-/* Copyrights by ACcESS Australia 2003,2004,2005              */
-/* Author: Lutz Gross, l.gross at uq.edu.au                      */
-
-/****************************************************************************/
-
 #include "Paso.h"
-#include "SparseMatrix.h"
+#include "PasoUtil.h"
 #include "Solver.h"
-
-/****************************************************************************/
+#include "SparseMatrix.h"
 
 namespace paso {
 
@@ -45,7 +34,7 @@ void Solver_updateIncompleteSchurComplement(SparseMatrix_ptr A_CC,
   bool set_A;
   dim_t n_loc_rows=A_CC->numRows;
   dim_t n_block=A_CC->row_block_size;
-  register double A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33,
+  double A_CF_11,A_CF_21,A_CF_31,A_CF_12,A_CF_22,A_CF_32,A_CF_13,A_CF_23,A_CF_33,
          invA_FF_11,invA_FF_21,invA_FF_31,invA_FF_12,invA_FF_22,invA_FF_32,invA_FF_13,invA_FF_23,invA_FF_33,
          A11=0,A21=0,A31=0,A12=0,A22=0,A32=0,A13=0,A23=0,A33=0,A_FC_11,A_FC_21,A_FC_31,A_FC_12,A_FC_22,A_FC_32,A_FC_13,A_FC_23,A_FC_33;
   if (n_block==1) {
@@ -57,11 +46,11 @@ void Solver_updateIncompleteSchurComplement(SparseMatrix_ptr A_CC,
         /* now we run through the columns of A_CF in row  i */
         for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) {
              col_CF=A_CF->pattern->index[iPtr_CF];
-             set_A=true;
+             set_A = true;
              for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) {
                 col_FC=A_FC->pattern->index[iPtr_FC];
                 /* is (i,col_FC) in the shape of A_CC ? */
-               where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),util::comparIndex);
+                where_p=(index_t*)bsearch(&col_FC,index_CC,index_CC_len,sizeof(index_t),util::comparIndex);
                 if (where_p!=NULL) {
                     if (set_A) {
                        A11=A_CF->val[iPtr_CF]*invA_FF[col_CF];
@@ -81,7 +70,7 @@ void Solver_updateIncompleteSchurComplement(SparseMatrix_ptr A_CC,
         /* now we run through the columns of A_CF in row  i */
         for (iPtr_CF = A_CF->pattern->ptr[i]; iPtr_CF < A_CF->pattern->ptr[i + 1]; ++iPtr_CF) {
              col_CF=A_CF->pattern->index[iPtr_CF];
-             set_A=true;
+             set_A = true;
              for (iPtr_FC = A_FC->pattern->ptr[col_CF]; iPtr_FC < A_FC->pattern->ptr[col_CF + 1]; ++iPtr_FC) {
                 col_FC=A_FC->pattern->index[iPtr_FC];
                 /* is (i,col_FC) in the shape of A_CC ? */
diff --git a/paso/src/SharedComponents.h b/paso/src/SharedComponents.h
index 270a1b2..eee26c2 100644
--- a/paso/src/SharedComponents.h
+++ b/paso/src/SharedComponents.h
@@ -39,71 +39,58 @@ typedef boost::shared_ptr<const SharedComponents> const_SharedComponents_ptr;
 PASO_DLL_API
 struct SharedComponents
 {
-    SharedComponents(dim_t localLength, dim_t nNeighbours,
-            const Esys_MPI_rank* neighbours, const index_t* sharedArray,
-            const index_t* offset, index_t m, index_t b,
-            const esysUtils::JMPI& mpiInfo)
+    SharedComponents(dim_t localLength, const std::vector<int>& neighbours,
+                     const index_t* sharedArray,
+                     const std::vector<index_t>& offset,
+                     index_t m = 1, index_t b = 0)
         : local_length(localLength*m),
-          numNeighbors(nNeighbours),
-          mpi_info(mpiInfo)
+          neighbour(neighbours),
+          offsetInShared(offset)
     {
-        neighbor = new Esys_MPI_rank[numNeighbors];
-        if (!offset) {
+        if (offset.empty()) {
             numSharedComponents = 0;
         } else {
-            numSharedComponents = offset[nNeighbours] * m;
+            numSharedComponents = offset[neighbours.size()] * m;
         }
         shared = new index_t[numSharedComponents];
-        offsetInShared = new index_t[numNeighbors+1];
-        if (numNeighbors > 0 && offset != NULL) {
-#pragma omp parallel
-            {
-#pragma omp for
-                for (dim_t i=0; i < numNeighbors; i++) {
-                    neighbor[i] = neighbours[i];
-                    offsetInShared[i] = offset[i] * m;
-                }
-                offsetInShared[numNeighbors] = offset[nNeighbours] * m;
-#pragma omp for
-                for (dim_t i=0; i<offset[nNeighbours]; i++) {
-                    const index_t itmp=m*sharedArray[i]+b;
-                    for (dim_t j=0; j < m; ++j)
-                        shared[m*i+j]=itmp+j;
+        if (!neighbours.empty() && !offset.empty()) {
+            if (m != 1) {
+                for (int i = 0; i < offsetInShared.size(); i++) {
+                    offsetInShared[i] *= m;
                 }
             }
+#pragma omp parallel for
+            for (dim_t i = 0; i < offset[neighbours.size()]; i++) {
+                const index_t itmp = m * sharedArray[i] + b;
+                for (dim_t j = 0; j < m; ++j)
+                    shared[m*i+j] = itmp+j;
+            }
         } else {
-            offsetInShared[numNeighbors]=0;
+            offsetInShared[neighbours.size()] = 0;
         }
     }
 
     ~SharedComponents()
     {
-        delete[] neighbor;
         delete[] shared;
-        delete[] offsetInShared;
     }
 
     /// local array length shared
     dim_t local_length;
 
-    /// number of processors sharing values with this processor
-    dim_t numNeighbors;
+    /// list of the processors sharing values with this processor
+    std::vector<int> neighbour;
 
-    /// offsetInSharedInput[i] points to the first input value in array shared
+    /// offsetInShared[i] points to the first input value in array shared
     /// for processor i. Has length numNeighbors+1
-    index_t* offsetInShared;
-
-    /// list of the processors sharing values with this processor
-    Esys_MPI_rank* neighbor;
+    std::vector<index_t> offsetInShared;
 
     /// list of the (local) components which are shared with other processors.
     /// Has length numSharedComponents
     index_t* shared;
 
-    /// = offsetInShared[numNeighbors]
+    /// = offsetInShared[numNeighbours]
     dim_t numSharedComponents;
-
-    const esysUtils::JMPI mpi_info;
 };
 
 } // namespace paso
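
The rewritten SharedComponents above moves the neighbour list and the offset table into std::vector members, so the destructor only has to free the remaining raw 'shared' array and the neighbour count is simply neighbour.size(). A reduced sketch of that ownership model, with 'long' standing in for paso's index_t (an illustration, not the upstream struct):

    #include <cstddef>
    #include <vector>

    struct SharedMeta
    {
        std::vector<int>  neighbour;       // ranks sharing values with this rank
        std::vector<long> offsetInShared;  // offsetInShared[i]: first shared entry of neighbour i

        std::size_t numNeighbours() const { return neighbour.size(); }

        long numSharedComponents() const
        {
            return offsetInShared.empty() ? 0 : offsetInShared.back();
        }
        // No user-defined destructor needed: the vectors release their own storage.
    };
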
diff --git a/paso/src/Smoother.cpp b/paso/src/Smoother.cpp
index 2153e1b..68e6934 100644
--- a/paso/src/Smoother.cpp
+++ b/paso/src/Smoother.cpp
@@ -26,8 +26,8 @@
 /****************************************************************************/
 
 #include "Preconditioner.h"
-#include "PasoUtil.h"
 #include "BlockOps.h"
+#include "PasoUtil.h"
 
 namespace paso {
 
@@ -58,12 +58,7 @@ Preconditioner_Smoother* Preconditioner_Smoother_alloc(SystemMatrix_ptr A,
     out->localSmoother=Preconditioner_LocalSmoother_alloc(A->mainBlock,
                                                 jacobi, verbose);
     out->is_local=is_local;
-    if (Esys_MPIInfo_noError(A->mpi_info)) {
-        return out;
-    } else {
-        Preconditioner_Smoother_free(out);
-        return NULL;
-    }
+    return out;
 }
 
 Preconditioner_LocalSmoother* Preconditioner_LocalSmoother_alloc(
@@ -72,7 +67,7 @@ Preconditioner_LocalSmoother* Preconditioner_LocalSmoother_alloc(
     const dim_t n=A->numRows;
     const dim_t n_block=A->row_block_size;
     const dim_t block_size=A->block_size;
-    double time0=Esys_timer();
+    double time0=escript::gettime();
     Preconditioner_LocalSmoother* out=new Preconditioner_LocalSmoother;
 
     out->diag=new double[((size_t) n) * ((size_t) block_size)];
@@ -80,14 +75,8 @@ Preconditioner_LocalSmoother* Preconditioner_LocalSmoother_alloc(
     out->buffer=new double[((size_t) n) * ((size_t)  n_block)];
     out->Jacobi=jacobi;
     A->invMain(out->diag, out->pivot);
-    time0=Esys_timer()-time0;
-
-    if (Esys_noError()) {
-        return out;
-    } else {
-        Preconditioner_LocalSmoother_free(out);
-        return NULL;
-    }
+    time0=escript::gettime()-time0;
+    return out;
 }
 
 /*
@@ -126,7 +115,8 @@ void Preconditioner_Smoother_solve(SystemMatrix_ptr A,
         }
         while (nsweeps > 0 ) {
             util::copy(n, b_new, b);
-            SystemMatrix_MatrixVector_CSR_OFFSET0((-1.), A, x, 1., b_new); /* b_new = b - A*x */
+            SparseMatrix_MatrixVector_CSR_OFFSET0(-1., A->mainBlock, x, 1., b_new); /* b_new = b - A*x */
+            //A->MatrixVector_CSR_OFFSET0(-1., x, 1., b_new); /* b_new = b - A*x */
             Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,b_new);
             util::AXPY(n, x, 1., b_new);
             nsweeps--;
@@ -134,16 +124,16 @@ void Preconditioner_Smoother_solve(SystemMatrix_ptr A,
     }
 }
 
-err_t Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
-        Preconditioner_Smoother* smoother, double* x, const double* b,
-        double atol, dim_t* sweeps, bool x_is_initial)
+SolverResult Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
+            Preconditioner_Smoother* smoother, double* x, const double* b,
+            double atol, dim_t* sweeps, bool x_is_initial)
 {
    const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
    double *b_new = smoother->localSmoother->buffer;
    const dim_t max_sweeps=*sweeps;
    dim_t s=0;
    double norm_dx = atol * 2.;
-   err_t errorCode = PRECONDITIONER_NO_ERROR;
+   SolverResult errorCode = NoError;
 
    if (! x_is_initial) {
         util::copy(n, x, b);
@@ -153,13 +143,14 @@ err_t Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
    }
    while (norm_dx > atol) {
         util::copy(n, b_new, b);
-        SystemMatrix_MatrixVector((-1.), A, x, 1., b_new); /* b_new = b - A*x */
+        SparseMatrix_MatrixVector_CSR_OFFSET0(-1., A->mainBlock, x, 1., b_new); /* b_new = b - A*x */
+        //A->MatrixVector(-1., x, 1., b_new); /* b_new = b - A*x */
         Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,b_new);
         norm_dx=util::lsup(n,b_new,A->mpi_info);
         util::AXPY(n, x, 1., b_new);
         if (s >= max_sweeps) {
-              errorCode = PRECONDITIONER_MAXITER_REACHED;
-              break;
+            errorCode = MaxIterReached;
+            break;
         }
         s++;
    }
@@ -185,10 +176,10 @@ void Preconditioner_LocalSmoother_solve(SparseMatrix_ptr A,
    while (nsweeps > 0 ) {
        util::copy(n, b_new, b);
 
-     SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A, x, 1., b_new); /* b_new = b - A*x */
-         Preconditioner_LocalSmoother_Sweep(A, smoother, b_new);
-         util::AXPY(n, x, 1., b_new);
-         nsweeps--;
+        SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A, x, 1., b_new); /* b_new = b - A*x */
+        Preconditioner_LocalSmoother_Sweep(A, smoother, b_new);
+        util::AXPY(n, x, 1., b_new);
+        nsweeps--;
    }
 }
 
@@ -210,7 +201,11 @@ void Preconditioner_LocalSmoother_solve(SparseMatrix_ptr A,
 void Preconditioner_LocalSmoother_Sweep(SparseMatrix_ptr A,
         Preconditioner_LocalSmoother* smoother, double* x)
 {
+#ifdef _OPENMP
     const dim_t nt=omp_get_max_threads();
+#else
+    const dim_t nt=1;
+#endif
     if (smoother->Jacobi) {
         BlockOps_solveAll(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
     } else {
@@ -227,13 +222,16 @@ void Preconditioner_LocalSmoother_Sweep_sequential(SparseMatrix_ptr A,
         Preconditioner_LocalSmoother* smoother, double* x)
 {
     const dim_t n=A->numRows;
+    if (n==0)
+        return;
+
     const dim_t n_block=A->row_block_size;
     double *diag = smoother->diag;
     index_t* pivot = smoother->pivot;
     const dim_t block_len=A->block_size;
-    register dim_t i,k;
-    register index_t iptr_ik, mm;
-    register double rtmp;
+    dim_t i,k;
+    index_t iptr_ik, mm;
+    double rtmp;
     int failed = 0;
     const index_t* ptr_main = A->borrowMainDiagonalPointer();
 
@@ -333,7 +331,7 @@ void Preconditioner_LocalSmoother_Sweep_sequential(SparseMatrix_ptr A,
     }
 
     if (failed > 0) {
-        Esys_setError(ZERO_DIVISION_ERROR, "Preconditioner_LocalSmoother_Sweep_sequential: non-regular main diagonal block.");
+        throw PasoException("Preconditioner_LocalSmoother_Sweep_sequential: non-regular main diagonal block.");
     }
 }
 
@@ -347,9 +345,9 @@ void Preconditioner_LocalSmoother_Sweep_colored(SparseMatrix_ptr A,
     const dim_t block_len=A->block_size;
     double *y;
 
-    register dim_t i,k;
-    register index_t color,iptr_ik, mm;
-    register double rtmp;
+    dim_t i,k;
+    index_t color,iptr_ik, mm;
+    double rtmp;
     int failed = 0;
 
     const index_t* coloring = A->pattern->borrowColoringPointer();
@@ -503,7 +501,7 @@ void Preconditioner_LocalSmoother_Sweep_colored(SparseMatrix_ptr A,
         delete[] y;
     }
     if (failed > 0) {
-        Esys_setError(ZERO_DIVISION_ERROR, "Preconditioner_LocalSmoother_Sweep_colored: non-regular main diagonal block.");
+        throw PasoException("Preconditioner_LocalSmoother_Sweep_colored: non-regular main diagonal block.");
     }
 }
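
Preconditioner_Smoother_solve and its _byTolerance variant above both run the same smoothing iteration: copy b, subtract A*x to get the residual, apply the local smoother, and add the correction back onto x via AXPY. A minimal stand-alone sketch of that loop; matvec and applySmoother are placeholder callables, not the Paso interface:

    #include <cstddef>
    #include <vector>

    // x <- x + M^{-1}(b - A*x), repeated 'sweeps' times. matvec must compute
    // y = alpha*A*x + beta*y; applySmoother overwrites r with M^{-1}*r.
    template <class MatVec, class Smoother>
    void smooth(const MatVec& matvec, const Smoother& applySmoother,
                std::vector<double>& x, const std::vector<double>& b, int sweeps)
    {
        std::vector<double> r(b.size());
        for (int s = 0; s < sweeps; ++s) {
            r = b;                          // r = b
            matvec(-1.0, x, 1.0, r);        // r = b - A*x
            applySmoother(r);               // r = M^{-1} * r
            for (std::size_t i = 0; i < x.size(); ++i)
                x[i] += r[i];               // x = x + r  (the AXPY step above)
        }
    }
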
 
diff --git a/paso/src/Solver.cpp b/paso/src/Solver.cpp
index 6a83d47..bc8856b 100644
--- a/paso/src/Solver.cpp
+++ b/paso/src/Solver.cpp
@@ -24,15 +24,16 @@
 /* Author: Lutz Gross, l.gross at uq.edu.au                      */
 
 /****************************************************************************/
+
+#include "Solver.h"
+#include "Options.h"
+#include "SystemMatrix.h"
+
 #include <boost/math/special_functions/fpclassify.hpp>  // for isnan
 
 #include <iostream>
-#include "Paso.h"
-#include "SystemMatrix.h"
-#include "Solver.h"
-#include "esysUtils/blocktimer.h"
 
-namespace bm=boost::math;
+namespace bm = boost::math;
 
 namespace paso {
 
@@ -42,9 +43,10 @@ void Solver_free(SystemMatrix* A)
 }
 
 ///  calls the iterative solver
-void Solver(SystemMatrix_ptr A, double* x, double* b, Options* options,
-            Performance* pp)
+SolverResult Solver(SystemMatrix_ptr A, double* x, double* b, Options* options,
+                    Performance* pp)
 {
+    const real_t EPSILON = escript::DataTypes::real_t_eps();
     double norm2_of_b,tol,tolerance,time_iter,net_time_start;
     double *r=NULL,norm2_of_residual,last_norm2_of_residual,norm_max_of_b;
     double norm2_of_b_local,norm_max_of_b_local,norm2_of_residual_local;
@@ -55,43 +57,40 @@ void Solver(SystemMatrix_ptr A, double* x, double* b, Options* options,
 #endif
     dim_t i,totIter=0,cntIter,method;
     bool finalizeIteration;
-    err_t errorCode=SOLVER_NO_ERROR;
+    SolverResult errorCode = NoError;
     const dim_t numSol = A->getTotalNumCols();
     const dim_t numEqua = A->getTotalNumRows();
-    double blocktimer_precond, blocktimer_start = blocktimer_time();
     double *x0=NULL;
 
-    Esys_resetError();
     tolerance=options->tolerance;
     if (tolerance < 100.* EPSILON) {
-        Esys_setError(VALUE_ERROR,"Solver: Tolerance is too small.");
+        throw PasoException("Solver: Tolerance is too small.");
     }
     if (tolerance >1.) {
-        Esys_setError(VALUE_ERROR,"Solver: Tolerance must be less than one.");
+        throw PasoException("Solver: Tolerance must be less than one.");
     }
     method=Options::getSolver(options->method, PASO_PASO, options->symmetric, A->mpi_info);
     /* check matrix type */
     if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1) ) {
-        Esys_setError(TYPE_ERROR,"Solver: Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0.");
+        throw PasoException("Solver: Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0.");
     }
     if (A->col_block_size != A->row_block_size) {
-        Esys_setError(TYPE_ERROR,"Solver: Iterative solver requires row and column block sizes to be equal.");
+        throw PasoException("Solver: Iterative solver requires row and column block sizes to be equal.");
     }
     if (A->getGlobalNumCols() != A->getGlobalNumRows()) {
-        Esys_setError(TYPE_ERROR,"Solver: Iterative solver requires a square matrix.");
-        return;
+        throw PasoException("Solver: Iterative solver requires a square matrix.");
     }
-    time_iter=Esys_timer();
+    time_iter=escript::gettime();
     /* this for testing only */
     if (method==PASO_NONLINEAR_GMRES) {
         LinearSystem* F = new LinearSystem(A, b, options);
         A->solvePreconditioner(x, b);
         errorCode = Solver_NewtonGMRES(F, x, options, pp);
-        if (errorCode != NO_ERROR) {
-            Esys_setError(SYSTEM_ERROR,"Solver_NewtonGMRES: an error has occurred.");
+        if (errorCode != NoError) {
+            throw PasoException("Solver_NewtonGMRES: an error has occurred.");
         }
         delete F;
-        return;
+        return errorCode;
     }
 
     r = new double[numEqua];
@@ -101,241 +100,236 @@ void Solver(SystemMatrix_ptr A, double* x, double* b, Options* options,
     options->num_inner_iter=0;
 
     /* ========================= */
-    if (Esys_noError()) {
-        Performance_startMonitor(pp, PERFORMANCE_ALL);
-        A->applyBalance(r, b, true);
-        /* get the norm of the right hand side */
-        norm2_of_b=0.;
-        norm_max_of_b=0.;
-        #pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local)
+    Performance_startMonitor(pp, PERFORMANCE_ALL);
+    A->applyBalance(r, b, true);
+    /* get the norm of the right hand side */
+    norm2_of_b=0.;
+    norm_max_of_b=0.;
+    #pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local)
+    {
+        norm2_of_b_local=0.;
+        norm_max_of_b_local=0.;
+        #pragma omp for private(i) schedule(static)
+        for (i = 0; i < numEqua ; ++i) {
+            norm2_of_b_local += r[i] * r[i];
+            norm_max_of_b_local = std::max(std::abs(r[i]),norm_max_of_b_local);
+        }
+        #pragma omp critical
         {
-            norm2_of_b_local=0.;
-            norm_max_of_b_local=0.;
-            #pragma omp for private(i) schedule(static)
-            for (i = 0; i < numEqua ; ++i) {
-                norm2_of_b_local += r[i] * r[i];
-                norm_max_of_b_local = MAX(ABS(r[i]),norm_max_of_b_local);
-            }
-            #pragma omp critical
-            {
-                norm2_of_b += norm2_of_b_local;
-                norm_max_of_b = MAX(norm_max_of_b_local,norm_max_of_b);
-            }
+            norm2_of_b += norm2_of_b_local;
+            norm_max_of_b = std::max(norm_max_of_b_local,norm_max_of_b);
         }
+    }
 #ifdef ESYS_MPI
-        /* TODO: use one call */
-        loc_norm = norm2_of_b;
-        MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-        loc_norm = norm_max_of_b;
-        MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
+    /* TODO: use one call */
+    loc_norm = norm2_of_b;
+    MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+    loc_norm = norm_max_of_b;
+    MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
 #endif
-        norm2_of_b=sqrt(norm2_of_b);
-        /* if norm2_of_b==0 we are ready: x=0 */
-        if (bm::isnan(norm2_of_b) || bm::isnan(norm_max_of_b)) {
-            Esys_setError(VALUE_ERROR, "Solver: Matrix or right hand side contains undefined values.");
-        } else if (norm2_of_b <= 0.) {
+    norm2_of_b=sqrt(norm2_of_b);
+    /* if norm2_of_b==0 we are ready: x=0 */
+    if (bm::isnan(norm2_of_b) || bm::isnan(norm_max_of_b)) {
+        throw PasoException("Solver: Matrix or right hand side contains undefined values.");
+    } else if (norm2_of_b <= 0.) {
 #pragma omp parallel for private(i) schedule(static)
-            for (i = 0; i < numSol; i++) x[i]=0.;
-            if (options->verbose)
-                std::cout << "right hand side is identical to zero." << std::endl;
-        } else {
-            if (options->verbose) {
-                std::cout << "Solver: l2/lmax-norm of right hand side is "
-                    << norm2_of_b << "/" << norm_max_of_b << "." << std::endl
-                    << "Solver: l2/lmax-stopping criterion is "
-                    << norm2_of_b*tolerance << "/" << norm_max_of_b*tolerance
-                    << "." << std::endl;
-                switch (method) {
-                    case PASO_BICGSTAB:
-                        std::cout << "Solver: Iterative method is BiCGStab.\n";
-                    break;
-                    case PASO_PCG:
-                        std::cout << "Solver: Iterative method is PCG.\n";
-                    break;
-                    case PASO_TFQMR:
-                        std::cout << "Solver: Iterative method is TFQMR.\n";
-                    break;
-                    case PASO_MINRES:
-                        std::cout << "Solver: Iterative method is MINRES.\n";
-                    break;
-                    case PASO_PRES20:
-                        std::cout << "Solver: Iterative method is PRES20.\n";
-                    break;
-                    case PASO_GMRES:
-                        if (options->restart > 0) {
-                            std::cout << "Solver: Iterative method is GMRES("
-                                << options->truncation << ","
-                                << options->restart << ")." << std::endl;
-                        } else {
-                            std::cout << "Solver: Iterative method is GMRES("
-                                << options->truncation << ")." << std::endl;
-                        }
-                    break;
-                }
+        for (i = 0; i < numSol; i++) x[i]=0.;
+        if (options->verbose)
+            std::cout << "right hand side is identical to zero." << std::endl;
+    } else {
+        if (options->verbose) {
+            std::cout << "Solver: l2/lmax-norm of right hand side is "
+                << norm2_of_b << "/" << norm_max_of_b << "." << std::endl
+                << "Solver: l2/lmax-stopping criterion is "
+                << norm2_of_b*tolerance << "/" << norm_max_of_b*tolerance
+                << "." << std::endl;
+            switch (method) {
+                case PASO_BICGSTAB:
+                    std::cout << "Solver: Iterative method is BiCGStab.\n";
+                break;
+                case PASO_PCG:
+                    std::cout << "Solver: Iterative method is PCG.\n";
+                break;
+                case PASO_TFQMR:
+                    std::cout << "Solver: Iterative method is TFQMR.\n";
+                break;
+                case PASO_MINRES:
+                    std::cout << "Solver: Iterative method is MINRES.\n";
+                break;
+                case PASO_PRES20:
+                    std::cout << "Solver: Iterative method is PRES20.\n";
+                break;
+                case PASO_GMRES:
+                    if (options->restart > 0) {
+                        std::cout << "Solver: Iterative method is GMRES("
+                            << options->truncation << ","
+                            << options->restart << ")." << std::endl;
+                    } else {
+                        std::cout << "Solver: Iterative method is GMRES("
+                            << options->truncation << ")." << std::endl;
+                    }
+                break;
             }
+        }
 
-            // construct the preconditioner
-            blocktimer_precond = blocktimer_time();
-            Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
-            A->setPreconditioner(options);
-            Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
-            blocktimer_increment("Solver_setPreconditioner()", blocktimer_precond);
-            options->set_up_time=Esys_timer()-time_iter;
-            if (Esys_noError()) {
-                // get an initial guess by evaluating the preconditioner
-                A->solvePreconditioner(x, r);
-
-                totIter = 1;
-                finalizeIteration = false;
-                last_norm2_of_residual=norm2_of_b;
-                last_norm_max_of_residual=norm_max_of_b;
-                net_time_start=Esys_timer();
-
-                // main loop
-                while (!finalizeIteration) {
-                    cntIter = options->iter_max - totIter;
-                    finalizeIteration = true;
-
-                    // Set initial residual
-                    if (totIter > 1) {
-                        // in the first iteration r = balance * b already
-                        A->applyBalance(r, b, true);
-                    }
+        // construct the preconditioner
+        Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
+        A->setPreconditioner(options);
+        Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
+        options->set_up_time=escript::gettime()-time_iter;
+        // get an initial guess by evaluating the preconditioner
+        A->solvePreconditioner(x, r);
 
-                    SystemMatrix_MatrixVector_CSR_OFFSET0(-1., A, x, 1., r);
-                    norm2_of_residual = 0;
-                    norm_max_of_residual = 0;
-                    #pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local)
-                    {
-                        norm2_of_residual_local = 0;
-                        norm_max_of_residual_local = 0;
-                        #pragma omp for private(i) schedule(static)
-                        for (i = 0; i < numEqua; i++) {
-                            norm2_of_residual_local+= r[i] * r[i];
-                            norm_max_of_residual_local=MAX(ABS(r[i]),norm_max_of_residual_local);
-                        }
-                        #pragma omp critical
-                        {
-                            norm2_of_residual += norm2_of_residual_local;
-                            norm_max_of_residual = MAX(norm_max_of_residual_local,norm_max_of_residual);
-                        }
-                    }
+        totIter = 1;
+        finalizeIteration = false;
+        last_norm2_of_residual=norm2_of_b;
+        last_norm_max_of_residual=norm_max_of_b;
+        net_time_start=escript::gettime();
+
+        // main loop
+        while (!finalizeIteration) {
+            cntIter = options->iter_max - totIter;
+            finalizeIteration = true;
+
+            // Set initial residual
+            if (totIter > 1) {
+                // in the first iteration r = balance * b already
+                A->applyBalance(r, b, true);
+            }
+
+            A->MatrixVector_CSR_OFFSET0(-1., x, 1., r);
+            norm2_of_residual = 0;
+            norm_max_of_residual = 0;
+            #pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local)
+            {
+                norm2_of_residual_local = 0;
+                norm_max_of_residual_local = 0;
+                #pragma omp for private(i) schedule(static)
+                for (i = 0; i < numEqua; i++) {
+                    norm2_of_residual_local+= r[i] * r[i];
+                    norm_max_of_residual_local=std::max(std::abs(r[i]),norm_max_of_residual_local);
+                }
+                #pragma omp critical
+                {
+                    norm2_of_residual += norm2_of_residual_local;
+                    norm_max_of_residual = std::max(norm_max_of_residual_local,norm_max_of_residual);
+                }
+            }
 #ifdef ESYS_MPI
-                    // TODO: use one call
-                    loc_norm = norm2_of_residual;
-                    MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-                    loc_norm = norm_max_of_residual;
-                    MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
+            // TODO: use one call
+            loc_norm = norm2_of_residual;
+            MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+            loc_norm = norm_max_of_residual;
+            MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
 #endif
-                    norm2_of_residual =sqrt(norm2_of_residual);
-                    options->residual_norm=norm2_of_residual;
+            norm2_of_residual =sqrt(norm2_of_residual);
+            options->residual_norm=norm2_of_residual;
 
-                    if (options->verbose)
-                        std::cout << "Solver: Step " << totIter
-                            << ": l2/lmax-norm of residual is "
-                            << norm2_of_residual << "/" << norm_max_of_residual;
+            if (options->verbose)
+                std::cout << "Solver: Step " << totIter
+                    << ": l2/lmax-norm of residual is "
+                    << norm2_of_residual << "/" << norm_max_of_residual;
 
-                    if (totIter > 1 &&
-                            norm2_of_residual >= last_norm2_of_residual &&
-                            norm_max_of_residual >= last_norm_max_of_residual) {
+            if (totIter > 1 &&
+                    norm2_of_residual >= last_norm2_of_residual &&
+                    norm_max_of_residual >= last_norm_max_of_residual) {
 
-                        if (options->verbose) std::cout << " divergence!\n";
-                        Esys_setError(DIVERGED, "Solver: No improvement during iteration. Iterative solver gives up.");
+                if (options->verbose) std::cout << " divergence!\n";
+                throw PasoException("Solver: No improvement during iteration. Iterative solver gives up.");
 
-                    } else {
-                        if (norm2_of_residual>tolerance*norm2_of_b ||
-                                norm_max_of_residual>tolerance*norm_max_of_b ) {
+            } else {
+                if (norm2_of_residual>tolerance*norm2_of_b ||
+                        norm_max_of_residual>tolerance*norm_max_of_b ) {
 
-                            tol=tolerance*MIN(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b);
-                            if (options->verbose)
-                                std::cout << " (new tolerance = " << tol << ").\n";
-
-                            last_norm2_of_residual=norm2_of_residual;
-                            last_norm_max_of_residual=norm_max_of_residual;
-
-                            // call the solver
-                            switch (method) {
-                                case PASO_BICGSTAB:
-                                    errorCode = Solver_BiCGStab(A, r, x, &cntIter, &tol, pp);
-                                break;
-                                case PASO_PCG:
-                                    errorCode = Solver_PCG(A, r, x, &cntIter, &tol, pp);
-                                break;
-                                case PASO_TFQMR:
-                                    tol=tolerance*norm2_of_residual/norm2_of_b;
-                                    errorCode = Solver_TFQMR(A, r, x0, &cntIter, &tol, pp);
-                                    #pragma omp for private(i) schedule(static)
-                                    for (i = 0; i < numEqua; i++) {
-                                        x[i]+= x0[i];
-                                    }
-                                break;
-                                case PASO_MINRES:
-                                    //tol=tolerance*norm2_of_residual/norm2_of_b;
-                                    errorCode = Solver_MINRES(A, r, x, &cntIter, &tol, pp);
-                                break;
-                                case PASO_PRES20:
-                                    errorCode = Solver_GMRES(A, r, x, &cntIter, &tol, 5, 20, pp);
-                                break;
-                                case PASO_GMRES:
-                                    errorCode = Solver_GMRES(A, r, x, &cntIter, &tol, options->truncation, options->restart, pp);
-                                break;
-                            }
+                    tol=tolerance*std::min(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b);
+                    if (options->verbose)
+                        std::cout << " (new tolerance = " << tol << ").\n";
+
+                    last_norm2_of_residual=norm2_of_residual;
+                    last_norm_max_of_residual=norm_max_of_residual;
 
-                            totIter += cntIter;
-
-                            // error handling
-                            if (errorCode == SOLVER_NO_ERROR) {
-                                finalizeIteration = false;
-                            } else if (errorCode == SOLVER_MAXITER_REACHED) {
-                                Esys_setError(DIVERGED, "Solver: maximum number of iteration steps reached.\nReturned solution does not fulfil stopping criterion.");
-                                if (options->verbose)
-                                    std::cout << "Solver: Maximum number of "
-                                        "iterations reached." << std::endl;
-                            } else if (errorCode == SOLVER_INPUT_ERROR) {
-                                Esys_setError(SYSTEM_ERROR, "Solver: illegal dimension in iterative solver.");
-                                if (options->verbose)
-                                    std::cout << "Solver: Internal error!\n";
-                            } else if (errorCode == SOLVER_NEGATIVE_NORM_ERROR) {
-                                Esys_setError(VALUE_ERROR, "Solver: negative energy norm (try other solver or preconditioner).");
-                                if (options->verbose)
-                                    std::cout << "Solver: negative energy norm"
-                                       " (try other solver or preconditioner)!\n";
-                            } else if (errorCode == SOLVER_BREAKDOWN) {
-                                if (cntIter <= 1) {
-                                    Esys_setError(ZERO_DIVISION_ERROR, "Solver: fatal break down in iterative solver.");
-                                    if (options->verbose)
-                                        std::cout << "Solver: Uncurable break "
-                                            "down!" << std::endl;
-                                } else {
-                                    if (options->verbose)
-                                        std::cout << "Solver: Breakdown at iter "
-                                            << totIter << " (residual = "
-                                            << tol << "). Restarting ...\n";
-                                    finalizeIteration = false;
-                                }
-                            } else {
-                                Esys_setError(SYSTEM_ERROR, "Solver: Generic error in solver.");
-                                if (options->verbose)
-                                    std::cout << "Solver: Generic error in solver!\n";
+                    // call the solver
+                    switch (method) {
+                        case PASO_BICGSTAB:
+                            errorCode = Solver_BiCGStab(A, r, x, &cntIter, &tol, pp);
+                        break;
+                        case PASO_PCG:
+                            errorCode = Solver_PCG(A, r, x, &cntIter, &tol, pp);
+                        break;
+                        case PASO_TFQMR:
+                            tol=tolerance*norm2_of_residual/norm2_of_b;
+                            errorCode = Solver_TFQMR(A, r, x0, &cntIter, &tol, pp);
+                            #pragma omp for private(i) schedule(static)
+                            for (i = 0; i < numEqua; i++) {
+                                x[i]+= x0[i];
                             }
+                        break;
+                        case PASO_MINRES:
+                            //tol=tolerance*norm2_of_residual/norm2_of_b;
+                            errorCode = Solver_MINRES(A, r, x, &cntIter, &tol, pp);
+                        break;
+                        case PASO_PRES20:
+                            errorCode = Solver_GMRES(A, r, x, &cntIter, &tol, 5, 20, pp);
+                        break;
+                        case PASO_GMRES:
+                            errorCode = Solver_GMRES(A, r, x, &cntIter, &tol, options->truncation, options->restart, pp);
+                        break;
+                    }
+
+                    totIter += cntIter;
+
+                    // error handling
+                    if (errorCode == NoError) {
+                        finalizeIteration = false;
+                    } else if (errorCode == MaxIterReached) {
+                        if (options->verbose)
+                            std::cout << "Solver: Maximum number of "
+                                "iterations reached." << std::endl;
+                        break;
+                    } else if (errorCode == InputError) {
+                        if (options->verbose)
+                            std::cout << "Solver: Internal error!\n";
+                        break;
+                    } else if (errorCode == NegativeNormError) {
+                        if (options->verbose)
+                            std::cout << "Solver: negative energy norm"
+                               " (try other solver or preconditioner)!\n";
+                        break;
+                    } else if (errorCode == Breakdown) {
+                        if (cntIter <= 1) {
+                            if (options->verbose)
+                                std::cout << "Solver: Uncurable break "
+                                    "down!" << std::endl;
+                            break;
                         } else {
                             if (options->verbose)
-                                std::cout << " convergence!" << std::endl;
-                            options->converged = true;
+                                std::cout << "Solver: Breakdown at iter "
+                                    << totIter << " (residual = "
+                                    << tol << "). Restarting ...\n";
+                            finalizeIteration = false;
+                            errorCode = NoError;
                         }
+                    } else {
+                        if (options->verbose)
+                            std::cout << "Solver: Generic error in solver!\n";
+                        break;
                     }
-                } // while
-                options->net_time = Esys_timer()-net_time_start;
+                } else {
+                    if (options->verbose)
+                        std::cout << " convergence!" << std::endl;
+                    options->converged = true;
+                }
             }
-            options->num_iter = totIter;
-            A->applyBalanceInPlace(x, false);
-        }
+        } // while
+        options->net_time = escript::gettime()-net_time_start;
+        options->num_iter = totIter;
+        A->applyBalanceInPlace(x, false);
     }
     delete[] r;
     delete[] x0;
-    options->time = Esys_timer()-time_iter;
+    options->time = escript::gettime()-time_iter;
     Performance_stopMonitor(pp, PERFORMANCE_ALL);
-    blocktimer_increment("Solver()", blocktimer_start);
+    return errorCode;
 }
 
 } // namespace paso
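
The re-indented block in Solver() keeps the original two-level norm reduction, now written with std::max and std::abs instead of the removed MAX/ABS macros: each OpenMP thread accumulates a partial squared 2-norm and a partial max-norm of the residual, the partials are merged in a critical section, and under MPI the per-rank results are combined with MPI_Allreduce. A self-contained sketch of the thread-level part (the MPI step is omitted):

    #include <algorithm>
    #include <cmath>

    // Computes ||r||_2 and ||r||_inf of a length-n vector in the style used
    // by Solver() above.
    void residualNorms(const double* r, long n, double& norm2, double& normMax)
    {
        norm2 = 0.;
        normMax = 0.;
    #pragma omp parallel
        {
            double local2 = 0., localMax = 0.;
    #pragma omp for schedule(static)
            for (long i = 0; i < n; ++i) {
                local2 += r[i] * r[i];
                localMax = std::max(std::abs(r[i]), localMax);
            }
    #pragma omp critical
            {
                norm2 += local2;
                normMax = std::max(localMax, normMax);
            }
        }
        norm2 = std::sqrt(norm2);
    }
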
diff --git a/paso/src/Solver.h b/paso/src/Solver.h
index 858b7f1..3b071c7 100644
--- a/paso/src/Solver.h
+++ b/paso/src/Solver.h
@@ -18,54 +18,44 @@
 #ifndef __PASO_SOLVER_H__
 #define __PASO_SOLVER_H__
 
-#include "SystemMatrix.h"
-#include "performance.h"
+#include "Paso.h"
 #include "Functions.h"
+#include "performance.h"
+#include "SystemMatrix.h"
 
 namespace paso {
 
-// error codes used in the solver
-#define SOLVER_NO_ERROR 0
-#define SOLVER_MAXITER_REACHED 1
-#define SOLVER_INPUT_ERROR -1
-#define SOLVER_MEMORY_ERROR -9
-#define SOLVER_BREAKDOWN -10
-#define SOLVER_NEGATIVE_NORM_ERROR -11
-#define SOLVER_DIVERGENCE -12
-
 #define TOLERANCE_FOR_SCALARS (double)(0.)
 
-void solve(SystemMatrix_ptr A, double* out, double* in, Options* options);
-
 void solve_free(SystemMatrix* A);
 
-PASO_DLL_API
-void Solver(SystemMatrix_ptr, double*, double*, Options*, Performance*);
+SolverResult Solver(SystemMatrix_ptr, double*, double*, Options*, Performance*);
 
-PASO_DLL_API
 void Solver_free(SystemMatrix*);
 
-err_t Solver_BiCGStab(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
-                      double* tolerance, Performance* pp);
+SolverResult Solver_BiCGStab(SystemMatrix_ptr A, double* B, double* X,
+                             dim_t* iter, double* tolerance, Performance* pp);
 
-err_t Solver_PCG(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
-                 double* tolerance, Performance* pp);
+SolverResult Solver_PCG(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
+                        double* tolerance, Performance* pp);
 
-err_t Solver_TFQMR(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
-                   double* tolerance, Performance* pp);
+SolverResult Solver_TFQMR(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
+                          double* tolerance, Performance* pp);
 
-err_t Solver_MINRES(SystemMatrix_ptr A, double* B, double* X, dim_t* iter,
-                    double* tolerance, Performance* pp);
+SolverResult Solver_MINRES(SystemMatrix_ptr A, double* B, double* X,
+                           dim_t* iter, double* tolerance, Performance* pp);
 
-err_t Solver_GMRES(SystemMatrix_ptr A, double* r, double* x, dim_t* num_iter,
-                   double* tolerance, dim_t length_of_recursion, dim_t restart,
-                   Performance* pp);
+SolverResult Solver_GMRES(SystemMatrix_ptr A, double* r, double* x,
+                          dim_t* num_iter, double* tolerance,
+                          dim_t length_of_recursion, dim_t restart,
+                          Performance* pp);
 
-err_t Solver_GMRES2(Function* F, const double* f0, const double* x0, double* x,
-                    dim_t* iter, double* tolerance, Performance* pp);
+SolverResult Solver_GMRES2(Function* F, const double* f0, const double* x0,
+                           double* x, dim_t* iter, double* tolerance,
+                           Performance* pp);
 
-err_t Solver_NewtonGMRES(Function* F, double* x, Options* options,
-                         Performance* pp);
+SolverResult Solver_NewtonGMRES(Function* F, double* x, Options* options,
+                                Performance* pp);
 
 } // namespace paso
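
Every solver entry point in this header now returns a SolverResult instead of the removed err_t/SOLVER_* codes. The enumeration itself is defined elsewhere in Paso and is not shown in this patch; the following is only a plausible sketch limited to the enumerator names that appear in the diff, and the real definition may list more values:

    // Assumed sketch only: the upstream definition lives outside this diff.
    enum SolverResult {
        NoError = 0,        // iteration converged or may continue
        MaxIterReached,     // stopping criterion not met within iter_max
        InputError,         // illegal dimensions or arguments
        NegativeNormError,  // negative energy norm encountered
        Breakdown           // solver break down (may be recoverable by restart)
    };
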
 
diff --git a/paso/src/Solver_Function.cpp b/paso/src/Solver_Function.cpp
index dc2a3ec..2838e2d 100644
--- a/paso/src/Solver_Function.cpp
+++ b/paso/src/Solver_Function.cpp
@@ -41,15 +41,15 @@ LinearSystem::~LinearSystem()
 /*
  * evaluates value=P*(b-Ax)
  */
-err_t LinearSystem::call(double* value, const double* arg, Performance* pp)
+SolverResult LinearSystem::call(double* value, const double* arg, Performance* pp)
 {
     // tmp = b
     util::copy(n, tmp, b);
     // tmp = (A*arg-tmp)
-    SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, mat, arg, -PASO_ONE, tmp);
+    mat->MatrixVector_CSR_OFFSET0(PASO_ONE, arg, -PASO_ONE, tmp);
     // value = P*tmp
     mat->solvePreconditioner(value, tmp);
-    return NO_ERROR;
+    return NoError;
 }
 
 } // namespace paso
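
LinearSystem::call above builds the system residual in the reusable scratch buffer tmp and then applies the preconditioner, now through the MatrixVector_CSR_OFFSET0 member instead of the old free function. A generic sketch of those three steps; matVecPlus and applyPrecond are hypothetical callables standing in for the Paso calls:

    #include <algorithm>

    // value = P*(A*arg - b): tmp starts as b, the matrix-vector product
    // overwrites it with A*arg - tmp, and the preconditioner maps tmp to value.
    void evaluateCall(double* value, const double* arg, const double* b,
                      double* tmp, long n,
                      void (*matVecPlus)(double, const double*, double, double*),
                      void (*applyPrecond)(double*, const double*))
    {
        std::copy(b, b + n, tmp);        // tmp = b
        matVecPlus(1.0, arg, -1.0, tmp); // tmp = A*arg - tmp
        applyPrecond(value, tmp);        // value = P*tmp
    }
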
diff --git a/paso/src/Solver_applyBlockDiagonalMatrix.cpp.old b/paso/src/Solver_applyBlockDiagonalMatrix.cpp.old
deleted file mode 100644
index 8f883aa..0000000
--- a/paso/src/Solver_applyBlockDiagonalMatrix.cpp.old
+++ /dev/null
@@ -1,85 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/************************************************************************************/
-
-/* Paso: apply block diagonal matrix D: x=D*b                 */
-
-/* should be called within a parallel region                  */
-/* barrier synchronization should be performed to make sure   */
-/* that the input vector available                            */
-
-/************************************************************************************/
-
-/* Copyrights by ACcESS Australia 2003, 2004, 2005            */
-/* Author: Lutz Gross, l.gross at uq.edu.au                      */
-
-/************************************************************************************/
-
-#include "Paso.h"
-
-/************************************************************************************/
-
-
-void Solver_applyBlockDiagonalMatrix(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x,double* b) {
-     dim_t i;
-     register dim_t i3,i9;
-     register double b0,b1,b2,D00,D10,D20,D01,D11,D21,D02,D12,D22;
-
-     if (n_block==1) {
-         #pragma omp parallel for private(i) schedule(static)
-         for (i=0;i<n;++i) {
-            x[i]=D[i]*b[i];
-         }
-     } else if (n_block==2) {
-         #pragma omp parallel for private(i,b0,b1,D00,D10,D01,D11,i3,i9) schedule(static)
-         for (i=0;i<n;++i) {
-            i3=2*i;
-            i9=4*i;
-            b0=b[i3];
-            b1=b[i3+1];
-            D00=D[i9  ];
-            D10=D[i9+1];
-            D01=D[i9+2];
-            D11=D[i9+3];
-            x[i3  ]=D00*b0+D01*b1;
-            x[i3+1]=D10*b0+D11*b1;
-         }
-     } else if (n_block==3) {
-         #pragma omp parallel for private(i,b0,b1,b2,D00,D10,D20,D01,D11,D21,D02,D12,D22,i3,i9) schedule(static)
-         for (i=0;i<n;++i) {
-            i3=3*i;
-            i9=9*i;
-            b0=b[i3];
-            b1=b[i3+1];
-            b2=b[i3+2];
-            D00=D[i9  ];
-            D10=D[i9+1];
-            D20=D[i9+2];
-            D01=D[i9+3];
-            D11=D[i9+4];
-            D21=D[i9+5];
-            D02=D[i9+6];
-            D12=D[i9+7];
-            D22=D[i9+8];
-            x[i3  ]=D00*b0+D01*b1+D02*b2;
-            x[i3+1]=D10*b0+D11*b1+D12*b2;
-            x[i3+2]=D20*b0+D21*b1+D22*b2;
-         }
-     }
-     return;
-}
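
The deleted helper spelled out x = D*b for block sizes 1, 2 and 3. A generic sketch of the same block-diagonal apply for arbitrary block size, using the column-major block layout seen above (illustration only, not part of the patch):

    // x_i = D_i * b_i for every n_block x n_block block, with D stored block
    // by block in column-major order.
    void applyBlockDiagonal(long n_block, long n, const double* D,
                            const double* b, double* x)
    {
    #pragma omp parallel for schedule(static)
        for (long i = 0; i < n; ++i) {
            const double* Di = D + i * n_block * n_block;
            const double* bi = b + i * n_block;
            double*       xi = x + i * n_block;
            for (long r = 0; r < n_block; ++r) {
                double s = 0.;
                for (long c = 0; c < n_block; ++c)
                    s += Di[r + c * n_block] * bi[c];   // column-major block entry D(r,c)
                xi[r] = s;
            }
        }
    }
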
diff --git a/paso/src/SparseMatrix.cpp b/paso/src/SparseMatrix.cpp
index 47a36ce..70d5ff7 100644
--- a/paso/src/SparseMatrix.cpp
+++ b/paso/src/SparseMatrix.cpp
@@ -28,6 +28,8 @@
 #include "SparseMatrix.h"
 #include "BlockOps.h"
 #include "MKL.h"
+#include "Options.h"
+#include "PasoUtil.h"
 #include "Preconditioner.h"
 #include "UMFPACK.h"
 #include "mmio.h"
@@ -39,7 +41,7 @@
 
 namespace paso {
 
-using esysUtils::IndexList;
+using escript::IndexList;
 
 /* debug: print the entries */
 /*
@@ -111,18 +113,18 @@ SparseMatrix::SparseMatrix(SparseMatrixType ntype, Pattern_ptr npattern,
     solver_p(NULL)
 {
     if (patternIsUnrolled) {
-        if (!XNOR(ntype & MATRIX_FORMAT_OFFSET1, npattern->type & MATRIX_FORMAT_OFFSET1)) {
-            Esys_setError(TYPE_ERROR, "SparseMatrix: requested offset and pattern offset do not match.");
+        if ((ntype & MATRIX_FORMAT_OFFSET1) != (npattern->type & MATRIX_FORMAT_OFFSET1)) {
+            throw PasoException("SparseMatrix: requested offset and pattern offset do not match.");
         }
     }
     // do we need to apply unrolling?
     bool unroll
           // we don't like non-square blocks
         = (rowBlockSize != colBlockSize)
-#ifndef USE_LAPACK
+#ifndef ESYS_HAVE_LAPACK
           // or any block size bigger than 3
           || (colBlockSize > 3)
-# endif
+#endif
           // or if block size one requested and the block size is not 1
           || ((ntype & MATRIX_FORMAT_BLK1) && (colBlockSize > 1))
           // or if offsets don't match
@@ -147,10 +149,8 @@ SparseMatrix::SparseMatrix(SparseMatrixType ntype, Pattern_ptr npattern,
             row_block_size = rowBlockSize;
             col_block_size = colBlockSize;
         }
-        if (Esys_noError()) {
-            numRows = pattern->numInput;
-            numCols = pattern->numOutput;
-        }
+        numRows = pattern->numInput;
+        numCols = pattern->numOutput;
     } else {
     // === compressed sparse row ===
         if (unroll) {
@@ -167,22 +167,18 @@ SparseMatrix::SparseMatrix(SparseMatrixType ntype, Pattern_ptr npattern,
             row_block_size = rowBlockSize;
             col_block_size = colBlockSize;
         }
-        if (Esys_noError()) {
-            numRows = pattern->numOutput;
-            numCols = pattern->numInput;
-        }
+        numRows = pattern->numOutput;
+        numCols = pattern->numInput;
     }
-    if (Esys_noError()) {
-        if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-            block_size = MIN(row_block_size, col_block_size);
-        } else {
-            block_size = row_block_size*col_block_size;
-        }
-        len = (size_t)(pattern->len)*(size_t)(block_size);
-
-        val=new double[len];
-        setValues(0.);
+    if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+        block_size = std::min(row_block_size, col_block_size);
+    } else {
+        block_size = row_block_size*col_block_size;
     }
+    len = (size_t)(pattern->len)*(size_t)(block_size);
+
+    val=new double[len];
+    setValues(0.);
 }
 
 SparseMatrix::~SparseMatrix()
@@ -208,35 +204,29 @@ SparseMatrix_ptr SparseMatrix::loadMM_toCSR(const char* filename)
     SparseMatrix_ptr out;
     int i;
     MM_typecode matrixCode;
-    Esys_resetError();
 
     // open the file
     std::ifstream f(filename);
     if (f.fail()) {
-        Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Cannot open file for reading.");
-        return out;
+        throw PasoException("SparseMatrix::loadMM_toCSR: Cannot open file for reading.");
     }
 
     // process banner
     if (mm_read_banner(f, &matrixCode) != 0) {
-        Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Error processing MM banner.");
         f.close();
-        return out;
+        throw PasoException("SparseMatrix::loadMM_toCSR: Error processing MM banner.");
     }
     if (!(mm_is_real(matrixCode) && mm_is_sparse(matrixCode) && mm_is_general(matrixCode))) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::loadMM_toCSR: found Matrix Market type is not supported.");
         f.close();
-        return out;
+        throw PasoException("SparseMatrix::loadMM_toCSR: found Matrix Market type is not supported.");
     }
 
     // get matrix size
     int M, N, nz;
 
-    if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0)
-    {
-        Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Could not parse matrix size.");
+    if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) {
         f.close();
-        return out;
+        throw PasoException("SparseMatrix::loadMM_toCSR: Could not parse matrix size.");
     }
 
     // prepare storage
@@ -290,18 +280,16 @@ SparseMatrix_ptr SparseMatrix::loadMM_toCSR(const char* filename)
 void SparseMatrix::saveMM(const char* filename) const
 {
     if (col_block_size != row_block_size) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::saveMM: currently only square blocks are supported.");
-        return;
+        throw PasoException("SparseMatrix::saveMM: currently only square blocks are supported.");
     }
 
     // open the file
     std::ofstream f(filename);
     if (f.fail()) {
-        Esys_setError(IO_ERROR, "SparseMatrix::saveMM: File could not be opened for writing");
-        return;
+        throw PasoException("SparseMatrix::saveMM: File could not be opened for writing");
     }
     if (type & MATRIX_FORMAT_CSC) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::saveMM does not support CSC.");
+        throw PasoException("SparseMatrix::saveMM does not support CSC.");
     } else {
         MM_typecode matcode;
         mm_initialize_typecode(&matcode);
@@ -363,7 +351,7 @@ void SparseMatrix::addAbsRow_CSR_OFFSET0(double* array) const
             for (index_t iptr=pattern->ptr[ir]; iptr < pattern->ptr[ir+1]; iptr++) {
                 for (dim_t icb=0; icb < col_block_size; icb++) {
                     const index_t idx = iptr*block_size+irb+row_block_size*icb;
-                    fac += ABS(val[idx]);
+                    fac += std::abs(val[idx]);
                 }
             }
             array[irow]+=fac;
@@ -382,10 +370,10 @@ void SparseMatrix::maxAbsRow_CSR_OFFSET0(double* array) const
             for (index_t iptr=pattern->ptr[ir]; iptr < pattern->ptr[ir+1]; iptr++) {
                 for (dim_t icb=0; icb < col_block_size; icb++) {
                     const index_t idx = iptr*block_size+irb+row_block_size*icb;
-                    fac=MAX(fac, std::abs(val[idx]));
+                    fac=std::max(fac, std::abs(val[idx]));
                 }
             }
-            array[irow]=MAX(array[irow], fac);
+            array[irow]=std::max(array[irow], fac);
         }
     }
 }
@@ -436,7 +424,7 @@ void SparseMatrix::copyFromMainDiagonal(double* out) const
 {
     const dim_t n = pattern->numOutput;
     const dim_t nblk = block_size;
-    const dim_t blk = MIN(row_block_size, col_block_size);
+    const dim_t blk = std::min(row_block_size, col_block_size);
     const index_t* main_ptr = borrowMainDiagonalPointer();
 #pragma omp parallel for
     for (index_t ir=0; ir < n; ir++) {
@@ -450,7 +438,7 @@ void SparseMatrix::copyToMainDiagonal(const double* in)
 {
     const dim_t n = pattern->numOutput;
     const dim_t nblk = block_size;
-    const dim_t blk = MIN(row_block_size, col_block_size);
+    const dim_t blk = std::min(row_block_size, col_block_size);
     const index_t* main_ptr = borrowMainDiagonalPointer();
 #pragma omp parallel for
     for (index_t ir=0; ir < n; ir++) {
@@ -512,43 +500,41 @@ void SparseMatrix::invMain(double* inv_diag, index_t* pivot) const
     index_t* main_ptr=pattern->borrowMainDiagonalPointer();
     // check matrix is square
     if (m_block != n_block) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::invMain: square block size expected.");
+        throw PasoException("SparseMatrix::invMain: square block size expected.");
     }
-    if (Esys_noError()) {
-        if (n_block == 1) {
+    if (n_block == 1) {
 #pragma omp parallel for private(i, iPtr, A11) schedule(static)
-            for (i = 0; i < n; i++) {
-                iPtr = main_ptr[i];
-                A11 = val[iPtr];
-                if (ABS(A11) > 0.) {
-                    inv_diag[i]=1./A11;
-                } else {
-                    failed=1;
-                }
+        for (i = 0; i < n; i++) {
+            iPtr = main_ptr[i];
+            A11 = val[iPtr];
+            if (std::abs(A11) > 0.) {
+                inv_diag[i]=1./A11;
+            } else {
+                failed=1;
             }
-        } else if (n_block==2) {
+        }
+    } else if (n_block==2) {
 #pragma omp parallel for private(i, iPtr) schedule(static)
-            for (i = 0; i < n; i++) {
-                iPtr = main_ptr[i];
-                BlockOps_invM_2(&inv_diag[i*4], &val[iPtr*4], &failed);
-            }
-        } else if (n_block==3) {
+        for (i = 0; i < n; i++) {
+            iPtr = main_ptr[i];
+            BlockOps_invM_2(&inv_diag[i*4], &val[iPtr*4], &failed);
+        }
+    } else if (n_block==3) {
 #pragma omp parallel for private(i, iPtr) schedule(static)
-            for (i = 0; i < n; i++) {
-                iPtr = main_ptr[i];
-                BlockOps_invM_3(&inv_diag[i*9], &val[iPtr*9], &failed);
-            }
-        } else {
+        for (i = 0; i < n; i++) {
+            iPtr = main_ptr[i];
+            BlockOps_invM_3(&inv_diag[i*9], &val[iPtr*9], &failed);
+        }
+    } else {
 #pragma omp parallel for private(i, iPtr) schedule(static)
-            for (i = 0; i < n; i++) {
-                iPtr = main_ptr[i];
-                BlockOps_Cpy_N(block_size, &inv_diag[i*block_size], &val[iPtr*block_size]);
-                BlockOps_invM_N(n_block, &inv_diag[i*block_size], &pivot[i*n_block], &failed);
-            }
+        for (i = 0; i < n; i++) {
+            iPtr = main_ptr[i];
+            BlockOps_Cpy_N(block_size, &inv_diag[i*block_size], &val[iPtr*block_size]);
+            BlockOps_invM_N(n_block, &inv_diag[i*block_size], &pivot[i*n_block], &failed);
         }
     }
     if (failed > 0) {
-        Esys_setError(ZERO_DIVISION_ERROR, "SparseMatrix::invMain: non-regular main diagonal block.");
+        throw PasoException("SparseMatrix::invMain: non-regular main diagonal block.");
     }
 }
 
@@ -646,46 +632,44 @@ SparseMatrix_ptr SparseMatrix::unroll(SparseMatrixType newType) const
     const index_t A_offset = (type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
     const index_t out_offset = (out_type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
 
-    if (Esys_noError()) {
-        if (out->type & MATRIX_FORMAT_CSC) {
+    if (out->type & MATRIX_FORMAT_CSC) {
 #pragma omp parallel for
-            for (dim_t i=0; i<n; ++i) {
-                for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
-                    const index_t j = pattern->index[iptr]-A_offset;
-                    for (dim_t icb=0; icb<col_block_size; ++icb) {
-                        const index_t icol=j*col_block_size+icb;
-                        const index_t* start_p=&out->pattern->index[out->pattern->ptr[icol]-out_offset];
-                        const index_t l_col=out->pattern->ptr[icol+1]-out->pattern->ptr[icol];
-                        for (dim_t irb=0; irb<row_block_size; ++irb) {
-                            const index_t irow=row_block_size*i+irb+out_offset;
-                            const index_t* where_p = (index_t*)bsearch(&irow,
-                                        start_p, l_col, sizeof(index_t),
-                                        util::comparIndex);
-                            if (where_p != NULL)
-                                out->val[out->pattern->ptr[icol]-out_offset+(index_t)(where_p-start_p)] =
-                                    val[block_size*iptr+irb+row_block_size*icb];
-                        }
+        for (dim_t i=0; i<n; ++i) {
+            for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
+                const index_t j = pattern->index[iptr]-A_offset;
+                for (dim_t icb=0; icb<col_block_size; ++icb) {
+                    const index_t icol=j*col_block_size+icb;
+                    const index_t* start_p=&out->pattern->index[out->pattern->ptr[icol]-out_offset];
+                    const index_t l_col=out->pattern->ptr[icol+1]-out->pattern->ptr[icol];
+                    for (dim_t irb=0; irb<row_block_size; ++irb) {
+                        const index_t irow=row_block_size*i+irb+out_offset;
+                        const index_t* where_p = (index_t*)bsearch(&irow,
+                                    start_p, l_col, sizeof(index_t),
+                                    util::comparIndex);
+                        if (where_p != NULL)
+                            out->val[out->pattern->ptr[icol]-out_offset+(index_t)(where_p-start_p)] =
+                                val[block_size*iptr+irb+row_block_size*icb];
                     }
                 }
             }
-        } else {
+        }
+    } else {
 #pragma omp parallel for
-            for (dim_t i=0; i<n; ++i) {
-                for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
-                    const index_t j = pattern->index[iptr]-A_offset;
-                    for (dim_t irb=0; irb<row_block_size; ++irb) {
-                        const index_t irow=row_block_size*i+irb;
-                        const index_t* start_p = &out->pattern->index[out->pattern->ptr[irow]-out_offset];
-                        const index_t l_row=out->pattern->ptr[irow+1]-out->pattern->ptr[irow];
-                        for (dim_t icb=0; icb<col_block_size; ++icb) {
-                            const index_t icol=j*col_block_size+icb+out_offset;
-                            const index_t* where_p = (index_t*)bsearch(&icol,
-                                        start_p, l_row, sizeof(index_t),
-                                        util::comparIndex);
-                            if (where_p != NULL)
-                                out->val[out->pattern->ptr[irow]-out_offset+(index_t)(where_p-start_p)] =
-                                    val[block_size*iptr+irb+row_block_size*icb];
-                        }
+        for (dim_t i=0; i<n; ++i) {
+            for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
+                const index_t j = pattern->index[iptr]-A_offset;
+                for (dim_t irb=0; irb<row_block_size; ++irb) {
+                    const index_t irow=row_block_size*i+irb;
+                    const index_t* start_p = &out->pattern->index[out->pattern->ptr[irow]-out_offset];
+                    const index_t l_row=out->pattern->ptr[irow+1]-out->pattern->ptr[irow];
+                    for (dim_t icb=0; icb<col_block_size; ++icb) {
+                        const index_t icol=j*col_block_size+icb+out_offset;
+                        const index_t* where_p = (index_t*)bsearch(&icol,
+                                    start_p, l_row, sizeof(index_t),
+                                    util::comparIndex);
+                        if (where_p != NULL)
+                            out->val[out->pattern->ptr[irow]-out_offset+(index_t)(where_p-start_p)] =
+                                val[block_size*iptr+irb+row_block_size*icb];
                     }
                 }
             }
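
invMain above, like the smoother sweeps earlier in this patch, replaces Esys_setError(ZERO_DIVISION_ERROR, ...) with a PasoException but keeps the shared 'failed' flag and throws only after the parallel loops, since a C++ exception must not propagate out of an OpenMP parallel region. A reduced sketch of that pattern for the 1x1-block case:

    #include <cmath>
    #include <stdexcept>

    // Invert a scalar diagonal in parallel; a singular entry sets 'failed'
    // and the exception is raised once the parallel region has ended.
    void invertDiagonal(const double* diag, double* invDiag, long n)
    {
        int failed = 0;
    #pragma omp parallel for schedule(static)
        for (long i = 0; i < n; ++i) {
            if (std::abs(diag[i]) > 0.)
                invDiag[i] = 1. / diag[i];
            else
                failed = 1;   // same flag convention as the upstream loops
        }
        if (failed > 0)
            throw std::runtime_error("invertDiagonal: non-regular diagonal entry.");
    }
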
diff --git a/paso/src/SparseMatrix_MatrixMatrix.cpp b/paso/src/SparseMatrix_MatrixMatrix.cpp
index 5486282..4c458b1 100644
--- a/paso/src/SparseMatrix_MatrixMatrix.cpp
+++ b/paso/src/SparseMatrix_MatrixMatrix.cpp
@@ -26,6 +26,7 @@
 *****************************************************************************/
 
 #include "SparseMatrix.h"
+#include "PasoException.h"
 #include "PasoUtil.h" // comparIndex
 
 namespace paso {
@@ -48,22 +49,18 @@ SparseMatrix_ptr SparseMatrix_MatrixMatrix(const_SparseMatrix_ptr A,
     if (!( (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) ||
            (A->type & MATRIX_FORMAT_DEFAULT) ||
            (A->type & MATRIX_FORMAT_BLK1) )) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::MatrixMatrix: Unsupported matrix format of A.");
-        return out;
+        throw PasoException("SparseMatrix::MatrixMatrix: Unsupported matrix format of A.");
     }
     if (!( (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) ||
            (B->type & MATRIX_FORMAT_DEFAULT) ||
            (B->type & MATRIX_FORMAT_BLK1) )) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::MatrixMatrix: Unsupported matrix format of B.");
-        return out;
+        throw PasoException("SparseMatrix::MatrixMatrix: Unsupported matrix format of B.");
     }
     if (A->col_block_size != B->row_block_size) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::MatrixMatrix: Column block size of A and row block size of B must match.");
-        return out;
+        throw PasoException("SparseMatrix::MatrixMatrix: Column block size of A and row block size of B must match.");
     }
     if (A->numCols != B->numRows) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::MatrixMatrix: number of columns of A and number of rows of B must match.");
-        return out;
+        throw PasoException("SparseMatrix::MatrixMatrix: number of columns of A and number of rows of B must match.");
     }
 
     if ( (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) &&
@@ -75,30 +72,24 @@ SparseMatrix_ptr SparseMatrix_MatrixMatrix(const_SparseMatrix_ptr A,
 
     Pattern_ptr outpattern(A->pattern->multiply(MATRIX_FORMAT_DEFAULT, B->pattern));
 
-    if (Esys_noError()) {
-        out.reset(new SparseMatrix(C_type, outpattern, A->row_block_size, B->col_block_size, false));
-    }
+    out.reset(new SparseMatrix(C_type, outpattern, A->row_block_size, B->col_block_size, false));
 
-    if (Esys_noError()) {
-        if ( (A->row_block_size == 1) && (B->col_block_size ==1 ) && (A->col_block_size ==1) ) {
-            SparseMatrix_MatrixMatrix_DD(out, A, B);
+    if ( (A->row_block_size == 1) && (B->col_block_size ==1 ) && (A->col_block_size ==1) ) {
+        SparseMatrix_MatrixMatrix_DD(out, A, B);
+    } else {
+        if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+            if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+                SparseMatrix_MatrixMatrix_DD(out, A, B);
+            } else {
+                SparseMatrix_MatrixMatrix_DB(out, A, B);
+            }
         } else {
-            if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                    SparseMatrix_MatrixMatrix_DD(out, A, B);
-                } else {
-                    SparseMatrix_MatrixMatrix_DB(out, A, B);
-                }
+            if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+                SparseMatrix_MatrixMatrix_BD(out, A, B);
             } else {
-                if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                    SparseMatrix_MatrixMatrix_BD(out, A, B);
-                } else {
-                    SparseMatrix_MatrixMatrix_BB(out, A, B);
-                }
+                SparseMatrix_MatrixMatrix_BB(out, A, B);
             }
         }
-    } else {
-        out.reset();
     }
     return out;
 }
@@ -115,7 +106,7 @@ void SparseMatrix_MatrixMatrix_BB(SparseMatrix_ptr C, const_SparseMatrix_ptr A,
     const dim_t B_block_size =B->block_size;
     const dim_t A_block_size =A->block_size;
     double *C_ij, *A_ik, *B_kj;
-    register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+    double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
     dim_t i, ib, irb, icb;
     index_t ij_ptrC, j, ik_ptrA, k, kj_ptrB, *start_p, *where_p;
 
@@ -421,7 +412,7 @@ void SparseMatrix_MatrixMatrix_DB(SparseMatrix_ptr C, const_SparseMatrix_ptr A,
     const dim_t B_block_size =B->block_size;
     const dim_t A_block_size =A->block_size;
     double *C_ij, *A_ik, *B_kj;
-    register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+    double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
     dim_t i, ib, irb, icb;
     index_t ij_ptrC, j, ik_ptrA, k, kj_ptrB, *start_p, *where_p;
 
@@ -656,7 +647,7 @@ void SparseMatrix_MatrixMatrix_BD(SparseMatrix_ptr C, const_SparseMatrix_ptr A,
     const dim_t B_block_size =B->block_size;
     const dim_t A_block_size =A->block_size;
     double *C_ij, *A_ik, *B_kj;
-    register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+    double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
     dim_t i, ib, irb, icb;
     index_t ij_ptrC, j, ik_ptrA, k, kj_ptrB, *start_p, *where_p;
 
@@ -887,7 +878,7 @@ void SparseMatrix_MatrixMatrix_DD(SparseMatrix_ptr C, const_SparseMatrix_ptr A,
     const dim_t B_block_size =B->block_size;
     const dim_t A_block_size =A->block_size;
     double *C_ij, *A_ik, *B_kj;
-    register double C_ij_0, C_ij_1, C_ij_2, C_ij_3;
+    double C_ij_0, C_ij_1, C_ij_2, C_ij_3;
     dim_t i, ib;
     index_t ij_ptrC, j, ik_ptrA, k, kj_ptrB, *start_p, *where_p;
 
@@ -1051,7 +1042,7 @@ void SparseMatrix_MatrixMatrix_DD(SparseMatrix_ptr C, const_SparseMatrix_ptr A,
                        kj_ptrB += (index_t)(where_p-start_p);
                        A_ik=&(A->val[ik_ptrA*A_block_size]);
                        B_kj=&(B->val[kj_ptrB*B_block_size]);
-                       for (ib=0; ib<MIN(A_block_size, B_block_size); ++ib) C_ij[ib]+=A_ik[ib]*B_kj[ib];
+                       for (ib=0; ib<std::min(A_block_size, B_block_size); ++ib) C_ij[ib]+=A_ik[ib]*B_kj[ib];
                   }
                }
             }
diff --git a/paso/src/SparseMatrix_MatrixMatrixTranspose.cpp b/paso/src/SparseMatrix_MatrixMatrixTranspose.cpp
index 01aa1b3..d36bac8 100644
--- a/paso/src/SparseMatrix_MatrixMatrixTranspose.cpp
+++ b/paso/src/SparseMatrix_MatrixMatrixTranspose.cpp
@@ -27,6 +27,7 @@
 *****************************************************************************/
 
 #include "SparseMatrix.h"
+#include "PasoException.h"
 
 namespace paso {
 
@@ -56,20 +57,17 @@ SparseMatrix_ptr SparseMatrix_MatrixMatrixTranspose(const_SparseMatrix_ptr A,
     SparseMatrix_ptr out;
 
     if ( !  ( (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) || (A->type & MATRIX_FORMAT_DEFAULT) || (MATRIX_FORMAT_BLK1 & A->type ) )  ) {
-        Esys_setError(TYPE_ERROR,"SparseMatrix_MatrixMatrix: Unsupported matrix format of A.");
-        return out;
+        throw PasoException("SparseMatrix_MatrixMatrix: Unsupported matrix format of A.");
     }
     if ( !  ( (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) || (B->type & MATRIX_FORMAT_DEFAULT) || (MATRIX_FORMAT_BLK1 & B->type ) ) ) {
-        Esys_setError(TYPE_ERROR,"SparseMatrix_MatrixMatrix: Unsupported matrix format of B.");
-        return out;
+        throw PasoException("SparseMatrix_MatrixMatrix: Unsupported matrix format of B.");
     }
     if (! (A->col_block_size == B->row_block_size) ) {
-        Esys_setError(TYPE_ERROR,"SparseMatrix_MatrixMatrix: Column block size of A and row block size of B must match.");
+        throw PasoException("SparseMatrix_MatrixMatrix: Column block size of A and row block size of B must match.");
         return out;
     }
     if (! (A->numCols == B->numRows) ) {
-        Esys_setError(TYPE_ERROR,"SparseMatrix_MatrixMatrix: number of columns of A and number of rows of B must match.");
-        return out;
+        throw PasoException("SparseMatrix_MatrixMatrix: number of columns of A and number of rows of B must match.");
     }
 
     if ( (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) && (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) ) {
@@ -80,30 +78,24 @@ SparseMatrix_ptr SparseMatrix_MatrixMatrixTranspose(const_SparseMatrix_ptr A,
 
     Pattern_ptr outpattern(A->pattern->multiply(MATRIX_FORMAT_DEFAULT, B->pattern));
 
-    if (Esys_noError()) {
-        out.reset(new SparseMatrix(C_type, outpattern, A->row_block_size, B->col_block_size, false));
-    }
+    out.reset(new SparseMatrix(C_type, outpattern, A->row_block_size, B->col_block_size, false));
 
-    if (Esys_noError()) {
-        if ( (A->row_block_size == 1) && (B->col_block_size ==1 ) && (A->col_block_size ==1) ) {
-            SparseMatrix_MatrixMatrixTranspose_DD(out, A, B, T);
+    if (A->row_block_size == 1 && B->col_block_size == 1 && A->col_block_size ==1) {
+        SparseMatrix_MatrixMatrixTranspose_DD(out, A, B, T);
+    } else {
+        if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+            if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+                SparseMatrix_MatrixMatrixTranspose_DD(out, A, B, T);
+            } else {
+                SparseMatrix_MatrixMatrixTranspose_DB(out, A, B, T);
+            }
         } else {
-            if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                    SparseMatrix_MatrixMatrixTranspose_DD(out, A, B, T);
-                } else {
-                    SparseMatrix_MatrixMatrixTranspose_DB(out, A, B, T);
-                }
+            if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+                SparseMatrix_MatrixMatrixTranspose_BD(out, A, B, T);
             } else {
-                if (B->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-                    SparseMatrix_MatrixMatrixTranspose_BD(out, A, B, T);
-                } else {
-                    SparseMatrix_MatrixMatrixTranspose_BB(out, A, B, T);
-                }
+                SparseMatrix_MatrixMatrixTranspose_BB(out, A, B, T);
             }
         }
-    } else {
-        out.reset();
     }
     return out;
 }
@@ -119,7 +111,7 @@ void SparseMatrix_MatrixMatrixTranspose_BB(SparseMatrix_ptr C, const_SparseMatri
    const dim_t B_block_size =B->block_size;
    const dim_t A_block_size =A->block_size;
    double *C_ij, *A_ik, *B_kj;
-   register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+   double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
    dim_t i, ib, irb, icb;
    index_t ij_ptrC, j, ik_ptrA, kj_ptrB, kA, kB, ikb, kjb;
 
@@ -463,7 +455,7 @@ void SparseMatrix_MatrixMatrixTranspose_DB(SparseMatrix_ptr C, const_SparseMatri
    const dim_t B_block_size =B->block_size;
    const dim_t A_block_size =A->block_size;
    double *C_ij, *A_ik, *B_kj;
-   register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+   double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
    dim_t i, ib, irb, icb;
    index_t ij_ptrC, j, ik_ptrA, kj_ptrB, kA, kB, ikb, kjb;
 
@@ -743,7 +735,7 @@ void SparseMatrix_MatrixMatrixTranspose_BD(SparseMatrix_ptr C, const_SparseMatri
    const dim_t B_block_size =B->block_size;
    const dim_t A_block_size =A->block_size;
    double *C_ij, *A_ik, *B_kj;
-   register double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
+   double rtmp, C_ij_00, C_ij_10, C_ij_20, C_ij_30, C_ij_01, C_ij_11, C_ij_21, C_ij_31, C_ij_02, C_ij_12, C_ij_22, C_ij_32, C_ij_03, C_ij_13, C_ij_23, C_ij_33;
    dim_t i, ib, irb, icb;
    index_t ij_ptrC, j, ik_ptrA, kj_ptrB, ikb, kjb, kA, kB;
 
@@ -1022,7 +1014,7 @@ void SparseMatrix_MatrixMatrixTranspose_DD(SparseMatrix_ptr C, const_SparseMatri
    const dim_t B_block_size =B->block_size;
    const dim_t A_block_size =A->block_size;
    double *C_ij, *A_ik, *B_kj;
-   register double C_ij_0, C_ij_1, C_ij_2, C_ij_3;
+   double C_ij_0, C_ij_1, C_ij_2, C_ij_3;
    dim_t i, ib;
    index_t ij_ptrC, j, ik_ptrA, kA, kB, kj_ptrB, ikb, kjb;
 
diff --git a/paso/src/SparseMatrix_MatrixVector.cpp b/paso/src/SparseMatrix_MatrixVector.cpp
index ffd3429..1547b8a 100644
--- a/paso/src/SparseMatrix_MatrixVector.cpp
+++ b/paso/src/SparseMatrix_MatrixVector.cpp
@@ -287,7 +287,11 @@ void SparseMatrix_MatrixVector_CSR_OFFSET0(double alpha,
 #endif
 
     const dim_t nrow = A->numRows;
+#ifdef _OPENMP
     const dim_t np = omp_get_max_threads();
+#else
+    const dim_t np = 1;
+#endif
     const dim_t len = nrow/np;
 
 #ifdef USE_DYNAMIC_SCHEDULING
diff --git a/paso/src/SparseMatrix_getSubmatrix.cpp b/paso/src/SparseMatrix_getSubmatrix.cpp
index 98e71c1..4052749 100644
--- a/paso/src/SparseMatrix_getSubmatrix.cpp
+++ b/paso/src/SparseMatrix_getSubmatrix.cpp
@@ -45,35 +45,29 @@ SparseMatrix_ptr SparseMatrix::getSubmatrix(dim_t n_row_sub, dim_t n_col_sub,
                                             const index_t* new_col_index) const
 {
     SparseMatrix_ptr out;
-    Esys_resetError();
     if (type & MATRIX_FORMAT_CSC) {
-        Esys_setError(TYPE_ERROR, "SparseMatrix::getSubmatrix: gathering submatrices supports CSR matrix format only.");
-        return out;
+        throw PasoException("SparseMatrix::getSubmatrix: gathering submatrices supports CSR matrix format only.");
     }
 
     const index_t index_offset = (type & MATRIX_FORMAT_OFFSET1 ? 1:0);
     Pattern_ptr sub_pattern(pattern->getSubpattern(n_row_sub, n_col_sub,
                                                    row_list, new_col_index));
-    if (Esys_noError()) {
-        // create the return object
-        out.reset(new SparseMatrix(type, sub_pattern, row_block_size,
-                                   col_block_size, true));
-        if (Esys_noError()) {
+    // create the return object
+    out.reset(new SparseMatrix(type, sub_pattern, row_block_size,
+                               col_block_size, true));
 #pragma omp parallel for
-            for (int i=0; i<n_row_sub; ++i) {
-                const index_t subpattern_row = row_list[i];
-                for (int k=pattern->ptr[subpattern_row]-index_offset;
-                        k < pattern->ptr[subpattern_row+1]-index_offset; ++k) {
-                    index_t tmp=new_col_index[pattern->index[k]-index_offset];
-                    if (tmp > -1) {
-                        #pragma ivdep
-                        for (index_t m=out->pattern->ptr[i]-index_offset;
-                                m < out->pattern->ptr[i+1]-index_offset; ++m) {
-                            if (out->pattern->index[m]==tmp+index_offset) {
-                                BlockOps_Cpy_N(block_size, &out->val[m*block_size], &val[k*block_size]);
-                                break;
-                            }
-                        }
+    for (int i=0; i<n_row_sub; ++i) {
+        const index_t subpattern_row = row_list[i];
+        for (int k=pattern->ptr[subpattern_row]-index_offset;
+                k < pattern->ptr[subpattern_row+1]-index_offset; ++k) {
+            index_t tmp=new_col_index[pattern->index[k]-index_offset];
+            if (tmp > -1) {
+                #pragma ivdep
+                for (index_t m=out->pattern->ptr[i]-index_offset;
+                        m < out->pattern->ptr[i+1]-index_offset; ++m) {
+                    if (out->pattern->index[m]==tmp+index_offset) {
+                        BlockOps_Cpy_N(block_size, &out->val[m*block_size], &val[k*block_size]);
+                        break;
                     }
                 }
             }
@@ -97,7 +91,7 @@ SparseMatrix_ptr SparseMatrix::getBlock(int blockid) const
                 }
             }
         } else {
-            Esys_setError(VALUE_ERROR, "SparseMatrix::getBlock: Invalid block ID requested.");
+            throw PasoException("SparseMatrix::getBlock: Invalid block ID requested.");
         }
     } else if (blocksize==2) {
         if (blockid==1) {
@@ -115,7 +109,7 @@ SparseMatrix_ptr SparseMatrix::getBlock(int blockid) const
                 }
             }
         } else {
-            Esys_setError(VALUE_ERROR, "SparseMatrix::getBlock: Invalid block ID requested.");
+            throw PasoException("SparseMatrix::getBlock: Invalid block ID requested.");
         }
     } else if (blocksize==3) {
         if (blockid==1) {
@@ -140,7 +134,7 @@ SparseMatrix_ptr SparseMatrix::getBlock(int blockid) const
                 }
             }
         } else {
-            Esys_setError(VALUE_ERROR, "SparseMatrix::getBlock: Invalid block ID requested.");
+            throw PasoException("SparseMatrix::getBlock: Invalid block ID requested.");
         }
     }
     return out;
diff --git a/paso/src/SparseMatrix_nullifyRowsAndCols.cpp b/paso/src/SparseMatrix_nullifyRowsAndCols.cpp
index b72d16c..d2a419e 100644
--- a/paso/src/SparseMatrix_nullifyRowsAndCols.cpp
+++ b/paso/src/SparseMatrix_nullifyRowsAndCols.cpp
@@ -33,7 +33,6 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
 #include "SparseMatrix.h"
 
 namespace paso {
diff --git a/paso/src/SparseMatrix_saveHB.cpp b/paso/src/SparseMatrix_saveHB.cpp
index 74b4734..a209bed 100644
--- a/paso/src/SparseMatrix_saveHB.cpp
+++ b/paso/src/SparseMatrix_saveHB.cpp
@@ -26,8 +26,8 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
 #include "SparseMatrix.h"
+#include "PasoException.h"
 
 #include <fstream>
 #include <iomanip>
@@ -180,8 +180,7 @@ void SparseMatrix::saveHB_CSC(const char* filename) const
 {
     std::ofstream f(filename);
     if (f.fail()) {
-        Esys_setError(IO_ERROR, "SparseMatrix::saveHB_CSC: File could not be opened for writing.");
-        return;
+        throw PasoException("SparseMatrix::saveHB_CSC: File could not be opened for writing.");
     }
 
     int i, curr_col,j ;
diff --git a/paso/src/SystemMatrix.cpp b/paso/src/SystemMatrix.cpp
index 13c31b2..2de5d5b 100644
--- a/paso/src/SystemMatrix.cpp
+++ b/paso/src/SystemMatrix.cpp
@@ -26,14 +26,23 @@
 /****************************************************************************/
 
 #include "SystemMatrix.h"
+#include "Options.h"
+#include "PasoException.h"
 #include "Preconditioner.h"
-#include "Solver.h" // only for destructor
+#include "Solver.h"
+
+#include <escript/Data.h>
 
 #include <cstring> // memcpy
 #include <vector>
 
 namespace paso {
 
+SystemMatrix::SystemMatrix()
+{
+    throw PasoException("SystemMatrix: Illegal to generate default SystemMatrix.");
+}
+
 /// Allocates a SystemMatrix of given type using the given matrix pattern.
 /// Values are initialized with zero.
 /// If patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed
@@ -41,7 +50,10 @@ namespace paso {
 /// and offsets. Otherwise unrolling and offset adjustment will be performed.
 SystemMatrix::SystemMatrix(SystemMatrixType ntype,
                            SystemMatrixPattern_ptr npattern, dim_t rowBlockSize,
-                           dim_t colBlockSize, bool patternIsUnrolled) :
+                           dim_t colBlockSize, bool patternIsUnrolled,
+                           const escript::FunctionSpace& rowFS,
+                           const escript::FunctionSpace& colFS) :
+    escript::AbstractSystemMatrix(rowBlockSize, rowFS, colBlockSize, colFS),
     type(ntype),
     logical_row_block_size(rowBlockSize),
     logical_col_block_size(colBlockSize),
@@ -49,23 +61,21 @@ SystemMatrix::SystemMatrix(SystemMatrixType ntype,
     balance_vector(NULL),
     global_id(NULL),
     solver_package(PASO_PASO),
-    solver_p(NULL),
-    trilinos_data(NULL)
+    solver_p(NULL)
 {
-    Esys_resetError();
     if (patternIsUnrolled) {
-        if (!XNOR(ntype & MATRIX_FORMAT_OFFSET1, npattern->type & MATRIX_FORMAT_OFFSET1)) {
-            Esys_setError(TYPE_ERROR, "SystemMatrix: requested offset and pattern offset do not match.");
+        if ((ntype & MATRIX_FORMAT_OFFSET1) != (npattern->type & MATRIX_FORMAT_OFFSET1)) {
+            throw PasoException("SystemMatrix: requested offset and pattern offset do not match.");
         }
     }
     // do we need to apply unrolling?
     bool unroll
           // we don't like non-square blocks
         = (rowBlockSize != colBlockSize)
-#ifndef USE_LAPACK
+#ifndef ESYS_HAVE_LAPACK
           // or any block size bigger than 3
           || (colBlockSize > 3)
-# endif
+#endif
           // or if block size one requested and the block size is not 1
           || ((ntype & MATRIX_FORMAT_BLK1) && colBlockSize > 1)
           // or the offsets don't match
@@ -91,10 +101,8 @@ SystemMatrix::SystemMatrix(SystemMatrixType ntype,
             row_block_size = rowBlockSize;
             col_block_size = colBlockSize;
         }
-        if (Esys_noError()) {
-            row_distribution = pattern->input_distribution;
-            col_distribution = pattern->output_distribution;
-        }
+        row_distribution = pattern->input_distribution;
+        col_distribution = pattern->output_distribution;
     } else {
         if (unroll) {
             if (patternIsUnrolled) {
@@ -110,31 +118,24 @@ SystemMatrix::SystemMatrix(SystemMatrixType ntype,
             row_block_size = rowBlockSize;
             col_block_size = colBlockSize;
         }
-        if (Esys_noError()) {
-            row_distribution = pattern->output_distribution;
-            col_distribution = pattern->input_distribution;
-        }
+        row_distribution = pattern->output_distribution;
+        col_distribution = pattern->input_distribution;
     }
-    if (Esys_noError()) {
-        if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-            block_size = MIN(row_block_size, col_block_size);
-        } else {
-            block_size = row_block_size*col_block_size;
-        }
-        col_coupler.reset(new Coupler(pattern->col_connector, col_block_size));
-        row_coupler.reset(new Coupler(pattern->row_connector, row_block_size));
-        if (ntype & MATRIX_FORMAT_TRILINOS_CRS) {
-        } else {
-            mainBlock.reset(new SparseMatrix(type, pattern->mainPattern, row_block_size, col_block_size, true));
-            col_coupleBlock.reset(new SparseMatrix(type, pattern->col_couplePattern, row_block_size, col_block_size, true));
-            row_coupleBlock.reset(new SparseMatrix(type, pattern->row_couplePattern, row_block_size, col_block_size, true));
-            const dim_t n_norm = MAX(mainBlock->numCols*col_block_size, mainBlock->numRows*row_block_size);
-            balance_vector = new double[n_norm];
-#pragma omp parallel for
-            for (dim_t i=0; i<n_norm; ++i)
-                balance_vector[i] = 1.;
-        }
+    if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+        block_size = std::min(row_block_size, col_block_size);
+    } else {
+        block_size = row_block_size*col_block_size;
     }
+    col_coupler.reset(new Coupler(pattern->col_connector, col_block_size, mpi_info));
+    row_coupler.reset(new Coupler(pattern->row_connector, row_block_size, mpi_info));
+    mainBlock.reset(new SparseMatrix(type, pattern->mainPattern, row_block_size, col_block_size, true));
+    col_coupleBlock.reset(new SparseMatrix(type, pattern->col_couplePattern, row_block_size, col_block_size, true));
+    row_coupleBlock.reset(new SparseMatrix(type, pattern->row_couplePattern, row_block_size, col_block_size, true));
+    const dim_t n_norm = std::max(mainBlock->numCols*col_block_size, mainBlock->numRows*row_block_size);
+    balance_vector = new double[n_norm];
+#pragma omp parallel for
+    for (dim_t i=0; i<n_norm; ++i)
+        balance_vector[i] = 1.;
 }
 
 // deallocates a SystemMatrix
@@ -148,14 +149,16 @@ SystemMatrix::~SystemMatrix()
 void SystemMatrix::setPreconditioner(Options* options)
 {
     if (!solver_p) {
-        solver_p = Preconditioner_alloc(shared_from_this(), options);
+        SystemMatrix_ptr mat(boost::dynamic_pointer_cast<SystemMatrix>(getPtr()));
+        solver_p = Preconditioner_alloc(mat, options);
     }
 }
 
 void SystemMatrix::solvePreconditioner(double* x, double* b)
 {
     Preconditioner* prec=(Preconditioner*)solver_p;
-    Preconditioner_solve(prec, shared_from_this(), x, b);
+    SystemMatrix_ptr mat(boost::dynamic_pointer_cast<SystemMatrix>(getPtr()));
+    Preconditioner_solve(prec, mat, x, b);
 }
 
 void SystemMatrix::freePreconditioner()
@@ -191,7 +194,7 @@ index_t* SystemMatrix::borrowMainDiagonalPointer() const
     MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, mpi_info->comm);
 #endif
     if (fail>0)
-        Esys_setError(VALUE_ERROR, "SystemMatrix::borrowMainDiagonalPointer: no main diagonal");
+        throw PasoException("SystemMatrix::borrowMainDiagonalPointer: no main diagonal");
     return out;
 }
 
@@ -219,10 +222,8 @@ void SystemMatrix::makeZeroRowSums(double* left_over)
 
 void SystemMatrix::nullifyRows(double* mask_row, double main_diagonal_value)
 {
-    if ((type & MATRIX_FORMAT_CSC) || (type & MATRIX_FORMAT_TRILINOS_CRS)) {
-        Esys_setError(SYSTEM_ERROR,
-                "SystemMatrix::nullifyRows: Only CSR format is supported.");
-        return;
+    if (type & MATRIX_FORMAT_CSC) {
+        throw PasoException("SystemMatrix::nullifyRows: Only CSR format is supported.");
     }
 
     if (col_block_size==1 && row_block_size==1) {
@@ -240,20 +241,30 @@ void SystemMatrix::nullifyRows(double* mask_row, double main_diagonal_value)
     }
 }
 
-void SystemMatrix::nullifyRowsAndCols(double* mask_row, double* mask_col,
+void SystemMatrix::nullifyRowsAndCols(escript::Data& row_q,
+                                      escript::Data& col_q,
                                       double main_diagonal_value)
 {
-    if (type & MATRIX_FORMAT_TRILINOS_CRS) {
-        Esys_setError(SYSTEM_ERROR,
-               "SystemMatrix::nullifyRowsAndCols: TRILINOS is not supported.");
-        return;
+    if (col_q.getDataPointSize() != getColumnBlockSize()) {
+        throw PasoException("nullifyRowsAndCols: column block size does not match the number of components of column mask.");
+    } else if (row_q.getDataPointSize() != getRowBlockSize()) {
+        throw PasoException("nullifyRowsAndCols: row block size does not match the number of components of row mask.");
+    } else if (col_q.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw PasoException("nullifyRowsAndCols: column function space and function space of column mask don't match.");
+    } else if (row_q.getFunctionSpace() != getRowFunctionSpace()) {
+        throw PasoException("nullifyRowsAndCols: row function space and function space of row mask don't match.");
     }
+    row_q.expand();
+    col_q.expand();
+    row_q.requireWrite();
+    col_q.requireWrite();
+    double* mask_row = row_q.getSampleDataRW(0);
+    double* mask_col = col_q.getSampleDataRW(0);
 
     if (mpi_info->size > 1) {
         if (type & MATRIX_FORMAT_CSC) {
-            Esys_setError(SYSTEM_ERROR, "SystemMatrix::nullifyRowsAndCols: "
-                                        "CSC is not supported with MPI.");
-            return;
+            throw PasoException("SystemMatrix::nullifyRowsAndCols: "
+                                "CSC is not supported with MPI.");
         }
 
         startColCollect(mask_col);
@@ -288,23 +299,73 @@ void SystemMatrix::nullifyRowsAndCols(double* mask_row, double* mask_col,
     }
 }
 
+void SystemMatrix::resetValues(bool preserveSolverData)
+{
+    setValues(0.);
+    if (!preserveSolverData)
+        solve_free(this);
+}
+
+void SystemMatrix::setToSolution(escript::Data& out, escript::Data& in,
+                                 boost::python::object& options) const
+{
+    options.attr("resetDiagnostics")();
+    Options paso_options(options);
+    if (out.getDataPointSize() != getColumnBlockSize()) {
+        throw PasoException("solve: column block size does not match the number of components of solution.");
+    } else if (in.getDataPointSize() != getRowBlockSize()) {
+        throw PasoException("solve: row block size does not match the number of components of  right hand side.");
+    } else if (out.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw PasoException("solve: column function space and function space of solution don't match.");
+    } else if (in.getFunctionSpace() != getRowFunctionSpace()) {
+        throw PasoException("solve: row function space and function space of right hand side don't match.");
+    }
+    out.expand();
+    in.expand();
+    out.requireWrite();
+    in.requireWrite();
+    double* out_dp = out.getSampleDataRW(0);        
+    double* in_dp = in.getSampleDataRW(0);                
+    solve(out_dp, in_dp, &paso_options);
+    paso_options.updateEscriptDiagnostics(options);
+}
+
+void SystemMatrix::ypAx(escript::Data& y, escript::Data& x) const 
+{
+    if (x.getDataPointSize() != getColumnBlockSize()) {
+        throw PasoException("matrix vector product: column block size does not match the number of components in input.");
+    } else if (y.getDataPointSize() != getRowBlockSize()) {
+        throw PasoException("matrix vector product: row block size does not match the number of components in output.");
+    } else if (x.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw PasoException("matrix vector product: column function space and function space of input don't match.");
+    } else if (y.getFunctionSpace() != getRowFunctionSpace()) {
+        throw PasoException("matrix vector product: row function space and function space of output don't match.");
+    }
+    x.expand();
+    y.expand();
+    x.requireWrite();
+    y.requireWrite();
+    double* x_dp = x.getSampleDataRW(0);
+    double* y_dp = y.getSampleDataRW(0);
+    MatrixVector(1., x_dp, 1., y_dp);
+}
+
 void SystemMatrix::copyColCoupleBlock()
 {
     if (mpi_info->size == 1) {
         // nothing to do
         return;
     } else if (!row_coupleBlock) {
-        Esys_setError(VALUE_ERROR, "SystemMatrix::copyColCoupleBlock: "
+        throw PasoException("SystemMatrix::copyColCoupleBlock: "
                     "creation of row_coupleBlock pattern not supported yet.");
-        return;
     } else if (row_coupler->in_use) {
-        Esys_setError(SYSTEM_ERROR,
-                "SystemMatrix::copyColCoupleBlock: Coupler in use.");
-        return;
+        throw PasoException("SystemMatrix::copyColCoupleBlock: Coupler in use.");
     }
 
+    const dim_t numNeighboursSend = row_coupler->connector->send->neighbour.size();
+    const dim_t numNeighboursRecv = row_coupler->connector->recv->neighbour.size();
     // start receiving
-    for (dim_t p=0; p<row_coupler->connector->recv->numNeighbors; p++) {
+    for (dim_t p = 0; p < numNeighboursRecv; p++) {
 #ifdef ESYS_MPI
         const index_t irow1 = row_coupler->connector->recv->offsetInShared[p];
         const index_t irow2 = row_coupler->connector->recv->offsetInShared[p+1];
@@ -312,8 +373,8 @@ void SystemMatrix::copyColCoupleBlock()
         const index_t b = row_coupleBlock->pattern->ptr[irow2];
 
         MPI_Irecv(&row_coupleBlock->val[a*block_size], (b-a) * block_size,
-                MPI_DOUBLE, row_coupler->connector->recv->neighbor[p],
-                mpi_info->msg_tag_counter+row_coupler->connector->recv->neighbor[p],
+                MPI_DOUBLE, row_coupler->connector->recv->neighbour[p],
+                mpi_info->counter()+row_coupler->connector->recv->neighbour[p],
                 mpi_info->comm, &row_coupler->mpi_requests[p]);
 
 #endif
@@ -324,7 +385,7 @@ void SystemMatrix::copyColCoupleBlock()
     double* send_buffer = new double[col_coupleBlock->len];
     const size_t block_size_size = block_size*sizeof(double);
 
-    for (dim_t p=0; p<row_coupler->connector->send->numNeighbors; p++) {
+    for (dim_t p = 0; p < numNeighboursSend; p++) {
         // j_min, j_max defines the range of columns to be sent to processor p
         const index_t j_min = col_coupler->connector->recv->offsetInShared[p];
         const index_t j_max = col_coupler->connector->recv->offsetInShared[p+1];
@@ -350,21 +411,20 @@ void SystemMatrix::copyColCoupleBlock()
         }
 #ifdef ESYS_MPI
         MPI_Issend(&send_buffer[z0], z-z0, MPI_DOUBLE,
-                   row_coupler->connector->send->neighbor[p],
-                   mpi_info->msg_tag_counter+mpi_info->rank,
+                   row_coupler->connector->send->neighbour[p],
+                   mpi_info->counter()+mpi_info->rank,
                    mpi_info->comm,
-                   &row_coupler->mpi_requests[p+row_coupler->connector->recv->numNeighbors]);
+                   &row_coupler->mpi_requests[p+numNeighboursRecv]);
 #endif
         z0 = z;
     }
 
     // wait until everything is done
 #ifdef ESYS_MPI
-    MPI_Waitall(row_coupler->connector->send->numNeighbors+row_coupler->connector->recv->numNeighbors,
-                row_coupler->mpi_requests,
+    mpi_info->incCounter(mpi_info->size);
+    MPI_Waitall(numNeighboursSend+numNeighboursRecv, row_coupler->mpi_requests,
                 row_coupler->mpi_stati);
 #endif
-    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_info->size);
     delete[] send_buffer;
 }
 
@@ -412,60 +472,59 @@ void SystemMatrix::balance()
 
     if (!is_balanced) {
         if ((type & MATRIX_FORMAT_CSC) || (type & MATRIX_FORMAT_OFFSET1)) {
-            Esys_setError(TYPE_ERROR,"SystemMatrix_balance: No normalization available for compressed sparse column or index offset 1.");
+            throw PasoException("SystemMatrix_balance: No normalization "
+                  "available for compressed sparse column or index offset 1.");
         }
         if (getGlobalNumRows() != getGlobalNumCols() ||
                 row_block_size != col_block_size) {
-            Esys_setError(SYSTEM_ERROR,"SystemMatrix::balance: matrix needs to be a square matrix.");
+            throw PasoException("SystemMatrix::balance: matrix needs to be a square matrix.");
         }
-        if (Esys_noError()) {
-            // calculate absolute max value over each row
+        // calculate absolute max value over each row
 #pragma omp parallel for
-            for (dim_t irow=0; irow<nrow; ++irow) {
-                balance_vector[irow]=0;
-            }
-            mainBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
-            if (col_coupleBlock->pattern->ptr != NULL) {
-                col_coupleBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
-            }
+        for (dim_t irow=0; irow<nrow; ++irow) {
+            balance_vector[irow]=0;
+        }
+        mainBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
+        if (col_coupleBlock->pattern->ptr != NULL) {
+            col_coupleBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
+        }
 
-            // set balancing vector
-            #pragma omp parallel for
-            for (dim_t irow=0; irow<nrow; ++irow) {
-                const double fac = balance_vector[irow];
-                if (fac > 0) {
-                    balance_vector[irow]=sqrt(1./fac);
-                } else {
-                    balance_vector[irow]=1.;
-                }
-            }
-            ///// rescale matrix /////
-            // start exchange
-            startCollect(balance_vector);
-            // process main block
-            mainBlock->applyDiagonal_CSR_OFFSET0(balance_vector, balance_vector);
-            // finish exchange
-            double* remote_values = finishCollect();
-            // process couple block
-            if (col_coupleBlock->pattern->ptr != NULL) {
-                col_coupleBlock->applyDiagonal_CSR_OFFSET0(balance_vector, remote_values);
-            }
-            if (row_coupleBlock->pattern->ptr != NULL) {
-                row_coupleBlock->applyDiagonal_CSR_OFFSET0(remote_values, balance_vector);
+        // set balancing vector
+        #pragma omp parallel for
+        for (dim_t irow=0; irow<nrow; ++irow) {
+            const double fac = balance_vector[irow];
+            if (fac > 0) {
+                balance_vector[irow]=sqrt(1./fac);
+            } else {
+                balance_vector[irow]=1.;
             }
-            is_balanced = true;
         }
+        ///// rescale matrix /////
+        // start exchange
+        startCollect(balance_vector);
+        // process main block
+        mainBlock->applyDiagonal_CSR_OFFSET0(balance_vector, balance_vector);
+        // finish exchange
+        double* remote_values = finishCollect();
+        // process couple block
+        if (col_coupleBlock->pattern->ptr != NULL) {
+            col_coupleBlock->applyDiagonal_CSR_OFFSET0(balance_vector, remote_values);
+        }
+        if (row_coupleBlock->pattern->ptr != NULL) {
+            row_coupleBlock->applyDiagonal_CSR_OFFSET0(remote_values, balance_vector);
+        }
+        is_balanced = true;
     }
 }
 
-index_t SystemMatrix::getSystemMatrixTypeId(index_t solver,
-                                            index_t preconditioner,
-                                            index_t package,
-                                            bool symmetry,
-                                            const esysUtils::JMPI& mpi_info)
+int SystemMatrix::getSystemMatrixTypeId(int solver, int preconditioner,
+                                        int package, bool symmetry,
+                                        const escript::JMPI& mpi_info)
 {
-    index_t out = -1;
-    index_t true_package = Options::getPackage(solver, package, symmetry, mpi_info);
+    int out = -1;
+    int true_package = Options::getPackage(Options::mapEscriptOption(solver),
+                                           Options::mapEscriptOption(package),
+                                           symmetry, mpi_info);
 
     switch(true_package) {
         case PASO_PASO:
@@ -478,7 +537,7 @@ index_t SystemMatrix::getSystemMatrixTypeId(index_t solver,
 
         case PASO_UMFPACK:
             if (mpi_info->size > 1) {
-                Esys_setError(VALUE_ERROR, "The selected solver UMFPACK "
+                throw PasoException("The selected solver UMFPACK "
                         "requires CSC format which is not supported with "
                         "more than one rank.");
             } else {
@@ -486,13 +545,8 @@ index_t SystemMatrix::getSystemMatrixTypeId(index_t solver,
             }
         break;
 
-        case PASO_TRILINOS:
-            // Distributed CRS
-            out=MATRIX_FORMAT_TRILINOS_CRS | MATRIX_FORMAT_BLK1;
-        break;
-
         default:
-            Esys_setError(VALUE_ERROR, "unknown package code");
+            throw PasoException("unknown package code");
     }
     return out;
 }
@@ -542,13 +596,13 @@ SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
             const index_t remote_n = row_distribution->first_component[i+1] -
                                         row_distribution->first_component[i];
             MPI_Irecv(&ptr_global[iptr], remote_n, MPI_INT, i,
-                        mpi_info->msg_tag_counter+i, mpi_info->comm,
+                        mpi_info->counter()+i, mpi_info->comm,
                         &mpi_requests[i]);
             temp_n[i] = remote_n;
             iptr += remote_n;
         }
+        mpi_info->incCounter(size);
         MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);
-        ESYS_MPI_INC_COUNTER(*mpi_info, size);
 
         // Then, prepare to receive idx and val from other ranks
         index_t len = 0;
@@ -568,7 +622,7 @@ SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
         for (index_t i=1; i<size; i++) {
             len = temp_len[i];
             MPI_Irecv(&idx_global[iptr], len, MPI_INT, i,
-                        mpi_info->msg_tag_counter+i,
+                        mpi_info->counter()+i,
                         mpi_info->comm, &mpi_requests[i]);
             const index_t remote_n = temp_n[i];
             for (index_t j=0; j<remote_n; j++) {
@@ -580,7 +634,7 @@ SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
         memcpy(idx_global, idx, temp_len[0]*sizeof(index_t));
         delete[] idx;
         MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);
-        ESYS_MPI_INC_COUNTER(*mpi_info, size);
+        mpi_info->incCounter(size);
         delete[] temp_n;
 
         // Then generate the sparse matrix
@@ -596,21 +650,21 @@ SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
         for (index_t i=1; i<size; i++) {
             len = temp_len[i];
             MPI_Irecv(&out->val[iptr], len * block_size, MPI_DOUBLE, i,
-                        mpi_info->msg_tag_counter+i, mpi_info->comm,
+                        mpi_info->counter()+i, mpi_info->comm,
                         &mpi_requests[i]);
             iptr += len*block_size;
         }
         memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);
         delete[] val;
+        mpi_info->incCounter(size);
         MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);
-        ESYS_MPI_INC_COUNTER(*mpi_info, size);
         delete[] temp_len;
         return out;
 
     } else { // it's not rank 0
 
         // First, send out the local ptr
-        index_t tag = mpi_info->msg_tag_counter+rank;
+        index_t tag = mpi_info->counter()+rank;
         MPI_Issend(&ptr[1], n, MPI_INT, 0, tag, mpi_info->comm,
                    &mpi_requests[0]);
 
@@ -627,7 +681,7 @@ SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
                    &mpi_requests[2]);
 
         MPI_Waitall(3, &mpi_requests[0], &mpi_stati[0]);
-        ESYS_MPI_SET_COUNTER(*mpi_info, tag + size - rank)
+        mpi_info->setCounter(tag + size - rank);
         delete[] ptr;
         delete[] idx;
         delete[] val;
diff --git a/paso/src/SystemMatrix.h b/paso/src/SystemMatrix.h
index aab263d..ad04a47 100644
--- a/paso/src/SystemMatrix.h
+++ b/paso/src/SystemMatrix.h
@@ -31,22 +31,29 @@
 
 #include "SparseMatrix.h"
 #include "SystemMatrixPattern.h"
-#include "Options.h"
+
+#include <escript/AbstractSystemMatrix.h>
 
 namespace paso {
 
-struct SystemMatrix;
+class Options;
+class SystemMatrix;
 typedef boost::shared_ptr<SystemMatrix> SystemMatrix_ptr;
 typedef boost::shared_ptr<const SystemMatrix> const_SystemMatrix_ptr;
 
 typedef int SystemMatrixType;
 
-//  this struct holds a (distributed) stiffness matrix
-PASO_DLL_API
-struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
+/// this class holds a (distributed) stiffness matrix
+class SystemMatrix : public escript::AbstractSystemMatrix
 {
-    SystemMatrix(SystemMatrixType, SystemMatrixPattern_ptr, dim_t, dim_t,
-                 bool patternIsUnrolled);
+public:
+    /// default constructor - throws exception.
+    SystemMatrix();
+
+    SystemMatrix(SystemMatrixType type, SystemMatrixPattern_ptr pattern,
+                 dim_t rowBlockSize, dim_t columnBlockSize,
+                 bool patternIsUnrolled, const escript::FunctionSpace& rowFS,
+                 const escript::FunctionSpace& colFS);
 
     ~SystemMatrix();
 
@@ -54,8 +61,34 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
     /// The rows and columns are marked by positive values in mask_row and
     /// mask_col. Values on the main diagonal which are marked to set to
     /// zero by both mask_row and mask_col are set to main_diagonal_value.
-    void nullifyRowsAndCols(double* mask_row, double* mask_col,
-                            double main_diagonal_value);
+    virtual void nullifyRowsAndCols(escript::Data& mask_row,
+                                    escript::Data& mask_col,
+                                    double main_diagonal_value);
+
+    virtual inline void saveMM(const std::string& filename) const
+    {
+        if (mpi_info->size > 1) {
+            //throw PasoException("SystemMatrix::saveMM: Only single rank supported.");
+            SparseMatrix_ptr merged(mergeSystemMatrix());
+            if (mpi_info->rank == 0)
+                merged->saveMM(filename.c_str());
+        } else {
+            mainBlock->saveMM(filename.c_str());
+        }
+    }
+
+    virtual inline void saveHB(const std::string& filename) const
+    {
+        if (mpi_info->size > 1) {
+            throw PasoException("SystemMatrix::saveHB: Only single rank supported.");
+        } else if (!(type & MATRIX_FORMAT_CSC)) {
+            throw PasoException("SystemMatrix::saveHB: Only CSC format supported.");
+        } else {
+            mainBlock->saveHB_CSC(filename.c_str());
+        }
+    }
+
+    virtual void resetValues(bool preserveSolverData = false);
 
     /// Nullifies rows in the matrix.
     /// The rows are marked by positive values in mask_row. Values on the
@@ -118,22 +151,22 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
 
     index_t* borrowMainDiagonalPointer() const;
 
-    inline void startCollect(const double* in)
+    inline void startCollect(const double* in) const
     {
         startColCollect(in);
     }
 
-    inline double* finishCollect()
+    inline double* finishCollect() const
     {
         return finishColCollect();
     }
 
-    inline void startColCollect(const double* in)
+    inline void startColCollect(const double* in) const
     {
         col_coupler->startCollect(in);
     }
 
-    inline double* finishColCollect()
+    inline double* finishColCollect() const
     {
         return col_coupler->finishCollect();
     }
@@ -243,30 +276,10 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
         is_balanced = false;
     }
 
-    inline void saveMM(const char* filename) const
-    {
-        if (mpi_info->size > 1) {
-            Esys_setError(IO_ERROR, "SystemMatrix::saveMM: Only single rank supported.");
-        } else {
-            mainBlock->saveMM(filename);
-        }
-    }
-
-    inline void saveHB(const char *filename) const
-    {
-        if (mpi_info->size > 1) {
-            Esys_setError(TYPE_ERROR, "SystemMatrix::saveHB: Only single rank supported.");
-        } else if (!(type & MATRIX_FORMAT_CSC)) {
-            Esys_setError(TYPE_ERROR, "SystemMatrix::saveHB: Only CSC format supported.");
-        } else {
-            mainBlock->saveHB_CSC(filename);
-        }
-    }
-
     inline void rowSum(double* row_sum) const
     {
         if ((type & MATRIX_FORMAT_CSC) || (type & MATRIX_FORMAT_OFFSET1)) {
-            Esys_setError(TYPE_ERROR, "SystemMatrix::rowSum: No normalization "
+            throw PasoException("SystemMatrix::rowSum: No normalization "
                   "available for compressed sparse column or index offset 1.");
         } else {
             const dim_t nrow = mainBlock->numRows*row_block_size;
@@ -279,14 +292,19 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
         }
     }
 
+    void MatrixVector(double alpha, const double* in, double beta,
+                      double* out) const;
+
+    void MatrixVector_CSR_OFFSET0(double alpha, const double* in, double beta,
+                                  double* out) const;
+
     static SystemMatrix_ptr loadMM_toCSR(const char* filename);
 
     static SystemMatrix_ptr loadMM_toCSC(const char* filename);
 
-    static index_t getSystemMatrixTypeId(index_t solver,
-                                         index_t preconditioner,
-                                         index_t package, bool symmetry,
-                                         const esysUtils::JMPI& mpi_info);
+    static int getSystemMatrixTypeId(int solver, int preconditioner,
+                                     int package, bool symmetry,
+                                     const escript::JMPI& mpi_info);
 
     SystemMatrixType type;
     SystemMatrixPattern_ptr pattern;
@@ -298,9 +316,9 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
     dim_t col_block_size;
     dim_t block_size;
 
-    Distribution_ptr row_distribution;
-    Distribution_ptr col_distribution;
-    esysUtils::JMPI mpi_info;
+    escript::Distribution_ptr row_distribution;
+    escript::Distribution_ptr col_distribution;
+    escript::JMPI mpi_info;
 
     Coupler_ptr col_coupler;
     Coupler_ptr row_coupler;
@@ -327,19 +345,20 @@ struct SystemMatrix : boost::enable_shared_from_this<SystemMatrix>
     mutable index_t* global_id;
 
     /// package code controlling the solver pointer
-    index_t solver_package;
+    mutable index_t solver_package;
 
     /// pointer to data needed by a solver
     void* solver_p;
 
-    /// this is only used for a trilinos matrix
-    void* trilinos_data;
-};
+private:
+    virtual void setToSolution(escript::Data& out, escript::Data& in,
+                               boost::python::object& options) const;
 
+    virtual void ypAx(escript::Data& y, escript::Data& x) const;
 
-void SystemMatrix_MatrixVector(double alpha, SystemMatrix_ptr A, const double* in, double beta, double* out);
+    void solve(double* out, double* in, Options* options) const;
+};
 
-void SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha, SystemMatrix_ptr A, const double* in, double beta, double* out);
 
 void RHS_loadMM_toCSR(const char* filename, double* b, dim_t size);
 
diff --git a/paso/src/SystemMatrixPattern.cpp b/paso/src/SystemMatrixPattern.cpp
index 2b9a04a..743ad41 100644
--- a/paso/src/SystemMatrixPattern.cpp
+++ b/paso/src/SystemMatrixPattern.cpp
@@ -30,8 +30,8 @@
 
 namespace paso {
 
-SystemMatrixPattern::SystemMatrixPattern(int patType, Distribution_ptr outDist,
-        Distribution_ptr inDist, Pattern_ptr mainPat, Pattern_ptr colPat,
+SystemMatrixPattern::SystemMatrixPattern(int patType, escript::Distribution_ptr outDist,
+        escript::Distribution_ptr inDist, Pattern_ptr mainPat, Pattern_ptr colPat,
         Pattern_ptr rowPat, Connector_ptr colConn, Connector_ptr rowConn) :
     type(patType),
     mainPattern(mainPat),
@@ -42,43 +42,51 @@ SystemMatrixPattern::SystemMatrixPattern(int patType, Distribution_ptr outDist,
     output_distribution(outDist),
     input_distribution(inDist)
 {
-    Esys_resetError();
+    std::stringstream ss;
 
     if (outDist->mpi_info != inDist->mpi_info) {
-        Esys_setError(SYSTEM_ERROR, "SystemMatrixPattern: output distribution and input distribution MPI communicators don't match.");
+        ss << "SystemMatrixPattern: output distribution and input distribution MPI communicators don't match.";
+    } else if (mainPat->type != patType)  {
+        ss << "SystemMatrixPattern: type of mainPattern (" << mainPat->type
+           << ") does not match expected type (" << patType << ")";
+    } else if (colPat->type != patType)  {
+        ss << "SystemMatrixPattern: type of col couplePattern (" << colPat->type
+           << ") does not match expected type (" << patType << ")";
+    } else if (rowPat->type != patType)  {
+        ss << "SystemMatrixPattern: type of row couplePattern (" << rowPat->type
+           << ") does not match expected type (" << patType << ")";
+    } else if (colPat->numOutput != mainPat->numOutput) {
+        ss << "SystemMatrixPattern: number of outputs for couple and main "
+              "pattern don't match: " << colPat->numOutput << " != "
+           << mainPat->numOutput;
+    } else if (mainPat->numOutput != outDist->getMyNumComponents()) {
+        ss << "SystemMatrixPattern: number of outputs and given distribution "
+              "don't match: " << mainPat->numOutput << " != "
+           << outDist->getMyNumComponents();
+    } else if (mainPat->numInput != inDist->getMyNumComponents()) {
+        ss << "SystemMatrixPattern: number of input for main pattern and "
+              "number of send components in connector don't match: "
+           << mainPat->numInput << " != " << inDist->getMyNumComponents();
+    } else if (colPat->numInput != colConn->recv->numSharedComponents) {
+        ss << "SystemMatrixPattern: number of inputs for column couple pattern"
+              " and number of received components in connector don't match: "
+           << colPat->numInput << " != " << colConn->recv->numSharedComponents;
+    } else if (rowPat->numOutput != rowConn->recv->numSharedComponents) {
+        ss << "SystemMatrixPattern: number of inputs for row couple pattern "
+              "and number of received components in connector don't match: "
+           << rowPat->numOutput << " != " << rowConn->recv->numSharedComponents;
     }
-    if (outDist->mpi_info != colConn->mpi_info) {
-        Esys_setError(SYSTEM_ERROR, "SystemMatrixPattern: output distribution and col connector MPI communicators don't match.");
+    const std::string msg(ss.str());
+    int error = msg.length(); // proxy for error condition
+    int gerror = error;
+    escript::checkResult(error, gerror, outDist->mpi_info);
+    if (gerror > 0) {
+        char* gmsg;
+        escript::shipString(msg.c_str(), &gmsg, outDist->mpi_info->comm);
+        throw PasoException(gmsg);
     }
-    if (outDist->mpi_info != rowConn->mpi_info ) {
-        Esys_setError(SYSTEM_ERROR, "SystemMatrixPattern: output distribution and row connector MPI communicators don't match.");
-    }
-    if (mainPat->type != patType)  {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: type of mainPattern does not match expected type.");
-    }
-    if (colPat->type != patType)  {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: type of col couplePattern does not match expected type.");
-    }
-    if (rowPat->type != patType)  {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: type of row couplePattern does not match expected type.");
-    }
-    if (colPat->numOutput != mainPat->numOutput) {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: number of outputs for couple and main pattern don't match.");
-    }
-    if (mainPat->numOutput != outDist->getMyNumComponents()) {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: number of outputs and given distribution don't match.");
-    }
-    if (mainPat->numInput != inDist->getMyNumComponents()) {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: number of input for main pattern and number of send components in connector don't match.");
-    }
-    if (colPat->numInput != colConn->recv->numSharedComponents) {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: number of inputs for column couple pattern and number of received components in connector don't match.");
-    }
-    if (rowPat->numOutput != rowConn->recv->numSharedComponents) {
-        Esys_setError(VALUE_ERROR, "SystemMatrixPattern: number of inputs for row couple pattern and number of received components in connector don't match.");
-    }
-    mpi_info = outDist->mpi_info;
 
+    mpi_info = outDist->mpi_info;
 }
 
 } // namespace paso
diff --git a/paso/src/SystemMatrixPattern.h b/paso/src/SystemMatrixPattern.h
index ff5089b..a51774f 100644
--- a/paso/src/SystemMatrixPattern.h
+++ b/paso/src/SystemMatrixPattern.h
@@ -29,9 +29,10 @@
 #ifndef __PASO_SYSTEMMATRIXPATTERN_H__
 #define __PASO_SYSTEMMATRIXPATTERN_H__
 
-#include "Distribution.h"
-#include "Pattern.h"
 #include "Coupler.h"
+#include "Pattern.h"
+
+#include <escript/Distribution.h>
 
 namespace paso {
 
@@ -43,14 +44,12 @@ PASO_DLL_API
 struct SystemMatrixPattern : boost::enable_shared_from_this<SystemMatrixPattern>
 {
     // constructor
-    SystemMatrixPattern(int type, Distribution_ptr output_distribution,
-        Distribution_ptr input_distribution, Pattern_ptr mainPattern,
+    SystemMatrixPattern(int type, escript::Distribution_ptr output_distribution,
+        escript::Distribution_ptr input_distribution, Pattern_ptr mainPattern,
         Pattern_ptr col_couplePattern, Pattern_ptr row_couplePattern,
         Connector_ptr col_connector, Connector_ptr row_connector);
 
-    ~SystemMatrixPattern()
-    {
-    }
+    ~SystemMatrixPattern() {}
 
     inline index_t getNumOutput() const {
         return mainPattern->numOutput;
@@ -60,14 +59,14 @@ struct SystemMatrixPattern : boost::enable_shared_from_this<SystemMatrixPattern>
                                          dim_t input_block_size);
 
     int type;
-    esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
     Pattern_ptr mainPattern;
     Pattern_ptr col_couplePattern;
     Pattern_ptr row_couplePattern;
     Connector_ptr col_connector;
     Connector_ptr row_connector;
-    Distribution_ptr output_distribution;
-    Distribution_ptr input_distribution;
+    escript::Distribution_ptr output_distribution;
+    escript::Distribution_ptr input_distribution;
 };
 
 
diff --git a/paso/src/SystemMatrixPattern_unrollBlocks.cpp b/paso/src/SystemMatrixPattern_unrollBlocks.cpp
index 1f9ab86..501af1b 100644
--- a/paso/src/SystemMatrixPattern_unrollBlocks.cpp
+++ b/paso/src/SystemMatrixPattern_unrollBlocks.cpp
@@ -34,7 +34,7 @@ SystemMatrixPattern_ptr SystemMatrixPattern::unrollBlocks(
                         dim_t input_block_size)
 {
     SystemMatrixPattern_ptr out;
-    Distribution_ptr new_output_distribution, new_input_distribution;
+    escript::Distribution_ptr new_output_distribution, new_input_distribution;
     Connector_ptr new_col_connector, new_row_connector;
 
     if ( (output_block_size == 1) && (input_block_size == 1) &&
@@ -48,7 +48,7 @@ SystemMatrixPattern_ptr SystemMatrixPattern::unrollBlocks(
         Pattern_ptr new_row_couplePattern(row_couplePattern->unrollBlocks(
                 newType, output_block_size, input_block_size));
         if (output_block_size > 1) {
-            new_output_distribution.reset(new Distribution(
+            new_output_distribution.reset(new escript::Distribution(
                     output_distribution->mpi_info,
                     output_distribution->first_component,
                     output_block_size, 0));
@@ -58,7 +58,7 @@ SystemMatrixPattern_ptr SystemMatrixPattern::unrollBlocks(
             new_row_connector = row_connector;
         }
         if (input_block_size > 1) {
-            new_input_distribution.reset(new Distribution(
+            new_input_distribution.reset(new escript::Distribution(
                     input_distribution->mpi_info,
                     input_distribution->first_component,
                     input_block_size, 0));
@@ -68,20 +68,15 @@ SystemMatrixPattern_ptr SystemMatrixPattern::unrollBlocks(
             new_col_connector = col_connector;
         }
 
-        if (Esys_noError()) {
-            out.reset(new SystemMatrixPattern(newType, new_output_distribution,
-                                              new_input_distribution,
-                                              new_mainPattern,
-                                              new_col_couplePattern,
-                                              new_row_couplePattern,
-                                              new_col_connector,
-                                              new_row_connector));
-        }
+        out.reset(new SystemMatrixPattern(newType, new_output_distribution,
+                                          new_input_distribution,
+                                          new_mainPattern,
+                                          new_col_couplePattern,
+                                          new_row_couplePattern,
+                                          new_col_connector,
+                                          new_row_connector));
     }
 
-    if (!Esys_noError()) {
-        return SystemMatrixPattern_ptr();
-    }
     return out;
 }
 
diff --git a/paso/src/SystemMatrix_MatrixVector.cpp b/paso/src/SystemMatrix_MatrixVector.cpp
index d1ac4c6..b4b9676 100644
--- a/paso/src/SystemMatrix_MatrixVector.cpp
+++ b/paso/src/SystemMatrix_MatrixVector.cpp
@@ -26,69 +26,59 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
 #include "SystemMatrix.h"
 
 namespace paso {
 
 /*  raw scaled vector update operation: out = alpha * A * in + beta * out */
-void SystemMatrix_MatrixVector(double alpha, SystemMatrix_ptr A,
-                               const double* in, double beta, double* out)
+void SystemMatrix::MatrixVector(double alpha, const double* in, double beta,
+                                double* out) const
 {
-    if (A->is_balanced) {
-        Esys_setError(VALUE_ERROR, "SystemMatrix_MatrixVector: balanced matrix is not supported.");
-        return;
+    if (is_balanced) {
+        throw PasoException("MatrixVector: balanced matrix is not supported.");
     }
-    if (A->type & MATRIX_FORMAT_CSC) {
-        if (A->mpi_info->size > 1) {
-            Esys_setError(SYSTEM_ERROR,"SystemMatrix_MatrixVector: CSC is not supported by MPI.");
-            return;
+    if (type & MATRIX_FORMAT_CSC) {
+        if (mpi_info->size > 1) {
+            throw PasoException("MatrixVector: CSC is not supported by MPI.");
         } else {
-            if (A->type & MATRIX_FORMAT_OFFSET1) {
-                SparseMatrix_MatrixVector_CSC_OFFSET1(alpha,A->mainBlock,in,beta,out);
+            if (type & MATRIX_FORMAT_OFFSET1) {
+                SparseMatrix_MatrixVector_CSC_OFFSET1(alpha, mainBlock, in, beta, out);
             } else {
-                SparseMatrix_MatrixVector_CSC_OFFSET0(alpha,A->mainBlock,in,beta,out);
+                SparseMatrix_MatrixVector_CSC_OFFSET0(alpha, mainBlock, in, beta, out);
             }
         }
-    } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
-        Esys_setError(SYSTEM_ERROR,"SystemMatrix_MatrixVector: TRILINOS is not supported with MPI.");
-        return;
     } else {
-        if (A->type & MATRIX_FORMAT_OFFSET1) {
-            if (A->mpi_info->size > 1) {
-                Esys_setError(SYSTEM_ERROR,"SystemMatrix_MatrixVector: CSR with offset 1 is not supported in MPI.");
-                return;
+        if (type & MATRIX_FORMAT_OFFSET1) {
+            if (mpi_info->size > 1) {
+                throw PasoException("MatrixVector: CSR with offset 1 is not supported in MPI.");
             } else {
-                SparseMatrix_MatrixVector_CSR_OFFSET1(alpha,A->mainBlock,in,beta,out);
+                SparseMatrix_MatrixVector_CSR_OFFSET1(alpha, mainBlock, in, beta, out);
             }
         } else {
-            if (Esys_noError()) {
-                SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
-            }
+            MatrixVector_CSR_OFFSET0(alpha, in, beta, out);
         }
     }
 }
 
-void SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha, SystemMatrix_ptr A,
-                                           const double* in, const double beta,
-                                           double* out)
+void SystemMatrix::MatrixVector_CSR_OFFSET0(double alpha, const double* in,
+                                            double beta, double* out) const
 {
     // start exchange
-    A->startCollect(in);
+    startCollect(in);
     // process main block
-    if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-        SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(alpha,A->mainBlock,in,beta,out);
+    if (type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+        SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(alpha, mainBlock, in, beta, out);
     } else {
-        SparseMatrix_MatrixVector_CSR_OFFSET0(alpha,A->mainBlock,in,beta,out);
+        SparseMatrix_MatrixVector_CSR_OFFSET0(alpha, mainBlock, in, beta, out);
     }
     // finish exchange
-    double* remote_values = A->finishCollect();
+    double* remote_values = finishCollect();
     // process couple block
-    if (A->col_coupleBlock->pattern->ptr != NULL) {
-        if (A->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
-            SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(alpha,A->col_coupleBlock,remote_values,1.,out);
+    if (col_coupleBlock->pattern->ptr != NULL) {
+        if (type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
+            SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(alpha, col_coupleBlock, remote_values, 1., out);
         } else {
-            SparseMatrix_MatrixVector_CSR_OFFSET0(alpha,A->col_coupleBlock,remote_values,1.,out);
+            SparseMatrix_MatrixVector_CSR_OFFSET0(alpha, col_coupleBlock, remote_values, 1., out);
         }
     }
 }
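
For reference, a minimal serial sketch (not part of this patch) of the raw scaled update out = alpha * A * in + beta * out that the mainBlock call above performs; the off-process contribution collected by finishCollect() is then accumulated with beta = 1. The CSRMatrix container and the csrMatrixVector name below are illustrative stand-ins only, not types from paso.

    #include <cstddef>
    #include <vector>

    // Hypothetical minimal CSR container for illustration.
    struct CSRMatrix {
        std::size_t numRows;
        std::vector<std::size_t> ptr;    // row pointers, size numRows+1
        std::vector<std::size_t> index;  // column indices of the nonzeros
        std::vector<double> val;         // nonzero values
    };

    // out = alpha * A * in + beta * out, one row at a time.
    void csrMatrixVector(double alpha, const CSRMatrix& A, const double* in,
                         double beta, double* out)
    {
        for (std::size_t i = 0; i < A.numRows; ++i) {
            double sum = 0.0;
            for (std::size_t k = A.ptr[i]; k < A.ptr[i+1]; ++k)
                sum += A.val[k] * in[A.index[k]];
            out[i] = alpha * sum + beta * out[i];
        }
    }
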
diff --git a/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp b/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp
index 69efde7..15fd67b 100644
--- a/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp
+++ b/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp
@@ -53,14 +53,14 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
     const dim_t mpi_size = mpi_info->size;
     index_t num_main_cols = mainBlock->numCols;
     double* cols = new double[num_main_cols];
-    const index_t offset = col_distribution->first_component[rank];
+    const index_t offset = col_distribution->getFirstComponent();
 #pragma omp parallel for
     for (index_t i=0; i<num_main_cols; ++i)
         cols[i] = offset + i;
 
     Coupler_ptr coupler;
     if (!global_id) {
-        coupler.reset(new Coupler(col_coupler->connector, 1));
+        coupler.reset(new Coupler(col_coupler->connector, 1, mpi_info));
         coupler->startCollect(cols);
     }
 
@@ -78,7 +78,7 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
     const index_t overlapped_n = row_coupleBlock->numRows;
     SharedComponents_ptr send(row_coupler->connector->send);
     SharedComponents_ptr recv(row_coupler->connector->recv);
-    index_t num_neighbors = send->numNeighbors;
+    const dim_t numNeighbours = send->neighbour.size();
     const size_t block_size_size = block_size * sizeof(double);
 
     // waiting for receiving unknown's global ID
@@ -110,37 +110,36 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
 
     // first, prepare the ptr_ptr to be received
     index_t* ptr_ptr = new index_t[overlapped_n+1];
-    for (index_t p=0; p<recv->numNeighbors; p++) {
+    for (index_t p=0; p<recv->neighbour.size(); p++) {
         const index_t row = recv->offsetInShared[p];
         const index_t i = recv->offsetInShared[p+1];
-        MPI_Irecv(&(ptr_ptr[row]), i-row, MPI_INT, recv->neighbor[p],
-                mpi_info->msg_tag_counter+recv->neighbor[p],
-                mpi_info->comm,
-                &(row_coupler->mpi_requests[p]));
+        MPI_Irecv(&ptr_ptr[row], i-row, MPI_INT, recv->neighbour[p],
+                mpi_info->counter()+recv->neighbour[p],
+                mpi_info->comm, &row_coupler->mpi_requests[p]);
     }
 
     // now prepare the rows to be sent (the degree, the offset and the data)
-    index_t p = send->offsetInShared[num_neighbors];
+    index_t p = send->offsetInShared[numNeighbours];
     len = 0;
-    for (index_t i=0; i<num_neighbors; i++) {
+    for (index_t i=0; i<numNeighbours; i++) {
         // #cols per row X #rows
-        len += recv_buf[send->neighbor[i]] *
+        len += recv_buf[send->neighbour[i]] *
                 (send->offsetInShared[i+1] - send->offsetInShared[i]);
     }
     double* send_buf = new double[len*block_size];
     index_t* send_idx = new index_t[len];
     int* send_offset = new int[p+1];
-    int* send_degree = new int[num_neighbors];
+    int* send_degree = new int[numNeighbours];
 
     index_t k, l, m, n, q;
     len = 0;
     index_t base = 0;
     index_t i0 = 0;
-    for (p=0; p<num_neighbors; p++) {
+    for (p=0; p<numNeighbours; p++) {
         index_t i = i0;
-        const index_t neighbor = send->neighbor[p];
-        const index_t l_ub = recv_offset[neighbor+1];
-        const index_t l_lb = recv_offset[neighbor];
+        const int neighbour = send->neighbour[p];
+        const index_t l_ub = recv_offset[neighbour+1];
+        const index_t l_lb = recv_offset[neighbour];
         const index_t j_ub = send->offsetInShared[p + 1];
         for (index_t j=send->offsetInShared[p]; j<j_ub; j++) {
             const index_t row = send->shared[j];
@@ -217,17 +216,16 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
         }
 
         /* sending */
-        MPI_Issend(&send_offset[i0], i-i0, MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank, mpi_info->comm,
-                &row_coupler->mpi_requests[p+recv->numNeighbors]);
+        MPI_Issend(&send_offset[i0], i-i0, MPI_INT, send->neighbour[p],
+                mpi_info->counter()+rank, mpi_info->comm,
+                &row_coupler->mpi_requests[p+recv->neighbour.size()]);
         send_degree[p] = len;
         i0 = i;
     }
 
-    MPI_Waitall(row_coupler->connector->send->numNeighbors +
-                    row_coupler->connector->recv->numNeighbors,
+    mpi_info->incCounter(mpi_size);
+    MPI_Waitall(numNeighbours + recv->neighbour.size(),
                 row_coupler->mpi_requests, row_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)
 
     len = 0;
     for (index_t i=0; i<overlapped_n; i++) {
@@ -240,29 +238,28 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
 
     // send/receive index array
     index_t j=0;
-    for (p=0; p<recv->numNeighbors; p++) {
+    for (p = 0; p < recv->neighbour.size(); p++) {
         const index_t i = ptr_ptr[recv->offsetInShared[p+1]] - ptr_ptr[recv->offsetInShared[p]];
         if (i > 0)
-            MPI_Irecv(&ptr_idx[j], i, MPI_INT, recv->neighbor[p],
-                mpi_info->msg_tag_counter+recv->neighbor[p], mpi_info->comm,
+            MPI_Irecv(&ptr_idx[j], i, MPI_INT, recv->neighbour[p],
+                mpi_info->counter()+recv->neighbour[p], mpi_info->comm,
                 &row_coupler->mpi_requests[p]);
         j += i;
     }
 
     j=0;
-    for (p=0; p<num_neighbors; p++) {
+    for (p = 0; p < numNeighbours; p++) {
         const index_t i = send_degree[p] - j;
         if (i > 0)
-            MPI_Issend(&send_idx[j], i, MPI_INT, send->neighbor[p],
-                mpi_info->msg_tag_counter+rank, mpi_info->comm,
-                &row_coupler->mpi_requests[p+recv->numNeighbors]);
+            MPI_Issend(&send_idx[j], i, MPI_INT, send->neighbour[p],
+                mpi_info->counter()+rank, mpi_info->comm,
+                &row_coupler->mpi_requests[p+recv->neighbour.size()]);
         j = send_degree[p];
     }
 
-    MPI_Waitall(row_coupler->connector->send->numNeighbors +
-                         row_coupler->connector->recv->numNeighbors,
+    mpi_info->incCounter(mpi_size);
+    MPI_Waitall(numNeighbours + recv->neighbour.size(),
                 row_coupler->mpi_requests, row_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)
 
     // allocate pattern and sparse matrix for remote_coupleBlock
     Pattern_ptr pattern(new Pattern(row_coupleBlock->pattern->type,
@@ -272,31 +269,30 @@ void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
 
     // send/receive value array
     j=0;
-    for (p=0; p<recv->numNeighbors; p++) {
+    for (p = 0; p < recv->neighbour.size(); p++) {
         const index_t i = ptr_ptr[recv->offsetInShared[p+1]] - ptr_ptr[recv->offsetInShared[p]];
         if (i > 0)
             MPI_Irecv(&remote_coupleBlock->val[j], i * block_size,
-                MPI_DOUBLE, recv->neighbor[p],
-                mpi_info->msg_tag_counter+recv->neighbor[p], mpi_info->comm,
+                MPI_DOUBLE, recv->neighbour[p],
+                mpi_info->counter()+recv->neighbour[p], mpi_info->comm,
                 &row_coupler->mpi_requests[p]);
         j += i*block_size;
     }
 
     j=0;
-    for (p=0; p<num_neighbors; p++) {
+    for (p=0; p<numNeighbours; p++) {
         const index_t i = send_degree[p] - j;
         if (i > 0)
             MPI_Issend(&send_buf[j*block_size], i*block_size, MPI_DOUBLE,
-                       send->neighbor[p], mpi_info->msg_tag_counter+rank,
+                       send->neighbour[p], mpi_info->counter()+rank,
                        mpi_info->comm,
-                       &row_coupler->mpi_requests[p+recv->numNeighbors]);
+                       &row_coupler->mpi_requests[p+recv->neighbour.size()]);
         j = send_degree[p];
     }
 
-    MPI_Waitall(row_coupler->connector->send->numNeighbors +
-                     row_coupler->connector->recv->numNeighbors,
+    mpi_info->incCounter(mpi_size);
+    MPI_Waitall(numNeighbours + recv->neighbour.size(),
                 row_coupler->mpi_requests, row_coupler->mpi_stati);
-    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)
 
     // release all temp memory allocation
     delete[] cols;
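
For reference, a minimal sketch (not part of this patch) of the non-blocking neighbour exchange pattern used above: one MPI_Irecv per receiving neighbour, one MPI_Issend per sending neighbour, then a single MPI_Waitall over both sets of requests. The neighbour lists, message sizes and tag handling here are simplified stand-ins for the Connector/SharedComponents bookkeeping.

    #include <mpi.h>
    #include <cstddef>
    #include <vector>

    // Exchange one int with each neighbour; recvBuf[p] is filled from
    // recvFrom[p] and sendBuf[p] goes to sendTo[p]. Buffers are assumed to
    // be at least as long as the corresponding neighbour lists.
    void neighbourExchange(const std::vector<int>& recvFrom,
                           const std::vector<int>& sendTo,
                           std::vector<int>& recvBuf,
                           std::vector<int>& sendBuf,
                           int tag, MPI_Comm comm)
    {
        std::vector<MPI_Request> requests(recvFrom.size() + sendTo.size());
        std::vector<MPI_Status> statuses(requests.size());

        // post all receives first so the matching sends cannot block
        for (std::size_t p = 0; p < recvFrom.size(); ++p)
            MPI_Irecv(&recvBuf[p], 1, MPI_INT, recvFrom[p], tag, comm,
                      &requests[p]);

        // synchronous-mode sends, as in the code above
        for (std::size_t p = 0; p < sendTo.size(); ++p)
            MPI_Issend(&sendBuf[p], 1, MPI_INT, sendTo[p], tag, comm,
                       &requests[recvFrom.size() + p]);

        // complete every send and receive before the buffers are reused
        MPI_Waitall(static_cast<int>(requests.size()), requests.data(),
                    statuses.data());
    }
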
diff --git a/paso/src/SystemMatrix_debug.cpp b/paso/src/SystemMatrix_debug.cpp
index b922e4f..dd13c68 100644
--- a/paso/src/SystemMatrix_debug.cpp
+++ b/paso/src/SystemMatrix_debug.cpp
@@ -24,10 +24,11 @@
  Author: Lutz Gross, l.gross at uq.edu.au
 
 *****************************************************************************/
-#include <iostream>
+
 #include "SystemMatrix.h"
 
 #include <cstring> // strcat
+#include <iostream>
 
 namespace paso {
 
@@ -37,13 +38,12 @@ void SystemMatrix::fillWithGlobalCoordinates(double f1)
 {
     const dim_t n = getNumRows();
     const dim_t m = getNumCols();
-    const index_t me = mpi_info->rank;
-    const index_t row_offset = row_distribution->first_component[me];
-    const index_t col_offset = col_distribution->first_component[me];
+    const index_t row_offset = row_distribution->getFirstComponent();
+    const index_t col_offset = col_distribution->getFirstComponent();
     double* cols = new double[m];
     double* rows = new double[n];
-    Coupler_ptr col_couple(new Coupler(col_coupler->connector, 1));
-    Coupler_ptr row_couple(new Coupler(col_coupler->connector, 1));
+    Coupler_ptr col_couple(new Coupler(col_coupler->connector, 1, mpi_info));
+    Coupler_ptr row_couple(new Coupler(col_coupler->connector, 1, mpi_info));
 
 #pragma omp parallel for
     for (dim_t i=0; i<n; ++i)
diff --git a/paso/src/SystemMatrix_extendedRows.cpp b/paso/src/SystemMatrix_extendedRows.cpp
index 2d8fd6f..20d44e9 100644
--- a/paso/src/SystemMatrix_extendedRows.cpp
+++ b/paso/src/SystemMatrix_extendedRows.cpp
@@ -31,8 +31,8 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
 #include "SystemMatrix.h"
+#include "PasoUtil.h"
 
 #include <cstring> // memcpy
 
@@ -48,7 +48,7 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
     double* cols = new double[num_main_cols];
     const index_t rank = mpi_info->rank;
     const index_t offset = col_distribution->first_component[rank];
-    index_t i, j, k, p, z, z0, z1, size, len;
+    index_t i, j, k, p, z, z0, z1, size;
 
 #pragma omp parallel for private(i) schedule(static)
     for (i=0; i<num_main_cols; ++i)
@@ -56,7 +56,7 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
 
     Coupler_ptr coupler;
     if (global_id == NULL) {
-        coupler.reset(new Coupler(col_coupler->connector, 1));
+        coupler.reset(new Coupler(col_coupler->connector, 1, mpi_info));
         coupler->startCollect(cols);
     }
 
@@ -72,7 +72,7 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
     index_t* recv_offset_ST = new index_t[overlapped_n+1];
     dim_t * recv_degree_ST = new dim_t[overlapped_n];
     index_t* send_ST = new index_t[offset_ST[my_n]];
-    len = row_coupler->connector->send->offsetInShared[row_coupler->connector->send->numNeighbors] * size;
+    dim_t len = row_coupler->connector->send->numSharedComponents * size;
     index_t* send_buf = new index_t[len];
 
     // waiting for receiving unknown's global ID
@@ -85,7 +85,7 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
     }
 
     // sending/receiving the degree_ST
-    coupler.reset(new Coupler(row_coupler->connector, 1));
+    coupler.reset(new Coupler(row_coupler->connector, 1, mpi_info));
     coupler->startCollect(rows);
 
     // prepare ST with global ID
@@ -136,24 +136,25 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
 
     // preparing degree_ST and offset_ST for the to-be-received extended rows
 #pragma omp parallel for private(i) schedule(static)
-    for (i=0; i<overlapped_n; i++) recv_degree_ST[i] = coupler->recv_buffer[i];
+    for (i = 0; i < overlapped_n; i++)
+        recv_degree_ST[i] = coupler->recv_buffer[i];
     recv_offset_ST[0] = 0;
-    for (i=0; i<overlapped_n; i++) {
+    for (i = 0; i < overlapped_n; i++) {
         recv_offset_ST[i+1] = recv_offset_ST[i] + coupler->recv_buffer[i];
     }
     index_t* recv_ST = new index_t[recv_offset_ST[overlapped_n]];
     coupler.reset();
 
-    /* receiving ST for the extended rows */
+    // receiving ST for the extended rows
     z = 0;
-    for (p=0; p<row_coupler->connector->recv->numNeighbors; p++) {
+    for (p=0; p<row_coupler->connector->recv->neighbour.size(); p++) {
         const index_t j_min = row_coupler->connector->recv->offsetInShared[p];
         const index_t j_max = row_coupler->connector->recv->offsetInShared[p+1];
         j = recv_offset_ST[j_max] - recv_offset_ST[j_min];
 #ifdef ESYS_MPI
         MPI_Irecv(&recv_ST[z], j, MPI_INT,
-                row_coupler->connector->recv->neighbor[p],
-                mpi_info->msg_tag_counter+row_coupler->connector->recv->neighbor[p],
+                row_coupler->connector->recv->neighbour[p],
+                mpi_info->counter()+row_coupler->connector->recv->neighbour[p],
                 mpi_info->comm, &row_coupler->mpi_requests[p]);
 #endif
         z += j;
@@ -161,23 +162,22 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
 
     /* sending ST for the extended rows */
     z0 = 0;
-    for (p=0; p<row_coupler->connector->send->numNeighbors; p++) {
+    for (p=0; p<row_coupler->connector->send->neighbour.size(); p++) {
         const index_t j_min = row_coupler->connector->send->offsetInShared[p];
         const index_t j_max = row_coupler->connector->send->offsetInShared[p+1];
         z = z0;
         for (j=j_min; j<j_max; j++) {
             const index_t row=row_coupler->connector->send->shared[j];
             if (degree_ST[row] > 0) {
-                memcpy(&(send_buf[z]), &(send_ST[offset_ST[row]]), degree_ST[row] * sizeof(index_t));
+                memcpy(&send_buf[z], &send_ST[offset_ST[row]], degree_ST[row] * sizeof(index_t));
                 z += degree_ST[row];
             }
         }
 #ifdef ESYS_MPI
         MPI_Issend(&send_buf[z0], z-z0, MPI_INT,
-                 row_coupler->connector->send->neighbor[p],
-                 mpi_info->msg_tag_counter+mpi_info->rank,
-                 mpi_info->comm,
-                 &row_coupler->mpi_requests[p+row_coupler->connector->recv->numNeighbors]);
+                 row_coupler->connector->send->neighbour[p],
+                 mpi_info->counter()+mpi_info->rank, mpi_info->comm,
+                 &row_coupler->mpi_requests[p+row_coupler->connector->recv->neighbour.size()]);
 #endif
         z0 = z;
     }
@@ -213,11 +213,11 @@ void SystemMatrix::extendedRowsForST(dim_t* degree_ST, index_t* offset_ST,
 
     // wait until everything is done
 #ifdef ESYS_MPI
-    MPI_Waitall(row_coupler->connector->send->numNeighbors +
-                    row_coupler->connector->recv->numNeighbors,
+    mpi_info->incCounter(mpi_info->size);
+    MPI_Waitall(row_coupler->connector->send->neighbour.size() +
+                    row_coupler->connector->recv->neighbour.size(),
                     row_coupler->mpi_requests, row_coupler->mpi_stati);
 #endif
-    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_info->size)
 
     // filter the received ST (for extended rows) with cols in mainBlock as
     // well as cols in col_coupleBlock, their global ids are listed in "B"
diff --git a/paso/src/SystemMatrix_loadMM.cpp b/paso/src/SystemMatrix_loadMM.cpp
index c36b80f..0bd8bf7 100644
--- a/paso/src/SystemMatrix_loadMM.cpp
+++ b/paso/src/SystemMatrix_loadMM.cpp
@@ -26,11 +26,8 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
-#include "mmio.h"
 #include "SystemMatrix.h"
-
-#include "limits.h"
+#include "mmio.h"
 
 namespace paso {
 
@@ -70,7 +67,7 @@ void q_sort(index_t *row, index_t *col, double *val, int begin, int end)
             /* This whole section is for checking lval<pivot, where
             pivot=N*row[begin]+col[begin] and lval=N*row[l]+col[l]. */
             if (row[l]<row[begin]) {
-                if (ABS(row[l]-row[begin])==1 && ABS(col[l]-col[begin])==N)
+                if (std::abs(row[l]-row[begin])==1 && std::abs(col[l]-col[begin])==N)
                     flag=0;
                 else
                     flag=1;
@@ -80,7 +77,7 @@ void q_sort(index_t *row, index_t *col, double *val, int begin, int end)
                 else
                     flag=0;
             } else {
-                if (ABS(row[l]-row[begin])==1 && ABS(col[l]-col[begin])==N)
+                if (std::abs(row[l]-row[begin])==1 && std::abs(col[l]-col[begin])==N)
                     flag=1;
                 else
                     flag=0;
@@ -109,37 +106,31 @@ SystemMatrix_ptr SystemMatrix::loadMM_toCSR(const char *filename)
     SystemMatrix_ptr out;
     int curr_row;
     MM_typecode matrixCode;
-    esysUtils::JMPI mpi_info=esysUtils::makeInfo(MPI_COMM_WORLD);
-    Esys_resetError();
+    escript::JMPI mpi_info = escript::makeInfo(MPI_COMM_WORLD);
     if (mpi_info->size > 1) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSR: supports single processor only");
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSR: supports single processor only");
     }
 
     // open the file
     std::ifstream f(filename);
     if (!f.good()) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSR: Cannot open file for reading.");
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSR: Cannot open file for reading.");
     }
 
     // process banner
     if (mm_read_banner(f, &matrixCode) != 0) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSR: Error processing MM banner.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSR: Error processing MM banner.");
     }
     if ( !(mm_is_real(matrixCode) && mm_is_sparse(matrixCode) && mm_is_general(matrixCode)) ) {
-        Esys_setError(TYPE_ERROR, "SystemMatrix::loadMM_toCSR: found Matrix Market type is not supported.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSR: found Matrix Market type is not supported.");
     }
 
     // get matrix size
     if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSR: Could not read sparse matrix size.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSR: Could not read sparse matrix size.");
     }
 
     // prepare storage
@@ -179,22 +170,22 @@ SystemMatrix_ptr SystemMatrix::loadMM_toCSR(const char *filename)
     row_ptr[M] = nz;
 
     // create return value
-    index_t dist[2];
-    dist[0]=0;
-    dist[1]=M;
-    Distribution_ptr output_dist(new Distribution(mpi_info, dist, 1, 0));
-    dist[1]=N;
-    Distribution_ptr input_dist(new Distribution(mpi_info, dist, 1, 0));
+    std::vector<index_t> dist(2);
+    dist[1] = M;
+    escript::Distribution_ptr output_dist(new escript::Distribution(mpi_info, dist));
+    dist[1] = N;
+    escript::Distribution_ptr input_dist(new escript::Distribution(mpi_info, dist));
     Pattern_ptr mainPattern(new Pattern(MATRIX_FORMAT_DEFAULT, M, N, row_ptr, col_ind));
     Pattern_ptr couplePattern(new Pattern(MATRIX_FORMAT_DEFAULT, M, N, NULL, NULL));
-    dist[0]=M;
-    SharedComponents_ptr send(new SharedComponents(
-                                    M, 0, NULL, NULL, dist, 1, 0, mpi_info));
+    dist[0] = M;
+    SharedComponents_ptr send(new SharedComponents(M, std::vector<int>(),
+                                                   NULL, dist));
     Connector_ptr connector(new Connector(send, send));
     SystemMatrixPattern_ptr pattern(new SystemMatrixPattern(
                 MATRIX_FORMAT_DEFAULT, output_dist, input_dist, mainPattern,
                 couplePattern, couplePattern, connector, connector));
-    out.reset(new SystemMatrix(MATRIX_FORMAT_DEFAULT, pattern, 1, 1, true));
+    out.reset(new SystemMatrix(MATRIX_FORMAT_DEFAULT, pattern, 1, 1, true,
+                escript::FunctionSpace(), escript::FunctionSpace()));
 
     // copy values
 #pragma omp parallel for
@@ -218,38 +209,31 @@ SystemMatrix_ptr SystemMatrix::loadMM_toCSC(const char* filename)
     double *val = NULL;
     int curr_col=0;
     MM_typecode matrixCode;
-    esysUtils::JMPI mpi_info=esysUtils::makeInfo( MPI_COMM_WORLD);
+    escript::JMPI mpi_info = escript::makeInfo(MPI_COMM_WORLD);
     if (mpi_info->size > 1) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSC: supports single processor only");
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSC: supports single processor only");
     }
 
-    Esys_resetError();
-
     // open the file
     std::ifstream f(filename);
     if (!f.good()) {
-        Esys_setError(IO_ERROR, "SystemMatrix::loadMM_toCSC: File could not be opened for reading.");
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSC: File could not be opened for reading.");
     }
 
     // process banner
     if (mm_read_banner(f, &matrixCode) != 0) {
-        Esys_setError(IO_ERROR,"SystemMatrix::loadMM_toCSC: Error processing MM banner.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSC: Error processing MM banner.");
     }
     if( !(mm_is_real(matrixCode) && mm_is_sparse(matrixCode) && mm_is_general(matrixCode)) ) {
-        Esys_setError(TYPE_ERROR, "SystemMatrix::loadMM_toCSC: found Matrix Market type is not supported.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSC: found Matrix Market type is not supported.");
     }
 
     // get matrix size
     if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) {
-        Esys_setError(TYPE_ERROR, "SystemMatrix::loadMM_toCSC: found Matrix Market type is not supported.");
         f.close();
-        return out;
+        throw PasoException("SystemMatrix::loadMM_toCSC: found Matrix Market type is not supported.");
     }
 
     // prepare storage
@@ -285,21 +269,22 @@ SystemMatrix_ptr SystemMatrix::loadMM_toCSC(const char* filename)
     }
     col_ptr[N] = nz;
 
-    index_t dist[2];
-    dist[0]=0;
-    dist[1]=N;
-    Distribution_ptr output_dist(new Distribution(mpi_info, dist,1,0));
-    dist[1]=M;
-    Distribution_ptr input_dist(new Distribution(mpi_info, dist,1,0));
+    std::vector<index_t> dist(2);
+    dist[1] = N;
+    escript::Distribution_ptr output_dist(new escript::Distribution(mpi_info, dist));
+    dist[1] = M;
+    escript::Distribution_ptr input_dist(new escript::Distribution(mpi_info, dist));
     mainPattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT,N,M,col_ptr,col_ind));
     couplePattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT,N,M,NULL,NULL));
-    SharedComponents_ptr send(new SharedComponents(
-                    N, 0, NULL, NULL, NULL, 1, 0, mpi_info));
-    connector.reset(new Connector(send,send));
+    SharedComponents_ptr send(new SharedComponents(N, std::vector<int>(),
+                                     NULL, std::vector<index_t>()));
+    connector.reset(new Connector(send, send));
     pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
                 output_dist, input_dist, mainPattern, couplePattern,
                 couplePattern, connector, connector));
-    out.reset(new SystemMatrix(MATRIX_FORMAT_CSC, pattern, 1, 1, true));
+    out.reset(new SystemMatrix(MATRIX_FORMAT_CSC, pattern, 1, 1, true,
+                               escript::FunctionSpace(),
+                               escript::FunctionSpace()));
 
     // copy values
 #pragma omp parallel for
@@ -314,39 +299,36 @@ SystemMatrix_ptr SystemMatrix::loadMM_toCSC(const char* filename)
 void RHS_loadMM_toCSR(const char *filename, double *b, dim_t size)
 {
     MM_typecode matrixCode;
-    Esys_resetError();
     // open the file
     std::ifstream f(filename);
     if (!f.good()) {
-        Esys_setError(IO_ERROR, "RHS_loadMM_toCSR: Cannot open file for reading.");
+        throw PasoException("RHS_loadMM_toCSR: Cannot open file for reading.");
     }
 
     // process banner
     if (mm_read_banner(f, &matrixCode) != 0) {
-        Esys_setError(IO_ERROR, "RHS_loadMM_toCSR: Error processing MM banner.");
+        throw PasoException("RHS_loadMM_toCSR: Error processing MM banner.");
     }
     if( !(mm_is_real(matrixCode) && mm_is_general(matrixCode) && mm_is_array(matrixCode)) ) {
-        Esys_setError(TYPE_ERROR,"RHS_loadMM_toCSR: found Matrix Market type is not supported.");
+        throw PasoException("RHS_loadMM_toCSR: found Matrix Market type is not supported.");
     }
 
     // get matrix size
     if (mm_read_mtx_array_size(f, &M, &N) != 0) {
-        Esys_setError(IO_ERROR, "RHS_loadMM_toCSR: Could not read sparse matrix size.");
+        throw PasoException("RHS_loadMM_toCSR: Could not read sparse matrix size.");
     }
 
     if (M != size) {
-        Esys_setError(IO_ERROR, "RHS_loadMM_toCSR: Actual and provided sizes do not match.");
+        throw PasoException("RHS_loadMM_toCSR: Actual and provided sizes do not match.");
     }
 
-    if (Esys_noError()) {
-        nz=M;
-        // perform actual read of elements
-        for (int i=0; i<nz; i++) {
-            f >> b[i];
-            if (!f.good()) {
-                f.close();
-                Esys_setError(IO_ERROR, "RHS_loadMM_toCSR: Could not read some of the values.");
-            }
+    nz=M;
+    // perform actual read of elements
+    for (int i=0; i<nz; i++) {
+        f >> b[i];
+        if (!f.good()) {
+            f.close();
+            throw PasoException("RHS_loadMM_toCSR: Could not read some of the values.");
         }
     }
     f.close();
diff --git a/paso/src/SystemMatrix_mergeMainAndCouple.cpp b/paso/src/SystemMatrix_mergeMainAndCouple.cpp
index 0cde64b..95737d7 100644
--- a/paso/src/SystemMatrix_mergeMainAndCouple.cpp
+++ b/paso/src/SystemMatrix_mergeMainAndCouple.cpp
@@ -33,7 +33,6 @@
 
 /****************************************************************************/
 
-#include "Paso.h"
 #include "SystemMatrix.h"
 
 namespace paso {
@@ -47,12 +46,10 @@ void SystemMatrix::mergeMainAndCouple(index_t** p_ptr, index_t** p_idx, double**
         if (type & (MATRIX_FORMAT_OFFSET1 + MATRIX_FORMAT_BLK1)) {
             mergeMainAndCouple_CSC_OFFSET1(p_ptr, p_idx, p_val);
         } else {
-            Esys_setError(SYSTEM_ERROR, "SystemMatrix::mergeMainAndCouple: CSC with index 0 or block size larger than 1 is not supported.");
+            throw PasoException("SystemMatrix::mergeMainAndCouple: CSC with index 0 or block size larger than 1 is not supported.");
         }
-    } else if (type & MATRIX_FORMAT_TRILINOS_CRS) {
-        Esys_setError(SYSTEM_ERROR, "SystemMatrix::mergeMainAndCouple: TRILINOS is not supported.");
     } else {
-        Esys_setError(SYSTEM_ERROR,"SystemMatrix::mergeMainAndCouple: CRS is not supported.");
+        throw PasoException("SystemMatrix::mergeMainAndCouple: CRS is not supported.");
     }
 }
 
@@ -94,22 +91,20 @@ void SystemMatrix::mergeMainAndCouple_CSR_OFFSET0(index_t** p_ptr, index_t** p_i
     const index_t couple_num_rows = col_coupleBlock->numRows;
 
     if (main_num_rows != couple_num_rows) {
-        Esys_setError(TYPE_ERROR, "SystemMatrix::mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
-        return;
+        throw PasoException("SystemMatrix::mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
     }
 
     double* rows = NULL;
-    const index_t rank = mpi_info->rank;
     Coupler_ptr coupler;
     if (global_id == NULL) {
         // prepare for global coordinates in colCoupleBlock, the results are
         // in coupler->recv_buffer
         rows = new double[main_num_rows];
-        const index_t row_offset = row_distribution->first_component[rank];
+        const index_t row_offset = row_distribution->getFirstComponent();
 #pragma omp parallel for
         for (index_t i=0; i<main_num_rows; ++i)
             rows[i] = row_offset+i;
-        coupler.reset(new Coupler(col_coupler->connector, 1));
+        coupler.reset(new Coupler(col_coupler->connector, 1, mpi_info));
         coupler->startCollect(rows);
     }
 
@@ -117,7 +112,7 @@ void SystemMatrix::mergeMainAndCouple_CSR_OFFSET0(index_t** p_ptr, index_t** p_i
     index_t* couple_ptr = col_coupleBlock->pattern->ptr;
     index_t* couple_idx = col_coupleBlock->pattern->index;
     double*  couple_val = col_coupleBlock->val;
-    const index_t col_offset = col_distribution->first_component[rank];
+    const index_t col_offset = col_distribution->getFirstComponent();
     const index_t main_num_vals = main_ptr[main_num_rows]-main_ptr[0];
     const index_t couple_num_vals = couple_ptr[couple_num_rows]-couple_ptr[0];
     const index_t num_vals = main_num_vals + couple_num_vals;
@@ -203,22 +198,20 @@ void SystemMatrix::mergeMainAndCouple_CSR_OFFSET0_Block(index_t** p_ptr, index_t
     const index_t couple_num_rows = col_coupleBlock->numRows;
 
     if (main_num_rows != couple_num_rows) {
-        Esys_setError(TYPE_ERROR, "SystemMatrix_mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
-        return;
+        throw PasoException("SystemMatrix_mergeMainAndCouple_CSR_OFFSET0: number of rows do not match.");
     }
 
     double* rows = NULL;
-    const index_t rank = mpi_info->rank;
     Coupler_ptr coupler;
     if (global_id == NULL) {
         // prepare for global coordinates in colCoupleBlock, the results are
         // in coupler->recv_buffer
         rows = new double[main_num_rows];
-        const index_t row_offset = row_distribution->first_component[rank];
+        const index_t row_offset = row_distribution->getFirstComponent();
 #pragma omp parallel for
         for (index_t i=0; i<main_num_rows; ++i)
             rows[i]=row_offset+i;
-        coupler.reset(new Coupler(col_coupler->connector, 1));
+        coupler.reset(new Coupler(col_coupler->connector, 1, mpi_info));
         coupler->startCollect(rows);
     }
 
@@ -226,7 +219,7 @@ void SystemMatrix::mergeMainAndCouple_CSR_OFFSET0_Block(index_t** p_ptr, index_t
     index_t* couple_ptr = col_coupleBlock->pattern->ptr;
     index_t* couple_idx = col_coupleBlock->pattern->index;
     double*  couple_val = col_coupleBlock->val;
-    const index_t col_offset = col_distribution->first_component[rank];
+    const index_t col_offset = col_distribution->getFirstComponent();
     const index_t main_num_vals = main_ptr[main_num_rows]-main_ptr[0];
     const index_t couple_num_vals = couple_ptr[couple_num_rows]-couple_ptr[0];
     const index_t num_vals = main_num_vals + couple_num_vals;
@@ -282,7 +275,7 @@ void SystemMatrix::mergeMainAndCouple_CSR_OFFSET0_Block(index_t** p_ptr, index_t
 
 void SystemMatrix::mergeMainAndCouple_CSC_OFFSET1(index_t** p_ptr, index_t** p_idx, double** p_val) const
 {
-    Esys_setError(TYPE_ERROR, "SystemMatrix_mergeMainAndCouple_CSC_OFFSET1: not implemented.");
+    throw PasoException("SystemMatrix_mergeMainAndCouple_CSC_OFFSET1: not implemented.");
 }
 
 } // namespace paso
diff --git a/paso/src/TFQMR.cpp b/paso/src/TFQMR.cpp
index 7fec473..af4951f 100644
--- a/paso/src/TFQMR.cpp
+++ b/paso/src/TFQMR.cpp
@@ -59,14 +59,14 @@ namespace paso {
 #define USE_DYNAMIC_SCHEDULING
 #endif
 
-err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
-                   double* tolerance, Performance* pp)
+SolverResult Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
+                          double* tolerance, Performance* pp)
 {
     int m=1;
     int j=0;
     dim_t num_iter=0;
     bool breakFlag=false, maxIterFlag=false, convergeFlag=false;
-    err_t status = SOLVER_NO_ERROR;
+    SolverResult status = NoError;
     const dim_t n = A->getTotalNumRows();
     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
     double norm_of_residual;
@@ -84,7 +84,7 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     dim_t maxit = *iter;
 
     if (maxit <= 0) {
-        status = SOLVER_INPUT_ERROR;
+        status = InputError;
     }
 
     util::zeroes(n, x);
@@ -102,7 +102,7 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
 
     Performance_startMonitor(pp, PERFORMANCE_MVM);
-    SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1, PASO_ZERO, temp_vector);
+    A->MatrixVector_CSR_OFFSET0(PASO_ONE, y1, PASO_ZERO, temp_vector);
     Performance_stopMonitor(pp, PERFORMANCE_MVM);
     Performance_startMonitor(pp, PERFORMANCE_SOLVER);
 
@@ -121,9 +121,9 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
     rho = tau * tau;
     norm_of_residual=tau;
 
-    while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) {
+    while (!(convergeFlag || maxIterFlag || breakFlag || (status!=NoError) )) {
         sigma = util::innerProduct(n,res,v,A->mpi_info);
-        if (! (breakFlag = (ABS(sigma) == 0.))) {
+        if (! (breakFlag = (std::abs(sigma) == 0.))) {
             alpha = rho / sigma;
             for (j=0; j<=1; j=j+1) {
                 // Compute y2 and u2 only if you have to
@@ -133,7 +133,7 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
 
                     Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
                     Performance_startMonitor(pp, PERFORMANCE_MVM);
-                    SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
+                    A->MatrixVector_CSR_OFFSET0(PASO_ONE, y2,PASO_ZERO,temp_vector);
                     Performance_stopMonitor(pp, PERFORMANCE_MVM);
                     Performance_startMonitor(pp, PERFORMANCE_SOLVER);
 
@@ -166,7 +166,7 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
                 util::update(n,1.,x,eta,d);
             }
 
-            breakFlag = (ABS(rho) == 0);
+            breakFlag = (std::abs(rho) == 0);
 
             rhon = util::innerProduct(n, res, w, A->mpi_info);
             beta = rhon / rho;
@@ -177,7 +177,7 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
 
             Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
             Performance_startMonitor(pp, PERFORMANCE_MVM);
-            SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1, PASO_ZERO, temp_vector);
+            A->MatrixVector_CSR_OFFSET0(PASO_ONE, y1, PASO_ZERO, temp_vector);
             Performance_stopMonitor(pp, PERFORMANCE_MVM);
 
             Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER);
@@ -196,9 +196,9 @@ err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
         convergeFlag = (norm_of_residual<(*tolerance));
 
         if (maxIterFlag) {
-            status = SOLVER_MAXITER_REACHED;
+            status = MaxIterReached;
         } else if (breakFlag) {
-            status = SOLVER_BREAKDOWN;
+            status = Breakdown;
         }
         ++num_iter;
     } // end of iterations
diff --git a/paso/src/Transport.cpp b/paso/src/Transport.cpp
index 3561be2..5e3cf08 100644
--- a/paso/src/Transport.cpp
+++ b/paso/src/Transport.cpp
@@ -30,10 +30,22 @@
 #include "Preconditioner.h"
 #include "Solver.h" // only for resetting
 
+#include <escript/Data.h>
+
+#include <limits>
+
+namespace bp = boost::python;
+
+using escript::ValueError;
+
 namespace paso {
 
+static const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
+
 TransportProblem::TransportProblem(SystemMatrixPattern_ptr pattern,
-                                   int block_size) :
+                                   int block_size,
+                                   const escript::FunctionSpace& functionspace) :
+    AbstractTransportProblem(block_size, functionspace),
     valid_matrices(false),
     dt_max_R(LARGE_POSITIVE_FLOAT),
     dt_max_T(LARGE_POSITIVE_FLOAT),
@@ -47,26 +59,26 @@ TransportProblem::TransportProblem(SystemMatrixPattern_ptr pattern,
     SystemMatrixType matrix_type = MATRIX_FORMAT_DEFAULT+MATRIX_FORMAT_BLK1;
 
     transport_matrix.reset(new SystemMatrix(matrix_type, pattern, block_size,
-                                            block_size, false));
+                                            block_size, false,
+                                            functionspace, functionspace));
     mass_matrix.reset(new SystemMatrix(matrix_type, pattern, block_size,
-                                       block_size, false));
+                                       block_size, false, functionspace,
+                                       functionspace));
 
     mpi_info = pattern->mpi_info;
 
-    if (Esys_noError()) {
-        const dim_t n = transport_matrix->getTotalNumRows();
-        constraint_mask = new double[n];
-        lumped_mass_matrix = new double[n];
-        reactive_matrix = new double[n];
-        main_diagonal_mass_matrix = new double[n];
-        main_diagonal_low_order_transport_matrix = new double[n];
+    const dim_t n = transport_matrix->getTotalNumRows();
+    constraint_mask = new double[n];
+    lumped_mass_matrix = new double[n];
+    reactive_matrix = new double[n];
+    main_diagonal_mass_matrix = new double[n];
+    main_diagonal_low_order_transport_matrix = new double[n];
 
 #pragma omp parallel for
-        for (dim_t i = 0; i < n; ++i) {
-            lumped_mass_matrix[i] = 0.;
-            main_diagonal_low_order_transport_matrix[i] = 0.;
-            constraint_mask[i] = 0.;
-        }
+    for (dim_t i = 0; i < n; ++i) {
+        lumped_mass_matrix[i] = 0.;
+        main_diagonal_low_order_transport_matrix[i] = 0.;
+        constraint_mask[i] = 0.;
     }
 }
 
@@ -79,7 +91,93 @@ TransportProblem::~TransportProblem()
     delete[] main_diagonal_low_order_transport_matrix;
 }
 
-void TransportProblem::reset()
+void TransportProblem::setToSolution(escript::Data& out, escript::Data& u0,
+                                     escript::Data& source, double dt,
+                                     bp::object& options)
+{
+    Options paso_options(options);
+    options.attr("resetDiagnostics")();
+    if (out.getDataPointSize() != getBlockSize()) {
+        throw ValueError("solve: block size of solution does not match block size of transport problems.");
+    } else if (source.getDataPointSize() != getBlockSize()) {
+        throw ValueError("solve: block size of source term does not match block size of transport problems.");
+    } else if (out.getFunctionSpace() != getFunctionSpace()) {
+        throw ValueError("solve: function spaces of solution and of transport problem don't match.");
+    } else if (source.getFunctionSpace() != getFunctionSpace()) {
+        throw ValueError("solve: function spaces of source term and of transport problem don't match.");
+    } else if (dt <= 0.) {
+        throw ValueError("solve: time increment dt needs to be positive.");
+    }
+    out.expand();
+    source.expand();
+    u0.expand();
+    out.requireWrite();
+    source.requireWrite();
+    u0.requireWrite();
+    double* out_dp = out.getSampleDataRW(0);
+    double* u0_dp = u0.getSampleDataRW(0);
+    double* source_dp = source.getSampleDataRW(0);
+    solve(out_dp, dt, u0_dp, source_dp, &paso_options);
+    paso_options.updateEscriptDiagnostics(options);
+}
+
+void TransportProblem::copyConstraint(escript::Data& source, escript::Data& q,
+                                      escript::Data& r)
+{
+    if (q.getDataPointSize() != getBlockSize()) {
+        throw ValueError("copyConstraint: block size does not match the number of components of constraint mask.");
+    } else if (q.getFunctionSpace() != getFunctionSpace()) {
+        throw ValueError("copyConstraint: function spaces of transport problem and constraint mask don't match.");
+    } else if (r.getDataPointSize() != getBlockSize()) {
+        throw ValueError("copyConstraint: block size does not match the number of components of constraint values.");
+    } else if (r.getFunctionSpace() != getFunctionSpace()) {
+        throw ValueError("copyConstraint: function spaces of transport problem and constraint values don't match.");
+    } else if (source.getDataPointSize() != getBlockSize()) {
+        throw ValueError("copyConstraint: block size does not match the number of components of source.");
+    } else if (source.getFunctionSpace() != getFunctionSpace()) {
+        throw ValueError("copyConstraint: function spaces of transport problem and source don't match.");
+    }
+
+#if 0
+    // r2=r where q>0, 0 elsewhere
+    escript::Data r2(0., q.getDataPointShape(), q.getFunctionSpace());
+    r2.copyWithMask(r, q);
+
+    // source -= tp->mass_matrix*r2
+    r2.expand();
+    source.expand();
+    q.expand();
+    r2.requireWrite();
+    source.requireWrite();
+    q.requireWrite();
+    double* r2_dp = r2.getSampleDataRW(0);
+    double* source_dp = source.getSampleDataRW(0);
+    double* q_dp = q.getSampleDataRW(0);
+
+    mass_matrix->MatrixVector(-1., r2_dp, 1., source_dp);
+
+    // insert 0 rows into transport matrix
+    transport_matrix->nullifyRows(q_dp, 0.);
+
+    // insert 0 rows and 1 in main diagonal into mass matrix
+    mass_matrix->nullifyRowsAndCols(q_dp, q_dp, 1.);
+    source.copyWithMask(escript::Data(0.,q.getDataPointShape(),q.getFunctionSpace()),q);
+#else
+    r.expand();
+    source.expand();
+    q.expand();
+    r.requireWrite();
+    source.requireWrite();
+    q.requireWrite();
+    double* r_dp = r.getSampleDataRW(0);
+    double* source_dp = source.getSampleDataRW(0);
+    double* q_dp = q.getSampleDataRW(0);
+    setUpConstraint(q_dp);
+    insertConstraint(r_dp, source_dp);
+#endif
+}
+
+void TransportProblem::resetTransport() const
 {
     const dim_t n = transport_matrix->getTotalNumRows();
     transport_matrix->setValues(0.);
@@ -89,12 +187,17 @@ void TransportProblem::reset()
     valid_matrices = false;
 }
 
+double TransportProblem::getUnlimitedTimeStepSize() const
+{
+    return std::numeric_limits<double>::max();
+}
+
+
 void TransportProblem::setUpConstraint(const double* q)
 {
     if (valid_matrices) {
-        Esys_setError(VALUE_ERROR, "TransportProblem::setUpConstraint: "
+        throw PasoException("TransportProblem::setUpConstraint: "
                             "Cannot insert a constraint into a valid system.");
-        return;
     }
 
     const dim_t n = transport_matrix->getTotalNumRows();
@@ -108,7 +211,7 @@ void TransportProblem::setUpConstraint(const double* q)
     }
 }
 
-void TransportProblem::insertConstraint(const double* r,  double* source)
+void TransportProblem::insertConstraint(const double* r,  double* source) const
 {
     const dim_t n = transport_matrix->getTotalNumRows();
 
diff --git a/paso/src/Transport.h b/paso/src/Transport.h
index ad6d97e..ac39559 100644
--- a/paso/src/Transport.h
+++ b/paso/src/Transport.h
@@ -20,28 +20,39 @@
 
 #define DT_FACTOR_MAX 100000.
 
-#include "SystemMatrix.h"
+#include "Paso.h"
 #include "Options.h"
+#include "SystemMatrix.h"
+
+#include <escript/AbstractTransportProblem.h>
 
 namespace paso {
 
-struct TransportProblem;
+class TransportProblem;
 typedef boost::shared_ptr<TransportProblem> TransportProblem_ptr;
 typedef boost::shared_ptr<const TransportProblem> const_TransportProblem_ptr;
 
-PASO_DLL_API
-struct TransportProblem : boost::enable_shared_from_this<TransportProblem>
+class TransportProblem : public escript::AbstractTransportProblem,
+                         public boost::enable_shared_from_this<TransportProblem>
 {
-    TransportProblem(SystemMatrixPattern_ptr pattern, int block_size);
+public:
+    /// Default constructor - throws exception
+    TransportProblem();
+
+    TransportProblem(SystemMatrixPattern_ptr pattern, int blocksize,
+                     const escript::FunctionSpace& functionspace);
+
     ~TransportProblem();
 
-    void reset();
+    virtual void resetTransport() const;
 
     void solve(double* u, double dt, double* u0, double* q, Options* options);
 
-    double getSafeTimeStepSize();
+    virtual double getSafeTimeStepSize() const;
+
+    virtual double getUnlimitedTimeStepSize() const;
 
-    void insertConstraint(const double* r,  double* source);
+    void insertConstraint(const double* r,  double* source) const;
 
     void setUpConstraint(const double* q);
 
@@ -80,9 +91,9 @@ struct TransportProblem : boost::enable_shared_from_this<TransportProblem>
        return mass_matrix->mainBlock->borrowMainDiagonalPointer();
     }
 
-    inline static index_t getTypeId(index_t solver, index_t preconditioner,
-                                    index_t package, bool symmetry,
-                                    const esysUtils::JMPI& mpi_info)
+    inline static int getTypeId(int solver, int preconditioner,
+                                int package, bool symmetry,
+                                const escript::JMPI& mpi_info)
     {
         return MATRIX_FORMAT_DEFAULT + MATRIX_FORMAT_BLK1;
     }
@@ -91,12 +102,12 @@ struct TransportProblem : boost::enable_shared_from_this<TransportProblem>
     SystemMatrix_ptr mass_matrix;
     SystemMatrix_ptr iteration_matrix;
 
-    bool valid_matrices;
+    mutable bool valid_matrices;
     /// safe time step size for reactive part
-    double dt_max_R;
+    mutable double dt_max_R;
     /// safe time step size for transport part
-    double dt_max_T;
-    double* constraint_mask;
+    mutable double dt_max_T;
+    mutable double* constraint_mask;
 
     double* main_diagonal_low_order_transport_matrix;
     /// 'relevant' lumped mass matrix is assumed to be positive.
@@ -106,7 +117,15 @@ struct TransportProblem : boost::enable_shared_from_this<TransportProblem>
     double* reactive_matrix;
     double* main_diagonal_mass_matrix;
 
-    esysUtils::JMPI mpi_info;
+    escript::JMPI mpi_info;
+
+private:
+    virtual void setToSolution(escript::Data& out, escript::Data& u0,
+                               escript::Data& source, double dt,
+                               boost::python::object& options);
+
+    virtual void copyConstraint(escript::Data& source, escript::Data& q,
+                                escript::Data& r);
 };
 
 } // namespace paso
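
For reference, a minimal sketch (not part of this patch) of why setToSolution() and copyConstraint() can sit in a private section and still override the escript::AbstractTransportProblem interface: virtual dispatch ignores access control, so the base class drives the solve through its public entry point while the paso-specific overrides stay private. The class and member names below are stand-ins, not the real escript API.

    #include <iostream>
    #include <memory>

    // Stand-in for the abstract escript-side interface.
    class AbstractProblem {
    public:
        virtual ~AbstractProblem() {}
        // public entry point: forwards to the customisation point
        void solveInto(double& out) { setToSolution(out); }
    private:
        virtual void setToSolution(double& out) = 0;
    };

    // Stand-in for the paso-side implementation.
    class ConcreteProblem : public AbstractProblem {
    private:
        // private, but still selected by virtual dispatch
        virtual void setToSolution(double& out) { out = 42.0; }
    };

    int main() {
        std::unique_ptr<AbstractProblem> p(new ConcreteProblem());
        double x = 0.0;
        p->solveInto(x);             // calls ConcreteProblem::setToSolution
        std::cout << x << std::endl; // prints 42
    }
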
diff --git a/paso/src/Transport_solve.cpp b/paso/src/Transport_solve.cpp
index 958cc86..c0f192e 100644
--- a/paso/src/Transport_solve.cpp
+++ b/paso/src/Transport_solve.cpp
@@ -37,18 +37,21 @@
 
 /****************************************************************************/
 
-#include <iostream>
 #include "Transport.h"
 #include "FCT_Solver.h"
+#include "PasoUtil.h"
 #include "ReactiveSolver.h"
 #include "Solver.h"
-#include "PasoUtil.h"
+
+#include <iostream>
 
 namespace paso {
 
 void TransportProblem::solve(double* u, double dt, double* u0, double* q,
                              Options* options)
 {
+    const real_t EPSILON = escript::DataTypes::real_t_eps();
+    const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
     const double reduction_after_divergence_factor = 0.5;
     const dim_t num_failures_max=50;
 
@@ -59,16 +62,16 @@ void TransportProblem::solve(double* u, double dt, double* u0, double* q,
     dim_t i_substeps=0, n_substeps=1, num_failures=0;
     double *u_save=NULL, *u2=NULL;
     double  dt2,t=0, dt3;
-    err_t errorCode=SOLVER_NO_ERROR;
+    SolverResult errorCode=NoError;
     const dim_t n = transport_matrix->getTotalNumRows();
     options->time_step_backtracking_used = false;
     options->num_iter=0;
 
     if (dt <= 0.) {
-        Esys_setError(VALUE_ERROR, "TransportProblem::solve: dt must be positive.");
+        throw PasoException("TransportProblem::solve: dt must be positive.");
     } else if (getBlockSize() > 1) {
-        Esys_setError(VALUE_ERROR, "TransportProblem::solve: block size >1 "
-                                   "is not supported.");
+        throw PasoException("TransportProblem::solve: block size >1 "
+                            "is not supported.");
     }
     if (options->verbose) {
         if (options->ode_solver == PASO_BACKWARD_EULER) {
@@ -78,114 +81,105 @@ void TransportProblem::solve(double* u, double dt, double* u0, double* q,
         } else  if (options->ode_solver == PASO_CRANK_NICOLSON) {
             printf("TransportProblem::solve: Crank-Nicolson is used (dt = %e).\n",dt);
         } else {
-            Esys_setError(VALUE_ERROR, "TransportProblem::solve: unknown ODE solver.");
+            throw PasoException("TransportProblem::solve: unknown ODE solver.");
         }
     }
-    if (Esys_noError()) {
-        getSafeTimeStepSize();
-        // allocate memory
-        fctsolver = new FCT_Solver(shared_from_this(), options);
-        rsolver = new ReactiveSolver(shared_from_this());
-        u_save = new double[n];
-        u2 = new double[n];
+    getSafeTimeStepSize();
+    // allocate memory
+    fctsolver = new FCT_Solver(shared_from_this(), options);
+    rsolver = new ReactiveSolver(shared_from_this());
+    u_save = new double[n];
+    u2 = new double[n];
+
+    // let the show begin!!!!
+    const double dt_R = dt_max_R;
+    const double dt_T = dt_max_T;
+    dt2 = dt;
+    if (dt_R < LARGE_POSITIVE_FLOAT)
+        dt2 = std::min(dt_R*2, dt); // as we halve the step size for the RT bit
+    if (dt_T < LARGE_POSITIVE_FLOAT) {
+        if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON || options->ode_solver == PASO_CRANK_NICOLSON) {
+            dt2 = std::min(dt_T, dt);
+        } // PASO_BACKWARD_EULER does not require a restriction
     }
-    if (Esys_noError()) {
-        // let the show begin!!!!
-        const double dt_R = dt_max_R;
-        const double dt_T = dt_max_T;
-        dt2 = dt;
-        if (dt_R < LARGE_POSITIVE_FLOAT)
-            dt2 = MIN(dt_R*2, dt); // as we half the step size for the RT bit
-        if (dt_T < LARGE_POSITIVE_FLOAT) {
-            if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON || options->ode_solver == PASO_CRANK_NICOLSON) {
-                dt2 = MIN(dt_T, dt);
-            } // PASO_BACKWARD_EULER does not require a restriction
-        }
 
-        num_failures = 0;
-        util::copy(n, u, u0); // copy initial value to return
+    num_failures = 0;
+    util::copy(n, u, u0); // copy initial value to return
 
-        while((dt-t) > dt*sqrt(EPSILON) && Esys_noError()) {
-            n_substeps = ceil((dt-t)/dt2);
-            if (n_substeps <= 0) {
-                Esys_setError(VALUE_ERROR, "TransportProblem::solve: time stepping break down.");
-            } else {
-                dt3 = (dt-t)/n_substeps;
+    while((dt-t) > dt*sqrt(EPSILON)) {
+        n_substeps = ceil((dt-t)/dt2);
+        if (n_substeps <= 0) {
+            throw PasoException("TransportProblem::solve: time stepping break down.");
+        } else {
+            dt3 = (dt-t)/n_substeps;
+            if (options->verbose) {
+                std::cout << "TransportProblem::solve: number of substeps = "
+                    << n_substeps << " with dt = " << dt3 << "."
+                    << std::endl;
+            }
+            // initialize the iteration matrix
+            fctsolver->initialize(dt3, options, &pp);
+            rsolver->initialize(dt3/2, options);
+            errorCode = NoError;
+
+            // start iteration
+            for (i_substeps=0; i_substeps<n_substeps &&
+                               errorCode==NoError; i_substeps++) {
                 if (options->verbose) {
-                    std::cout << "TransportProblem::solve: number of substeps = "
-                        << n_substeps << " with dt = " << dt3 << "."
-                        << std::endl;
+                    std::cout << "TransportProblem::solve: substep "
+                        << i_substeps << " of " << n_substeps << " at t = "
+                        << (t+dt3) << " (dt = " << dt3 << ")" << std::endl;
                 }
-                // initialize the iteration matrix
-                fctsolver->initialize(dt3, options, &pp);
-                rsolver->initialize(dt3/2, options);
-                errorCode = SOLVER_NO_ERROR;
-
-                // start iteration
-                for (i_substeps=0; i_substeps<n_substeps &&
-                                   errorCode==SOLVER_NO_ERROR &&
-                                   Esys_noError(); i_substeps++) {
-                    if (options->verbose) {
-                        std::cout << "TransportProblem::solve: substep "
-                            << i_substeps << " of " << n_substeps << " at t = "
-                            << (t+dt3) << " (dt = " << dt3 << ")" << std::endl;
-                    }
 
-                    // create copy for restart in case of failure
-                    util::copy(n, u_save, u);
-                    // update u
+                // create copy for restart in case of failure
+                util::copy(n, u_save, u);
+                // update u
 
-                    // Mu_t=Du+q u(0)=u
-                    errorCode = rsolver->solve(u2, u, q, options, &pp);
+                // Mu_t=Du+q u(0)=u
+                errorCode = rsolver->solve(u2, u, q, options, &pp);
 
-                    // Mv_t=Lv   v(0)=u(dt/2)
-                    if (errorCode == SOLVER_NO_ERROR) {
-                        errorCode = fctsolver->update(u, u2, options, &pp);
+                // Mv_t=Lv   v(0)=u(dt/2)
+                if (errorCode == NoError) {
+                    errorCode = fctsolver->update(u, u2, options, &pp);
 
-                    }
-                    // Mu_t=Du+q u(dt/2)=v(dt/2)
-                    if (errorCode == SOLVER_NO_ERROR) {
-                        errorCode = rsolver->solve(u2, u, q, options, &pp);
-                    }
+                }
+                // Mu_t=Du+q u(dt/2)=v(dt/2)
+                if (errorCode == NoError) {
+                    errorCode = rsolver->solve(u2, u, q, options, &pp);
+                }
 
-                    if (errorCode == SOLVER_NO_ERROR) {
-                        num_failures = 0;
-                        t += dt3;
-                        util::copy(n, u, u2);
-                    }
+                if (errorCode == NoError) {
+                    num_failures = 0;
+                    t += dt3;
+                    util::copy(n, u, u2);
                 }
-                if (errorCode == SOLVER_MAXITER_REACHED || errorCode == SOLVER_DIVERGENCE) {
-                    // if num_failures_max failures in a row: give up
-                    if (num_failures >= num_failures_max) {
-                        Esys_setError(VALUE_ERROR, "TransportProblem::solve: "
-                                "No convergence after time step reductions.");
-                    } else {
-                        options->time_step_backtracking_used = true;
-                        if (options->verbose) {
-                            std::cout << "TransportProblem::solve: "
-                                << "no convergence. Time step size is reduced."
-                                << std::endl;
-                        }
-                        dt2 = dt3*reduction_after_divergence_factor;
-                        num_failures++;
-                        util::copy(n, u, u_save); // reset initial value
+            }
+            if (errorCode == MaxIterReached || errorCode == Divergence) {
+                // if num_failures_max failures in a row: give up
+                if (num_failures >= num_failures_max) {
+                    throw PasoException("TransportProblem::solve: "
+                            "No convergence after time step reductions.");
+                } else {
+                    options->time_step_backtracking_used = true;
+                    if (options->verbose) {
+                        std::cout << "TransportProblem::solve: "
+                            << "no convergence. Time step size is reduced."
+                            << std::endl;
                     }
-                } else if (errorCode == SOLVER_INPUT_ERROR) {
-                    Esys_setError(VALUE_ERROR, "TransportProblem::solve: "
-                                               "input error for solver.");
-                } else if (errorCode == SOLVER_MEMORY_ERROR) {
-                    Esys_setError(MEMORY_ERROR, "TransportProblem::solve: "
-                                                "memory allocation failed.");
-                } else if (errorCode == SOLVER_BREAKDOWN) {
-                    Esys_setError(VALUE_ERROR, "TransportProblem::solve: "
-                                               "solver break down.");
-                } else if (errorCode == SOLVER_NEGATIVE_NORM_ERROR) {
-                    Esys_setError(VALUE_ERROR, "TransportProblem::solve: "
-                                               "negative norm.");
-                } else if (errorCode != SOLVER_NO_ERROR) {
-                    Esys_setError(SYSTEM_ERROR, "TransportProblem::solve: "
-                                                "general error.");
+                    dt2 = dt3*reduction_after_divergence_factor;
+                    num_failures++;
+                    util::copy(n, u, u_save); // reset initial value
                 }
+            } else if (errorCode == InputError) {
+                throw PasoException("TransportProblem::solve: input error for solver.");
+            } else if (errorCode == MemoryError) {
+                throw PasoException("TransportProblem::solve: memory allocation failed.");
+            } else if (errorCode == Breakdown) {
+                throw PasoException("TransportProblem::solve: solver break down.");
+            } else if (errorCode == NegativeNormError) {
+                throw PasoException("TransportProblem::solve: negative norm.");
+            } else if (errorCode != NoError) {
+                throw PasoException("TransportProblem::solve: general error.");
             }
         }
     } // end of time loop
@@ -196,7 +190,7 @@ void TransportProblem::solve(double* u, double dt, double* u0, double* q,
     delete[] u2;
 }
 
-double TransportProblem::getSafeTimeStepSize()
+double TransportProblem::getSafeTimeStepSize() const
 {
     double dt_max=0.;
     const dim_t n = transport_matrix->getTotalNumRows();
@@ -209,7 +203,7 @@ double TransportProblem::getSafeTimeStepSize()
         int fail = 0;
 #pragma omp parallel
         {
-            index_t fail_loc = 0;
+            int fail_loc = 0;
 #pragma omp for
             for (index_t i=0; i<n; ++i) {
                 const double m_i = lumped_mass_matrix[i];
@@ -222,7 +216,7 @@ double TransportProblem::getSafeTimeStepSize()
             }
             #pragma omp critical
             {
-                fail = MAX(fail, fail_loc);
+                fail = std::max(fail, fail_loc);
             }
         }
 #ifdef ESYS_MPI
@@ -230,24 +224,22 @@ double TransportProblem::getSafeTimeStepSize()
         MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, mpi_info->comm);
 #endif
         if (fail > 0)
-            Esys_setError(VALUE_ERROR, "TransportProblem::getSafeTimeStepSize: "
-                                     "negative mass matrix entries detected.");
+            throw PasoException("TransportProblem::getSafeTimeStepSize: "
+                                "negative mass matrix entries detected.");
         // split off row-sum from transport_matrix
         transport_matrix->makeZeroRowSums(reactive_matrix);
         // get a copy of the main diagonal of the mass matrix
         mass_matrix->copyFromMainDiagonal(main_diagonal_mass_matrix);
 
-        if (Esys_noError()) {
-            const double dt_R = ReactiveSolver::getSafeTimeStepSize(shared_from_this());
-            const double dt_T = FCT_Solver::getSafeTimeStepSize(shared_from_this());
-            dt_max_R = dt_R;
-            dt_max_T = dt_T;
-            valid_matrices = true;
-            dt_max = MIN(2*dt_R, dt_T);
-        }
+        const double dt_R = ReactiveSolver::getSafeTimeStepSize(shared_from_this());
+        const double dt_T = FCT_Solver::getSafeTimeStepSize(shared_from_this());
+        dt_max_R = dt_R;
+        dt_max_T = dt_T;
+        valid_matrices = true;
+        dt_max = std::min(2*dt_R, dt_T);
     } else {
         // factor 2 as we use operator splitting
-        dt_max = MIN(2*dt_max_R, dt_max_T);
+        dt_max = std::min(2*dt_max_R, dt_max_T);
     }
     return dt_max;
 }
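
The Transport_solve.cpp hunks above swap the global Esys_setError/Esys_noError flag checks for thrown PasoExceptions and a SolverResult return code, while keeping the adaptive substepping: the interval dt is split into n_substeps, and on a convergence failure the substep size is halved (at most num_failures_max times) before giving up. A minimal sketch of that control flow, with a hypothetical step() callable standing in for the reactive/FCT solver pair:

    #include <cmath>
    #include <functional>
    #include <limits>
    #include <stdexcept>

    // dt: total interval to integrate; step(dt3) performs one substep and
    // returns false on a convergence failure (hypothetical stand-in for the
    // rsolver/fctsolver updates in TransportProblem::solve).
    void integrate(double dt, const std::function<bool(double)>& step)
    {
        const int num_failures_max = 50;      // as in the code above
        const double reduction_factor = 0.5;  // reduction_after_divergence_factor
        double t = 0.0, dt2 = dt;
        int num_failures = 0;
        while (dt - t > dt * std::sqrt(std::numeric_limits<double>::epsilon())) {
            const int n_substeps = static_cast<int>(std::ceil((dt - t) / dt2));
            const double dt3 = (dt - t) / n_substeps;
            bool ok = true;
            for (int i = 0; i < n_substeps && ok; ++i) {
                ok = step(dt3);
                if (ok)
                    t += dt3;
            }
            if (!ok) {
                if (++num_failures >= num_failures_max)
                    throw std::runtime_error("no convergence after time step reductions");
                dt2 = dt3 * reduction_factor;  // back-track with a smaller substep
            }
        }
    }
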
diff --git a/paso/src/UMFPACK.cpp b/paso/src/UMFPACK.cpp
index b9f77f2..ad7319d 100644
--- a/paso/src/UMFPACK.cpp
+++ b/paso/src/UMFPACK.cpp
@@ -21,9 +21,10 @@
 
 /****************************************************************************/
 
-#include "UMFPACK.h"
 #include "Paso.h"
+#include "UMFPACK.h"
 #include "Options.h"
+#include "PasoException.h"
 
 #include <iostream>
 #include <sstream>
@@ -35,7 +36,7 @@ void UMFPACK_free(SparseMatrix* A)
 {
     if (A && A->solver_p) {
         UMFPACK_Handler* pt = reinterpret_cast<UMFPACK_Handler*>(A->solver_p);
-#ifdef USE_UMFPACK
+#ifdef ESYS_HAVE_UMFPACK
 #ifdef ESYS_INDEXTYPE_LONG
         umfpack_dl_free_symbolic(&pt->symbolic);
         umfpack_dl_free_numeric(&pt->numeric);
@@ -54,10 +55,9 @@ void UMFPACK_free(SparseMatrix* A)
 void UMFPACK_solve(SparseMatrix_ptr A, double* out, double* in,
                    dim_t numRefinements, bool verbose)
 {
-#ifdef USE_UMFPACK
+#ifdef ESYS_HAVE_UMFPACK
     if (!( (A->type & MATRIX_FORMAT_BLK1) && (A->type & MATRIX_FORMAT_CSC)) ) {
-        Esys_setError(TYPE_ERROR, "Paso: UMFPACK requires CSC format with index offset 1 and block size 1.");
-        return;
+        throw PasoException("Paso: UMFPACK requires CSC format with index offset 1 and block size 1.");
     }
 
     UMFPACK_Handler* pt = reinterpret_cast<UMFPACK_Handler*>(A->solver_p);
@@ -77,7 +77,7 @@ void UMFPACK_solve(SparseMatrix_ptr A, double* out, double* in,
         pt = new UMFPACK_Handler;
         A->solver_p = (void*) pt;
         A->solver_package = PASO_UMFPACK;
-        time0=Esys_timer();
+        time0=escript::gettime();
 
         // call LDU symbolic factorization:
 #ifdef ESYS_INDEXTYPE_LONG
@@ -94,26 +94,22 @@ void UMFPACK_solve(SparseMatrix_ptr A, double* out, double* in,
             if (error == UMFPACK_ERROR_out_of_memory) {
                 message = "UMFPACK: symbolic factorization failed because of "
                           "memory overflow.";
-                Esys_setError(MEMORY_ERROR, message.c_str());
             } else if (error == UMFPACK_WARNING_singular_matrix) {
                 message = "UMFPACK: symbolic factorization failed because of "
                           "singular matrix.";
-                Esys_setError(ZERO_DIVISION_ERROR, message.c_str());
             } else if (error == UMFPACK_WARNING_determinant_underflow ||
                        error == UMFPACK_WARNING_determinant_overflow) {
                 message = "UMFPACK: symbolic factorization failed because of "
                           "under/overflow.";
-                Esys_setError(FLOATING_POINT_ERROR, message.c_str());
             } else {
                 std::stringstream ss;
                 ss << "UMFPACK: symbolic factorization failed. UMFPACK "
                       "error code = " << error << ".";
                 message = ss.str();
-                Esys_setError(SYSTEM_ERROR, message.c_str());
             }
             if (verbose)
                 std::cout << message.c_str() << std::endl;
-            return;
+            throw PasoException(message);
         }
 
         // call LDU factorization:
@@ -129,43 +125,39 @@ void UMFPACK_solve(SparseMatrix_ptr A, double* out, double* in,
         if (error == UMFPACK_OK) {
             if (verbose) {
                 std::cout << "UMFPACK: LDU factorization completed (time = "
-                    << Esys_timer()-time0 << ")." << std::endl;
+                    << escript::gettime()-time0 << ")." << std::endl;
             }
         } else if (error == UMFPACK_ERROR_out_of_memory) {
             if (verbose) {
                 std::cout << "UMFPACK: LDU factorization failed because of "
                     "memory overflow." << std::endl;
             }
-            Esys_setError(MEMORY_ERROR, "UMFPACK: LDU factorization failed because of memory overflow.");
-            return;
+            throw PasoException("UMFPACK: LDU factorization failed because of memory overflow.");
         } else if (error == UMFPACK_WARNING_singular_matrix) {
             if (verbose) {
                 std::cout << "UMFPACK: LDU factorization failed because of "
                     "singular matrix." << std::endl;
             }
-            Esys_setError(ZERO_DIVISION_ERROR,"UMFPACK: LDU factorization failed because of singular matrix.");
-            return;
+            throw PasoException("UMFPACK: LDU factorization failed because of singular matrix.");
         } else if (error == UMFPACK_WARNING_determinant_underflow
                    || error == UMFPACK_WARNING_determinant_overflow) {
             if (verbose) {
                 std::cout << "UMFPACK: symbolic factorization failed because "
                     "of under/overflow." << std::endl;
             }
-            Esys_setError(FLOATING_POINT_ERROR,"UMFPACK: symbolic factorization failed because of under/overflow.");
-            return;
+            throw PasoException("UMFPACK: symbolic factorization failed because of under/overflow.");
         } else {
             if (verbose) {
                 std::cout << "UMFPACK: LDU factorization failed. UMFPACK "
                     "error code = " << error << "." << std::endl;
             }
-            Esys_setError(SYSTEM_ERROR, "UMFPACK: factorization failed.");
-            return;
+            throw PasoException("UMFPACK: factorization failed.");
         }
     } // pt==NULL
 
     // call forward backward substitution
     control[UMFPACK_IRSTEP] = numRefinements; // number of refinement steps
-    time0 = Esys_timer();
+    time0 = escript::gettime();
 #ifdef ESYS_INDEXTYPE_LONG
     error = umfpack_dl_solve(UMFPACK_A, A->pattern->ptr, A->pattern->index,
                              A->val, out, in, pt->numeric, control, info);
@@ -177,36 +169,36 @@ void UMFPACK_solve(SparseMatrix_ptr A, double* out, double* in,
     if (error == UMFPACK_OK) {
         if (verbose) {
             std::cout << "UMFPACK: forward/backward substitution completed "
-                "(time = " << Esys_timer()-time0 << ")." << std::endl;
+                "(time = " << escript::gettime()-time0 << ")." << std::endl;
         }
     } else if (error == UMFPACK_ERROR_out_of_memory) {
         if (verbose) {
             std::cout << "UMFPACK: forward/backward substitution failed "
                 "because of memory overflow." << std::endl;
         }
-        Esys_setError(MEMORY_ERROR, "UMFPACK: forward/backward substitution failed because of memory overflow.");
+        throw PasoException("UMFPACK: forward/backward substitution failed because of memory overflow.");
     } else if (error == UMFPACK_WARNING_singular_matrix) {
         if (verbose) {
             std::cout << "UMFPACK: forward/backward substitution because of "
                 "singular matrix." << std::endl;
         }
-        Esys_setError(ZERO_DIVISION_ERROR, "UMFPACK: forward/backward substitution failed because of singular matrix.");
+        throw PasoException("UMFPACK: forward/backward substitution failed because of singular matrix.");
     } else if (error == UMFPACK_WARNING_determinant_underflow
                  || error == UMFPACK_WARNING_determinant_overflow) {
         if (verbose) {
             std::cout << "UMFPACK: forward/backward substitution failed "
                 "because of under/overflow." << std::endl;
         }
-        Esys_setError(FLOATING_POINT_ERROR, "UMFPACK: forward/backward substitution failed because of under/overflow.");
+        throw PasoException("UMFPACK: forward/backward substitution failed because of under/overflow.");
     } else {
         if (verbose) {
             std::cout << "UMFPACK: forward/backward substitution failed. "
                 "UMFPACK error code = " << error << "." << std::endl;
         }
-        Esys_setError(SYSTEM_ERROR, "UMFPACK: forward/backward substitution failed.");
+        throw PasoException("UMFPACK: forward/backward substitution failed.");
     }
-#else // USE_UMFPACK
-    Esys_setError(SYSTEM_ERROR, "Paso: Not compiled with UMFPACK.");
+#else // ESYS_HAVE_UMFPACK
+    throw PasoException("Paso: Not compiled with UMFPACK.");
 #endif
 }
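
The UMFPACK.cpp hunks rename the USE_UMFPACK build guard to ESYS_HAVE_UMFPACK and convert every Esys_setError-and-return path into a thrown PasoException, so a missing or failed direct solver now surfaces as an exception at the call site. A stripped-down sketch of that guard-and-throw pattern (std::runtime_error stands in for paso's PasoException, and requireUmfpack() is a hypothetical helper):

    #include <stdexcept>
    #ifdef ESYS_HAVE_UMFPACK
    #include <umfpack.h>
    #endif

    // Fails loudly when the binary was built without UMFPACK support instead
    // of silently setting a global error flag.
    void requireUmfpack()
    {
    #ifdef ESYS_HAVE_UMFPACK
        // real code proceeds to the umfpack_dl_*/umfpack_di_* factorisation here
    #else
        throw std::runtime_error("Paso: Not compiled with UMFPACK.");
    #endif
    }
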
 
diff --git a/paso/src/UMFPACK.h b/paso/src/UMFPACK.h
index ce28f1f..19b09c1 100644
--- a/paso/src/UMFPACK.h
+++ b/paso/src/UMFPACK.h
@@ -31,7 +31,7 @@
 
 #include "SparseMatrix.h"
 
-#ifdef USE_UMFPACK
+#ifdef ESYS_HAVE_UMFPACK
 #include <umfpack.h>
 #endif
 
diff --git a/paso/src/performance.cpp b/paso/src/performance.cpp
index 4432362..4c582e1 100644
--- a/paso/src/performance.cpp
+++ b/paso/src/performance.cpp
@@ -34,55 +34,53 @@ namespace paso {
 /// sets up the monitoring process
 void Performance_open(Performance* pp, int verbose)
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
     #pragma omp single
     {
         pp->event_set = PAPI_NULL;
         // Initialize the PAPI library
         int retval = PAPI_library_init(PAPI_VER_CURRENT);
         if (retval != PAPI_VER_CURRENT && retval > 0) {
-            Esys_setError(SYSTEM_ERROR,"performance: PAPI library version mismatch.");
+            throw PasoException("performance: PAPI library version mismatch.");
         } else if (retval < 0) {
-            Esys_setError(SYSTEM_ERROR,"performance: PAPI initialization error.");
+            throw PasoException("performance: PAPI initialization error.");
         } else {
             if (PAPI_create_eventset(&(pp->event_set)) != PAPI_OK)
-                Esys_setError(SYSTEM_ERROR,"performance: PAPI event set up failed.");
+                throw PasoException("performance: PAPI event set up failed.");
         }
-        if (Esys_noError()) {
-            // try to add various monitors
-            pp->num_events=0;
-            if (PAPI_add_event(pp->event_set, PAPI_FP_OPS) == PAPI_OK) {
-                pp->events[pp->num_events]=PAPI_FP_OPS;
-                pp->num_events++;
-            }
-            if (PAPI_add_event(pp->event_set, PAPI_L1_DCM) == PAPI_OK) {
-                pp->events[pp->num_events]=PAPI_L1_DCM;
-                pp->num_events++;
-            }
-            if (PAPI_add_event(pp->event_set, PAPI_L2_DCM) == PAPI_OK) {
-                pp->events[pp->num_events]=PAPI_L2_DCM;
-                pp->num_events++;
-            }
-            if (PAPI_add_event(pp->event_set, PAPI_L3_DCM) == PAPI_OK) {
-                pp->events[pp->num_events]=PAPI_L3_DCM;
-                pp->num_events++;
-            }
-            for (int i=0; i<PERFORMANCE_NUM_MONITORS; ++i) {
-                pp->cycles[i] = 0;
-                pp->set[i] = PERFORMANCE_UNUSED;
-                for (int j=0; j<PERFORMANCE_NUM_EVENTS; ++j)
-                    pp->values[i][j] = 0.;
-            }
-            PAPI_start(pp->event_set);
+        // try to add various monitors
+        pp->num_events=0;
+        if (PAPI_add_event(pp->event_set, PAPI_FP_OPS) == PAPI_OK) {
+            pp->events[pp->num_events]=PAPI_FP_OPS;
+            pp->num_events++;
+        }
+        if (PAPI_add_event(pp->event_set, PAPI_L1_DCM) == PAPI_OK) {
+            pp->events[pp->num_events]=PAPI_L1_DCM;
+            pp->num_events++;
+        }
+        if (PAPI_add_event(pp->event_set, PAPI_L2_DCM) == PAPI_OK) {
+            pp->events[pp->num_events]=PAPI_L2_DCM;
+            pp->num_events++;
+        }
+        if (PAPI_add_event(pp->event_set, PAPI_L3_DCM) == PAPI_OK) {
+            pp->events[pp->num_events]=PAPI_L3_DCM;
+            pp->num_events++;
+        }
+        for (int i=0; i<PERFORMANCE_NUM_MONITORS; ++i) {
+            pp->cycles[i] = 0;
+            pp->set[i] = PERFORMANCE_UNUSED;
+            for (int j=0; j<PERFORMANCE_NUM_EVENTS; ++j)
+                pp->values[i][j] = 0.;
         }
+        PAPI_start(pp->event_set);
     } // omp single
-#endif // PAPI
+#endif // ESYS_HAVE_PAPI
 }
 
 /// find the index of an event in the list of monitored events
 int Performance_getEventIndex(Performance* pp, int event_id)
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
     for (int i=0; i<pp->num_events; ++i)
         if (pp->events[i]==event_id)
             return i;
@@ -93,10 +91,10 @@ int Performance_getEventIndex(Performance* pp, int event_id)
 /// shuts down the monitoring process
 void Performance_close(Performance* pp, int verbose)
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
 #pragma omp single
     {
-        if (Esys_noError() && verbose) {
+        if (verbose) {
             int i_ops = Performance_getEventIndex(pp, PAPI_FP_OPS);
             int i_l1_miss = Performance_getEventIndex(pp, PAPI_L1_DCM);
             int i_l2_miss = Performance_getEventIndex(pp, PAPI_L2_DCM);
@@ -163,7 +161,7 @@ void Performance_close(Performance* pp, int verbose)
 /// switches on a monitor
 void Performance_startMonitor(Performance* pp, int monitor)
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
 #pragma omp barrier
 #pragma omp single
     {
@@ -182,7 +180,7 @@ void Performance_startMonitor(Performance* pp, int monitor)
 /// switches off a monitor
 void Performance_stopMonitor(Performance* pp,int monitor)
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
 #pragma omp barrier
 #pragma omp single
     {
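
In performance.cpp the PAPI guard becomes ESYS_HAVE_PAPI and the Esys_noError() checks are dropped, since setup failures now throw immediately. A compact sketch of the guarded counter setup, using only the PAPI calls that appear above; the error checks are simplified and openFlopCounter() is a hypothetical helper:

    #ifdef ESYS_HAVE_PAPI
    #include <papi.h>
    #endif
    #include <stdexcept>

    // Returns a started PAPI event set counting floating-point operations,
    // or -1 when the build has no PAPI support.
    int openFlopCounter()
    {
    #ifdef ESYS_HAVE_PAPI
        int eventSet = PAPI_NULL;
        if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT)
            throw std::runtime_error("performance: PAPI library version mismatch.");
        if (PAPI_create_eventset(&eventSet) != PAPI_OK)
            throw std::runtime_error("performance: PAPI event set up failed.");
        PAPI_add_event(eventSet, PAPI_FP_OPS); // optional monitor, failure tolerated
        PAPI_start(eventSet);
        return eventSet;
    #else
        return -1;
    #endif
    }
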
diff --git a/paso/src/performance.h b/paso/src/performance.h
index 34c5818..f6c47b6 100644
--- a/paso/src/performance.h
+++ b/paso/src/performance.h
@@ -29,7 +29,7 @@
 #ifndef __PASO_PERFORMANCE_H__
 #define __PASO_PERFORMANCE_H__
 
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
 #include <papi.h>
 #endif
 
@@ -53,7 +53,7 @@ namespace paso {
 
 struct Performance
 {
-#ifdef PAPI
+#ifdef ESYS_HAVE_PAPI
     /// PAPI event sets for the monitors
     int event_set;
     /// number of events tracked by the monitors
diff --git a/paso/src/solve.cpp b/paso/src/solve.cpp
index 14c0e0c..c2973e4 100644
--- a/paso/src/solve.cpp
+++ b/paso/src/solve.cpp
@@ -27,6 +27,7 @@
 /****************************************************************************/
 
 #include "Paso.h"
+#include "Options.h"
 #include "performance.h"
 #include "Preconditioner.h"
 #include "Solver.h"
@@ -35,82 +36,93 @@
 
 namespace paso {
 
-void solve(SystemMatrix_ptr A, double* out, double* in, Options* options)
+void SystemMatrix::solve(double* out, double* in, Options* options) const
 {
     Performance pp;
     index_t package;
-    Esys_resetError();
-    if (A->getGlobalNumCols() != A->getGlobalNumRows()
-                    || A->col_block_size != A->row_block_size) {
-        Esys_setError(VALUE_ERROR,"solve: matrix has to be a square matrix.");
-        return;
+    if (getGlobalNumCols() != getGlobalNumRows()
+                    || col_block_size != row_block_size) {
+        throw PasoException("solve: matrix has to be a square matrix.");
     }
     //options->show();
     Performance_open(&pp, options->verbose);
-    package = Options::getPackage(options->method, options->package, options->symmetric, A->mpi_info);
-    if (Esys_noError()) {
-        switch(package) {
-            case PASO_PASO:
-                Solver(A, out, in, options, &pp);
-                A->solver_package = PASO_PASO;
-            break;
+    package = Options::getPackage(options->method, options->package, options->symmetric, mpi_info);
+    SolverResult res = NoError;
 
-            case PASO_MKL:
-                if (A->mpi_info->size > 1) {
-                    Esys_setError(VALUE_ERROR,"solve: MKL package does not support MPI.");
-                    return;
-                }
-                options->converged = false;
-                options->time = Esys_timer();
-                Performance_startMonitor(&pp, PERFORMANCE_ALL);
-                MKL_solve(A->mainBlock, out, in, options->reordering,
-                          options->refinements, options->verbose);
-                A->solver_package = PASO_MKL;
-                Performance_stopMonitor(&pp, PERFORMANCE_ALL);
-                options->time = Esys_timer()-options->time;
-                options->set_up_time = 0;
-                options->residual_norm = 0.;
-                options->num_iter = 0;
-                if (Esys_MPIInfo_noError(A->mpi_info))
-                    options->converged = true;
-            break;
+    switch (package) {
+        case PASO_PASO:
+            res = Solver(boost::const_pointer_cast<SystemMatrix>(
+                    boost::dynamic_pointer_cast<const SystemMatrix>(getPtr())),
+                    out, in, options, &pp);
+            solver_package = PASO_PASO;
+        break;
 
-            case PASO_UMFPACK:
-                if (A->mpi_info->size>1) {
-                    Esys_setError(VALUE_ERROR,"solve: UMFPACK package does not support MPI.");
-                    return;
-                }
-                options->converged = false;
-                options->time = Esys_timer();
-                Performance_startMonitor(&pp, PERFORMANCE_ALL);
-                UMFPACK_solve(A->mainBlock, out, in, options->refinements, options->verbose);
-                A->solver_package = PASO_UMFPACK;
-                Performance_stopMonitor(&pp, PERFORMANCE_ALL);
-                options->time = Esys_timer()-options->time;
-                options->set_up_time = 0;
-                options->residual_norm = 0.;
-                options->num_iter = 0;
-                if (Esys_MPIInfo_noError(A->mpi_info))
-                    options->converged = true;
-            break;
+        case PASO_MKL:
+            if (mpi_info->size > 1) {
+                throw PasoException("solve: MKL package does not support MPI.");
+            }
+            options->converged = false;
+            options->time = escript::gettime();
+            Performance_startMonitor(&pp, PERFORMANCE_ALL);
+            MKL_solve(mainBlock, out, in, options->reordering,
+                      options->refinements, options->verbose);
+            solver_package = PASO_MKL;
+            Performance_stopMonitor(&pp, PERFORMANCE_ALL);
+            options->time = escript::gettime()-options->time;
+            options->set_up_time = 0;
+            options->residual_norm = 0.;
+            options->num_iter = 0;
+            options->converged = true;
+        break;
 
-            default:
-                Esys_setError(VALUE_ERROR, "solve: unknown package code");
-            break;
-        }
+        case PASO_UMFPACK:
+            if (mpi_info->size > 1) {
+                throw PasoException("solve: UMFPACK package does not support MPI.");
+            }
+            options->converged = false;
+            options->time = escript::gettime();
+            Performance_startMonitor(&pp, PERFORMANCE_ALL);
+            UMFPACK_solve(mainBlock, out, in, options->refinements, options->verbose);
+            solver_package = PASO_UMFPACK;
+            Performance_stopMonitor(&pp, PERFORMANCE_ALL);
+            options->time = escript::gettime()-options->time;
+            options->set_up_time = 0;
+            options->residual_norm = 0.;
+            options->num_iter = 0;
+            options->converged = true;
+        break;
+
+        default:
+            throw PasoException("solve: unknown package code");
+        break;
     }
-    /*
-        cancel divergence errors
-    */
-    if (options->accept_failed_convergence) {
-        if (Esys_getErrorType() == DIVERGED) {
-            Esys_resetError();
+
+    if (res == Divergence) {
+        // cancel divergence errors
+        if (options->accept_failed_convergence) {
+            if (options->verbose)
+                printf("paso: failed convergence error has been canceled as requested.\n");
+        } else {
+            throw PasoException("Solver: No improvement during iteration. Iterative solver gives up.");
+        }
+    } else if (res == MaxIterReached) {
+        // cancel divergence errors
+        if (options->accept_failed_convergence) {
             if (options->verbose)
                 printf("paso: failed convergence error has been canceled as requested.\n");
+        } else {
+            throw PasoException("Solver: maximum number of iteration steps reached.\nReturned solution does not fulfil stopping criterion.");
         }
+    } else if (res == InputError) {
+        throw PasoException("Solver: illegal dimension in iterative solver.");
+    } else if (res == NegativeNormError) {
+        throw PasoException("Solver: negative energy norm (try other solver or preconditioner).");
+    } else if (res == Breakdown) {
+        throw PasoException("Solver: fatal break down in iterative solver.");
+    } else if (res != NoError) {
+        throw PasoException("Solver: Generic error in solver.");
     }
     Performance_close(&pp, options->verbose);
-    //options->showDiagnostics();
 }
 
 void solve_free(SystemMatrix* in)
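
solve.cpp turns the free function paso::solve() into the member SystemMatrix::solve() and maps the solver's SolverResult onto exceptions: only Divergence and MaxIterReached can be waived when accept_failed_convergence is set, everything else always throws. A minimal sketch of that mapping (the enum values follow the diff, but their order is illustrative; throwOnFailure() is a hypothetical helper and std::runtime_error stands in for PasoException):

    #include <stdexcept>
    #include <string>

    enum SolverResult { NoError, MaxIterReached, InputError, MemoryError,
                        Breakdown, NegativeNormError, Divergence };

    // Converts an iterative-solver result into the exception behaviour used
    // above; a waived convergence failure keeps the partial solution.
    void throwOnFailure(SolverResult res, bool acceptFailedConvergence)
    {
        if (res == NoError)
            return;
        const bool convergenceFailure = (res == Divergence || res == MaxIterReached);
        if (convergenceFailure && acceptFailedConvergence)
            return;  // caller explicitly accepted a non-converged solution
        throw std::runtime_error("Solver failed with result code "
                                 + std::to_string(static_cast<int>(res)));
    }
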
diff --git a/pasowrap/py_src/__init__.py b/pasowrap/py_src/__init__.py
deleted file mode 100644
index 89dfee8..0000000
--- a/pasowrap/py_src/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-from .pasowrap import *
-
-__nodocorecursion=['pasowrapcpp', 'pasowrap']
diff --git a/pasowrap/py_src/pasowrap.py b/pasowrap/py_src/pasowrap.py
deleted file mode 100644
index 3bd8b4a..0000000
--- a/pasowrap/py_src/pasowrap.py
+++ /dev/null
@@ -1,27 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2011-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2011-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-import esys.escript     # This is just to ensure required libraries are loaded
-from .pasowrapcpp import *
diff --git a/pasowrap/src/PasoException.cpp b/pasowrap/src/PasoException.cpp
deleted file mode 100644
index 5c5a2da..0000000
--- a/pasowrap/src/PasoException.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#include "PasoException.h"
-#include <esysUtils/error.h>
-
-namespace paso
-{
-
-const std::string 
-PasoException::exceptionNameValue("PasoException");
-
-PASOWRAP_DLL_API
-const std::string &
-PasoException::exceptionName() const
-{
-  return exceptionNameValue;
-}
-
-PASOWRAP_DLL_API
-void checkPasoError() 
-{
-  if (Esys_noError()) {
-    return;
-  } else {
-    //
-    // reset the error code to no error otherwise the next call to
-    // this function may resurrect a previous error
-    Esys_resetError();
-    throw PasoException(Esys_getErrorMessage());
-  }
-}
-
-}
diff --git a/pasowrap/src/PasoException.h b/pasowrap/src/PasoException.h
deleted file mode 100644
index af6158c..0000000
--- a/pasowrap/src/PasoException.h
+++ /dev/null
@@ -1,111 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/* File extracted from finley and modified */
-
-#if !defined  PasoException_20040526_H
-#define PasoException_20040526_H
-#include "system_dep.h"
-
-#include "esysUtils/EsysException.h"
-
-namespace paso
-{
-
-  /**
-  \brief
-  PasoException exception class.
-
-  Description:
-  PasoException exception class.
-  The class provides a public function returning the exception name
-  */
-  class PasoException : public esysUtils::EsysException
-  {
-
-  protected:
-
-     typedef EsysException Parent;
-
-  public:
-    /**
-    \brief
-    Default constructor for the exception.
-    */
-    PASOWRAP_DLL_API
-    PasoException() : Parent() { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    PASOWRAP_DLL_API
-    PasoException(const char *cstr) : Parent(cstr) { updateMessage();}
-    /**
-    \brief
-    Constructor for the exception.
-    */
-    PASOWRAP_DLL_API
-    PasoException(const std::string &str) :
-    Parent(str) { updateMessage();}
-    /**
-    \brief
-    Copy Constructor for the exception.
-    */
-    PASOWRAP_DLL_API
-    PasoException(const PasoException &other) : Parent(other)
-      {
-        updateMessage();
-      }
-
-    /// Destructor
-    PASOWRAP_DLL_API
-    virtual ~PasoException() THROW(NO_ARG) {}
-
-    /**
-    \brief
-    Assignment operator.
-    */
-    PASOWRAP_DLL_API
-    inline PasoException &
-    operator=(const PasoException &other ) THROW(NO_ARG)
-       {
-         Parent::operator=(other);
-         updateMessage();
-         return *this;
-       }
-
-    /**
-    \brief
-    Returns the name of the exception.
-    */
-    PASOWRAP_DLL_API
-    virtual const std::string & exceptionName() const;
-
-  private:
-
-    //
-    // the exception name is immutable and class-wide.
-    // Inheritor note; you need one of these too.
-    // and an overloaded exceptionName() in your .cpp implementation file. 
-    static const std::string exceptionNameValue;
-  };
-
-  PASOWRAP_DLL_API
-  void checkPasoError(); 
-  
-  
-} // end of namespace
-#endif
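
The removed PasoException.h/.cpp above implemented the old error bridge: C-level code recorded failures in a global flag via Esys_setError(), and checkPasoError() later polled that flag, reset it, and re-threw. With paso throwing PasoException at the point of failure (see the paso/src hunks earlier in this diff), the polling step disappears. A small sketch contrasting the two styles, with a hypothetical global string standing in for the old Esys error state:

    #include <stdexcept>
    #include <string>

    std::string g_errorMessage;  // hypothetical stand-in for the Esys error state

    // Old style: the failing routine only records a message...
    void failQuietly() { g_errorMessage = "dt must be positive."; }

    // ...and every caller has to poll, reset and convert it afterwards.
    void checkError()
    {
        if (!g_errorMessage.empty()) {
            const std::string msg = g_errorMessage;
            g_errorMessage.clear();  // avoid resurrecting a stale error later
            throw std::runtime_error(msg);
        }
    }

    // New style: the failing routine throws immediately, no polling needed.
    void failLoudly() { throw std::runtime_error("dt must be positive."); }
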
diff --git a/pasowrap/src/SConscript b/pasowrap/src/SConscript
deleted file mode 100644
index eaab536..0000000
--- a/pasowrap/src/SConscript
+++ /dev/null
@@ -1,83 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-import os
-Import('*')
-
-local_env = env.Clone()
-local_dodgy = dodgy_env.Clone()
-py_wrapper_local_env = env.Clone()
-
-# Remove the sharedlibrary prefix on all platform - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
-
-sources = """
-  PasoException.cpp
-  SystemMatrixAdapter.cpp
-  TransportProblemAdapter.cpp
-""".split()
-
-headers = """
-  PasoException.h
-  SystemMatrixAdapter.h      
-  TransportProblemAdapter.h
-  system_dep.h
-""".split()
-
-local_env.Prepend(LIBS = ['escript', 'paso', 'esysUtils'])
-if IS_WINDOWS:
-    local_env.Append(CPPDEFINES = ['PASOWRAP_EXPORTS'])
-
-module_name = 'pasowrap'
-
-# specify to build shared object
-
-lib = local_env.SharedLibrary(module_name, sources)
-env.Alias('build_pasowrap_lib', lib)
-
-include_path = Dir('pasowrap', local_env['incinstall'])
-hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_pasowrap_headers', hdr_inst)
-
-lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_pasowrap_lib', lib_inst)
-
-### Python wrapper ###
-if not env['build_shared']:
-    py_wrapper_local_env.Prepend(LIBS = ['pasowrap', 'escript', 'esysUtils'])
-else:
-    py_wrapper_local_env.Prepend(LIBS = ['pasowrap', 'escript', 'paso', 'esysUtils'])
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'pasowrapcpp.cpp')
-env.Alias('build_pasowrapcpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_pasowrapcpp_lib', mod_inst)
-
-# configure python module
-local_env.SConscript(dirs = ['#/pasowrap/py_src'], variant_dir='py', duplicate=0)
-
-# configure unit tests
-#local_env.SConscript(dirs = ['#/pasowrap/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
-
diff --git a/pasowrap/src/SystemMatrixAdapter.cpp b/pasowrap/src/SystemMatrixAdapter.cpp
deleted file mode 100644
index 2ba84d1..0000000
--- a/pasowrap/src/SystemMatrixAdapter.cpp
+++ /dev/null
@@ -1,392 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "SystemMatrixAdapter.h" 
-#include <escript/SolverOptions.h>
-#include <paso/Solver.h>
-
-using namespace std;
-
-namespace paso {
-
-SystemMatrixAdapter::SystemMatrixAdapter()
-{
-   throw PasoException("Error - Illegal to generate default SystemMatrixAdapter.");
-}
-
-SystemMatrixAdapter::SystemMatrixAdapter(SystemMatrix_ptr system_matrix,
-        int row_blocksize, const escript::FunctionSpace& row_fs,
-        int column_blocksize, const escript::FunctionSpace& column_fs) :
-    AbstractSystemMatrix(row_blocksize,row_fs,column_blocksize,column_fs),
-    m_system_matrix(system_matrix)
-{
-}
-
-SystemMatrixAdapter::~SystemMatrixAdapter()
-{ 
-}
-
-SystemMatrix_ptr SystemMatrixAdapter::getPaso_SystemMatrix() const 
-{
-   return m_system_matrix;
-}
-
-void SystemMatrixAdapter::ypAx(escript::Data& y, escript::Data& x) const 
-{
-   if ( x.getDataPointSize()  != getColumnBlockSize()) {
-      throw PasoException("matrix vector product : column block size does not match the number of components in input.");
-   } else if (y.getDataPointSize() != getRowBlockSize()) {
-      throw PasoException("matrix vector product : row block size does not match the number of components in output.");
-   } else if ( x.getFunctionSpace()  != getColumnFunctionSpace()) {
-      throw PasoException("matrix vector product : column function space and function space of input don't match.");
-   } else if (y.getFunctionSpace() != getRowFunctionSpace()) {
-      throw PasoException("matrix vector product : row function space and function space of output don't match.");
-   }
-   x.expand();
-   y.expand();
-   x.requireWrite();
-   y.requireWrite();
-   double* x_dp=x.getSampleDataRW(0);
-   double* y_dp=y.getSampleDataRW(0);
-   SystemMatrix_MatrixVector(1., m_system_matrix, x_dp, 1., y_dp);
-   checkPasoError();
-}
-
-int SystemMatrixAdapter::mapOptionToPaso(int option)
-{
-    switch (option) {
-        case escript::SO_DEFAULT:
-            return PASO_DEFAULT;
-
-        case escript::SO_PACKAGE_MKL:
-            return PASO_MKL;
-        case escript::SO_PACKAGE_PASO:
-            return PASO_PASO;
-        case escript::SO_PACKAGE_PASTIX:
-            return PASO_PASTIX;
-        case escript::SO_PACKAGE_SUPER_LU:
-            return PASO_SUPER_LU;
-        case escript::SO_PACKAGE_TRILINOS:
-            return PASO_TRILINOS;
-        case escript::SO_PACKAGE_UMFPACK:
-            return PASO_UMFPACK;
-
-        case escript::SO_METHOD_BICGSTAB:
-            return PASO_BICGSTAB;
-        case escript::SO_METHOD_CGS:
-            return PASO_CGS;
-        case escript::SO_METHOD_CHOLEVSKY:
-            return PASO_CHOLEVSKY;
-        case escript::SO_METHOD_CR:
-            return PASO_CR;
-        case escript::SO_METHOD_DIRECT:
-            return PASO_DIRECT;
-        case escript::SO_METHOD_GMRES:
-            return PASO_GMRES;
-        case escript::SO_METHOD_ITERATIVE:
-            return PASO_ITERATIVE;
-        case escript::SO_METHOD_MINRES:
-            return PASO_MINRES;
-        case escript::SO_METHOD_NONLINEAR_GMRES:
-            return PASO_NONLINEAR_GMRES;
-        case escript::SO_METHOD_PCG:
-            return PASO_PCG;
-        case escript::SO_METHOD_PRES20:
-            return PASO_PRES20;
-        case escript::SO_METHOD_TFQMR:
-            return PASO_TFQMR;
-
-        case escript::SO_PRECONDITIONER_AMG:
-            return PASO_AMG;
-        case escript::SO_PRECONDITIONER_AMLI:
-            return PASO_AMLI;
-        case escript::SO_PRECONDITIONER_BOOMERAMG:
-            return PASO_BOOMERAMG;
-        case escript::SO_PRECONDITIONER_GAUSS_SEIDEL:
-            return PASO_GAUSS_SEIDEL;
-        case escript::SO_PRECONDITIONER_ILU0:
-            return PASO_ILU0;
-        case escript::SO_PRECONDITIONER_ILUT:
-            return PASO_ILUT;
-        case escript::SO_PRECONDITIONER_JACOBI:
-            return PASO_JACOBI;
-        case escript::SO_PRECONDITIONER_NONE:
-            return PASO_NO_PRECONDITIONER;
-        case escript::SO_PRECONDITIONER_REC_ILU:
-            return PASO_REC_ILU;
-        case escript::SO_PRECONDITIONER_RILU:
-            return PASO_RILU;
-
-        case escript::SO_ODESOLVER_BACKWARD_EULER:         
-            return PASO_BACKWARD_EULER;
-        case escript::SO_ODESOLVER_CRANK_NICOLSON:
-            return PASO_CRANK_NICOLSON;
-        case escript::SO_ODESOLVER_LINEAR_CRANK_NICOLSON:
-            return PASO_LINEAR_CRANK_NICOLSON;
-
-        case escript::SO_INTERPOLATION_CLASSIC:
-            return PASO_CLASSIC_INTERPOLATION;
-        case escript::SO_INTERPOLATION_CLASSIC_WITH_FF_COUPLING:
-            return PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING;
-        case escript::SO_INTERPOLATION_DIRECT:
-            return PASO_DIRECT_INTERPOLATION;
-
-        case escript::SO_COARSENING_AGGREGATION:
-            return PASO_AGGREGATION_COARSENING;
-        case escript::SO_COARSENING_CIJP:
-            return PASO_CIJP_COARSENING;
-        case escript::SO_COARSENING_CIJP_FIXED_RANDOM:
-            return PASO_CIJP_FIXED_RANDOM_COARSENING;
-        case escript::SO_COARSENING_FALGOUT:
-            return PASO_FALGOUT_COARSENING;
-        case escript::SO_COARSENING_HMIS:
-            return PASO_HMIS_COARSENING;
-        case escript::SO_COARSENING_PMIS:
-            return PASO_PMIS_COARSENING;
-        case escript::SO_COARSENING_RUGE_STUEBEN:
-            return PASO_RUGE_STUEBEN_COARSENING;
-        case escript::SO_COARSENING_STANDARD:
-            return PASO_STANDARD_COARSENING;   
-        case escript::SO_COARSENING_YAIR_SHAPIRA:
-            return PASO_YAIR_SHAPIRA_COARSENING;
-
-        case escript::SO_REORDERING_DEFAULT:
-            return PASO_DEFAULT_REORDERING;
-        case escript::SO_REORDERING_MINIMUM_FILL_IN:
-            return PASO_MINIMUM_FILL_IN;
-        case escript::SO_REORDERING_NESTED_DISSECTION:
-            return PASO_NESTED_DISSECTION;
-        case escript::SO_REORDERING_NONE:
-            return PASO_NO_REORDERING;
-
-        default:
-            stringstream temp;
-            temp << "Error - Cannot map option value "<< option << " onto Paso";
-            throw PasoException(temp.str());
-    }
-}
-
-int SystemMatrixAdapter::getSystemMatrixTypeId(int solver, int preconditioner,
-        int package, bool symmetry, const esysUtils::JMPI& mpiInfo)
-{
-    int out=SystemMatrix::getSystemMatrixTypeId(mapOptionToPaso(solver),
-            mapOptionToPaso(preconditioner), mapOptionToPaso(package),
-            symmetry?1:0, mpiInfo);
-    checkPasoError();
-    return out;
-}
-
-void SystemMatrixAdapter::Print_Matrix_Info(bool full=false) const
-{
-    int first_row_index = m_system_matrix->row_distribution->first_component[m_system_matrix->mpi_info->rank];
-    int last_row_index  = m_system_matrix->row_distribution->first_component[m_system_matrix->mpi_info->rank+1]-1;
-    int first_col_index = m_system_matrix->col_distribution->first_component[m_system_matrix->mpi_info->rank];
-    int last_col_index  = m_system_matrix->col_distribution->first_component[m_system_matrix->mpi_info->rank+1]-1;
-
-    std::cout << "Print_Matrix_Info running on CPU "
-        << m_system_matrix->mpi_info->rank << " of "
-        << m_system_matrix->mpi_info->size << std::endl;
-
-    switch (m_system_matrix->type) {
-        case MATRIX_FORMAT_DEFAULT:      
-            std::cout << "\tMatrix type MATRIX_FORMAT_DEFAULT" << std::endl;
-            break;
-        case MATRIX_FORMAT_CSC:
-            std::cout << "\tMatrix type MATRIX_FORMAT_CSC" << std::endl;
-            break;
-        case MATRIX_FORMAT_BLK1:
-            std::cout << "\tMatrix type MATRIX_FORMAT_BLK1" << std::endl;
-            break;
-        case MATRIX_FORMAT_OFFSET1:
-            std::cout << "\tMatrix type MATRIX_FORMAT_OFFSET1" << std::endl;
-            break;
-        case MATRIX_FORMAT_TRILINOS_CRS:
-            std::cout << "\tMatrix type MATRIX_FORMAT_TRILINOS_CRS" << std::endl;
-            break;
-        default:
-            std::cout << "\tMatrix type unknown" << std::endl;
-            break;
-    }
-
-    std::cout << "\trow indices run from " << first_row_index << " to "
-              << last_row_index << std::endl;
-    std::cout << "\tcol indices run from " << first_col_index << " to "
-              << last_col_index << std::endl;
-    std::cout << "\tmainBlock numRows " << m_system_matrix->mainBlock->numRows
-              << std::endl;
-    std::cout << "\tmainBlock numCols " << m_system_matrix->mainBlock->numCols
-              << std::endl;
-    std::cout << "\tmainBlock pattern numOutput "
-              << m_system_matrix->mainBlock->pattern->numOutput << std::endl;
-    std::cout << "\tcol_coupleBlock numRows "
-              << m_system_matrix->col_coupleBlock->numRows << std::endl;
-    std::cout << "\tcol_coupleBlock numCols "
-              << m_system_matrix->col_coupleBlock->numCols << std::endl;
-    std::cout << "\tcol_coupleBlock pattern numOutput "
-              << m_system_matrix->col_coupleBlock->pattern->numOutput
-              << std::endl;
-    std::cout << "\trow_coupleBlock numRows "
-              << m_system_matrix->row_coupleBlock->numRows << std::endl;
-    std::cout << "\trow_coupleBlock numCols "
-              << m_system_matrix->row_coupleBlock->numCols << std::endl;
-    std::cout << "\trow_coupleBlock pattern numOutput "
-              << m_system_matrix->row_coupleBlock->pattern->numOutput
-              << std::endl;
-    std::cout << "\trow_block_size " << m_system_matrix->row_block_size
-              << std::endl;
-    std::cout << "\tcol_block_size " << m_system_matrix->col_block_size
-              << std::endl;
-    std::cout << "\tblock_size " << m_system_matrix->block_size << std::endl;
-    std::cout << "\tlogical_row_block_size "
-              << m_system_matrix->logical_row_block_size << std::endl;
-    std::cout << "\tlogical_col_block_size "
-              << m_system_matrix->logical_col_block_size << std::endl;
-}
-
-void SystemMatrixAdapter::setToSolution(escript::Data& out, escript::Data& in,
-                                        boost::python::object& options) const
-{
-   Options paso_options;
-   options.attr("resetDiagnostics")();
-   escriptToPasoOptions(&paso_options,options);
-   if ( out.getDataPointSize()  != getColumnBlockSize()) {
-      throw PasoException("solve : column block size does not match the number of components of solution.");
-   } else if ( in.getDataPointSize() != getRowBlockSize()) {
-      throw PasoException("solve : row block size does not match the number of components of  right hand side.");
-   } else if ( out.getFunctionSpace()  != getColumnFunctionSpace()) {
-      throw PasoException("solve : column function space and function space of solution don't match.");
-   } else if (in.getFunctionSpace() != getRowFunctionSpace()) {
-      throw PasoException("solve : row function space and function space of right hand side don't match.");
-   }
-   out.expand();
-   in.expand();
-   out.requireWrite();
-   in.requireWrite();
-   double* out_dp=out.getSampleDataRW(0);        
-   double* in_dp=in.getSampleDataRW(0);                
-   paso::solve(m_system_matrix, out_dp, in_dp, &paso_options);
-   pasoToEscriptOptions(&paso_options,options);
-   checkPasoError();
-}
-
-void SystemMatrixAdapter::nullifyRowsAndCols(escript::Data& row_q,
-                                             escript::Data& col_q, double mdv)
-{
-   if ( col_q.getDataPointSize()  != getColumnBlockSize()) {
-      throw PasoException("nullifyRowsAndCols : column block size does not match the number of components of column mask.");
-   } else if ( row_q.getDataPointSize() != getRowBlockSize()) {
-      throw PasoException("nullifyRowsAndCols : row block size does not match the number of components of row mask.");
-   } else if ( col_q.getFunctionSpace()  != getColumnFunctionSpace()) {
-      throw PasoException("nullifyRowsAndCols : column function space and function space of column mask don't match.");
-   } else if (row_q.getFunctionSpace() != getRowFunctionSpace()) {
-      throw PasoException("nullifyRowsAndCols : row function space and function space of row mask don't match.");
-   }
-   row_q.expand();
-   col_q.expand();
-   row_q.requireWrite();
-   col_q.requireWrite();
-   double* row_q_dp=row_q.getSampleDataRW(0);
-   double* col_q_dp=col_q.getSampleDataRW(0);
-   m_system_matrix->nullifyRowsAndCols(row_q_dp, col_q_dp, mdv);
-   checkPasoError();
-}
-
-void SystemMatrixAdapter::saveMM(const std::string& filename) const
-{
-   m_system_matrix->saveMM(filename.c_str());
-   checkPasoError();
-}
-
-void SystemMatrixAdapter::saveHB(const std::string& filename) const
-{
-   m_system_matrix->saveHB(filename.c_str());
-   checkPasoError();
-}
-
-void SystemMatrixAdapter::resetValues()
-{
-   m_system_matrix->setValues(0.);
-   solve_free(m_system_matrix.get());
-   checkPasoError();
-}
-
-void SystemMatrixAdapter::pasoToEscriptOptions(const Options* paso_options,
-                                               boost::python::object& options) 
-{
-#define SET(__key__,__val__,__type__) options.attr("_updateDiagnostics")(__key__,(__type__)paso_options->__val__)
-   SET("num_iter", num_iter, int);
-   SET("num_level", num_level, int);
-   SET("num_inner_iter", num_inner_iter, int);
-   SET("time", time, double);
-   SET("set_up_time", set_up_time, double);
-   SET("net_time", net_time, double);
-   SET("residual_norm", residual_norm, double);
-   SET("converged",converged, bool);
-   SET("time_step_backtracking_used", time_step_backtracking_used,bool);
-   SET("coarse_level_sparsity",coarse_level_sparsity,double);
-   SET("num_coarse_unknowns",num_coarse_unknowns,int);
-#undef SET
-}
-
-void SystemMatrixAdapter::escriptToPasoOptions(Options* paso_options,
-                                         const boost::python::object& options) 
-{
-    escript::SolverBuddy sb = boost::python::extract<escript::SolverBuddy>(options);
-
-    paso_options->setDefaults();
-    paso_options->method = mapOptionToPaso(sb.getSolverMethod());
-    paso_options->package = mapOptionToPaso(sb.getPackage());
-    paso_options->verbose = sb.isVerbose();
-    paso_options->symmetric = sb.isSymmetric();
-    paso_options->tolerance = sb.getTolerance();
-    paso_options->absolute_tolerance = sb.getAbsoluteTolerance();
-    paso_options->inner_tolerance = sb.getInnerTolerance();
-    paso_options->adapt_inner_tolerance = sb.adaptInnerTolerance();
-    paso_options->reordering = mapOptionToPaso(sb.getReordering());
-    paso_options->preconditioner = mapOptionToPaso(sb.getPreconditioner());
-    paso_options->ode_solver = mapOptionToPaso(sb.getODESolver());
-    paso_options->iter_max = sb.getIterMax();
-    paso_options->inner_iter_max = sb.getInnerIterMax();
-    paso_options->drop_tolerance = sb.getDropTolerance();
-    paso_options->drop_storage = sb.getDropStorage();
-    paso_options->truncation = sb.getTruncation();
-    paso_options->restart = sb._getRestartForC();
-    paso_options->sweeps = sb.getNumSweeps();
-    paso_options->pre_sweeps = sb.getNumPreSweeps();
-    paso_options->post_sweeps = sb.getNumPostSweeps();
-    paso_options->level_max = sb.getLevelMax();
-    paso_options->min_coarse_matrix_size = sb.getMinCoarseMatrixSize();
-    paso_options->coarsening_threshold = sb.getCoarseningThreshold();
-    paso_options->accept_failed_convergence = sb.acceptConvergenceFailure();
-    paso_options->coarsening_method = mapOptionToPaso(sb.getCoarsening());
-    paso_options->smoother = mapOptionToPaso(sb.getSmoother());
-    paso_options->relaxation_factor = sb.getRelaxationFactor();
-    paso_options->use_local_preconditioner = sb.useLocalPreconditioner();
-    paso_options->min_coarse_sparsity = sb.getMinCoarseMatrixSparsity();
-    paso_options->refinements = sb.getNumRefinements();
-    paso_options->coarse_matrix_refinements = sb.getNumCoarseMatrixRefinements();
-    paso_options->usePanel = sb.usePanel();
-    paso_options->interpolation_method = sb.getAMGInterpolation();
-    paso_options->diagonal_dominance_threshold = sb.getDiagonalDominanceThreshold();
-}
- 
-
-}  // end of namespace
-
diff --git a/pasowrap/src/SystemMatrixAdapter.h b/pasowrap/src/SystemMatrixAdapter.h
deleted file mode 100644
index 769ff40..0000000
--- a/pasowrap/src/SystemMatrixAdapter.h
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-#if !defined  SystemMatrixAdapter_20040610_H
-#define SystemMatrixAdapter_20040610_H
-#include "system_dep.h"
-
-#include "paso/SystemMatrix.h"
-#include "paso/Options.h"
-
-#include "PasoException.h"
-
-#include "escript/AbstractSystemMatrix.h"
-#include "escript/Data.h"
-#include "escript/UtilC.h"   
-
-#include <boost/python/object.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/python/extract.hpp>
-
-namespace paso {
-
-PASOWRAP_DLL_API
-class SystemMatrixAdapter: public escript::AbstractSystemMatrix
-{
-
-/**
-   \brief
-   Wrapper for paso::SystemMatrix. 
-
-   Description:
-   Wrapper for paso::SystemMatrix.
-*/
-
- public:
-
-  /**
-     /brief
-     Default Constructor for SystemMatrixAdapter.
-     NB: Only throws an exception.
-  */
-  SystemMatrixAdapter();
-
-  /**
-     /brief
-     Constructor for SystemMatrixAdapter.
-  */
-  SystemMatrixAdapter(SystemMatrix_ptr system_matrix,
-                      int row_blocksize,
-                      const escript::FunctionSpace& row_functionspace,
-                      int column_blocksize,
-                      const escript::FunctionSpace& colum_functionspace);
-
-
-  /**
-     \brief
-     Destructor for SystemMatrixAdapter. As specified in the constructor
-     this deallocates the pointer given to the constructor.
-  */
-  ~SystemMatrixAdapter();
-
-  /**
-     \brief
-     Returns the pointer to the system matrix.
-  */
-  SystemMatrix_ptr getPaso_SystemMatrix() const;
-
-  /**
-     \brief
-     Returns the system matrix as a const AbstractSystemMatrix&.
-  */
-  inline const escript::AbstractSystemMatrix& asAbstractSystemMatrix() const
-  {
-     return dynamic_cast<const escript::AbstractSystemMatrix&>(*this);
-  }
-
-  /**
-     \brief
-     Returns a system matrix as a const SystemMatrixAdapter&.
-  */
-  inline static const SystemMatrixAdapter& asSystemMatrixAdapter(const AbstractSystemMatrix& systemmatrix)
-  {
-     return dynamic_cast<const SystemMatrixAdapter&>(systemmatrix);
-  }
-
-  /**
-    \brief
-    nullifyRowsAndCols - calls SystemMatrix::nullifyRowsAndCols.
-  */
-  void nullifyRowsAndCols(escript::Data& row_q, escript::Data& col_q, double mdv);
-
-  /**
-     \brief writes the matrix to a file using the Matrix Market file format
-  */
-  virtual void saveMM(const std::string& filename) const;
-
-  /**
-     \brief writes the matrix to a file using the Harwell-Boeing file format
-  */
-  virtual void saveHB(const std::string& filename) const;
-
-  /**
-     \brief sets the matrix entries to zero
-  */
-  virtual void resetValues();
-
-  /**
-     \brief maps escript options onto Paso options
-  */
-  static int mapOptionToPaso(int option);
-
-  /**
-     \brief returns the identifier of the matrix type to be used for the global
-     stiffness matrix when the given solver, preconditioner and package is
-     used
-  */
-  static int getSystemMatrixTypeId(int solver, int preconditioner, int package, const bool symmetry, const esysUtils::JMPI& mpiInfo);
-
-  /**
-     \brief extract paso options from SolutionOptions class
-  */
- 
-  static void escriptToPasoOptions(Options* paso_options, const boost::python::object& options);
-
-  /**
-     \brief copies diagnostic data back to the solver options
-  */
- 
-  static void pasoToEscriptOptions(const Options* paso_options, boost::python::object& options);
- 
-  /**
-     \brief prints information about a system matrix
-  */
-  void Print_Matrix_Info(bool) const;
-
- protected:
-
- private:
-
-   /**
-      \brief
-      solves the linear system this*out=in
-   */
-   virtual void setToSolution(escript::Data& out, escript::Data& in, boost::python::object& options) const;
-
-   /**
-       \brief
-       performs y+=this*x
-   */
-   virtual void ypAx(escript::Data& y, escript::Data& x) const;
-
-   //
-   // pointer to the externally created system_matrix.
-   //
-   SystemMatrix_ptr m_system_matrix;
-
-};
-
-} // end of namespace
-#endif
diff --git a/pasowrap/src/TransportProblemAdapter.cpp b/pasowrap/src/TransportProblemAdapter.cpp
deleted file mode 100644
index c8ff1ad..0000000
--- a/pasowrap/src/TransportProblemAdapter.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include "TransportProblemAdapter.h" 
-#include "SystemMatrixAdapter.h" 
-
-using namespace std;
-
-namespace paso {
-
-TransportProblemAdapter::TransportProblemAdapter()
-{
-   throw PasoException("Error - Illegal to generate default TransportProblemAdapter.");
-}
-
-TransportProblemAdapter::TransportProblemAdapter(TransportProblem_ptr tp,
-                int block_size, const escript::FunctionSpace& functionspace) :
-    AbstractTransportProblem(block_size, functionspace),
-    m_transport_problem(tp)
-{
-}
-
-TransportProblem_ptr TransportProblemAdapter::getPaso_TransportProblem() const 
-{
-   return m_transport_problem;
-}
-
-void TransportProblemAdapter::setToSolution(escript::Data& out,
-        escript::Data& u0, escript::Data& source, double dt,
-        boost::python::object& options) const
-{
-    Options paso_options;
-    SystemMatrixAdapter::escriptToPasoOptions(&paso_options, options);
-    options.attr("resetDiagnostics")();
-    if ( out.getDataPointSize() != getBlockSize()) {
-        throw PasoException("solve : block size of solution does not match block size of transport problems.");
-    } else if ( source.getDataPointSize() != getBlockSize()) {
-        throw PasoException("solve : block size of source term does not match block size of transport problems.");
-    } else if ( out.getFunctionSpace()  != getFunctionSpace()) {
-        throw PasoException("solve : function spaces of solution and of transport problem don't match.");
-    } else if (source.getFunctionSpace() != getFunctionSpace()) {
-        throw PasoException("solve : function spaces of source term and of transport problem don't match.");
-    } else if (dt<=0.) {
-        throw PasoException("solve : time increment dt needs to be positive.");
-    }
-    out.expand();
-    source.expand();
-    u0.expand();
-    out.requireWrite();
-    source.requireWrite();
-    u0.requireWrite();
-    double* out_dp = out.getSampleDataRW(0);
-    double* u0_dp = u0.getSampleDataRW(0);
-    double* source_dp = source.getSampleDataRW(0);
-    SystemMatrixAdapter::pasoToEscriptOptions(&paso_options, options);
-    m_transport_problem->solve(out_dp, dt, u0_dp, source_dp, &paso_options);
-
-    checkPasoError();
-}
-
-void TransportProblemAdapter::resetTransport() const
-{
-    m_transport_problem->reset();
-    checkPasoError();
-}
-
-void TransportProblemAdapter::copyConstraint(escript::Data& source,
-                                             escript::Data& q,
-                                             escript::Data& r) const
-{
-    if (q.getDataPointSize() != getBlockSize()) {
-        throw PasoException("copyConstraint : block size does not match the number of components of constraint mask.");
-    } else if ( q.getFunctionSpace()  != getFunctionSpace()) {
-        throw PasoException("copyConstraint : function spaces of transport problem and constraint mask don't match.");
-    } else if ( r.getDataPointSize()  != getBlockSize()) {
-        throw PasoException("copyConstraint : block size does not match the number of components of constraint values.");
-    } else if ( r.getFunctionSpace()  != getFunctionSpace()) {
-        throw PasoException("copyConstraint : function spaces of transport problem and constraint values don't match.");
-    } else if ( source.getDataPointSize()  != getBlockSize()) {
-        throw PasoException("copyConstraint : block size does not match the number of components of source.");
-    } else if ( source.getFunctionSpace()  != getFunctionSpace()) {
-        throw PasoException("copyConstraint : function spaces of transport problem and source don't match.");
-    }
-
-    if (false) {
-        // r2=r where q>0, 0 elsewhere
-        escript::Data r2(0., q.getDataPointShape(), q.getFunctionSpace());
-        r2.copyWithMask(r, q);
-
-        // source -= tp->mass_matrix*r2
-        r2.expand();
-        source.expand();
-        q.expand();
-        r2.requireWrite();
-        source.requireWrite();
-        q.requireWrite();
-        double* r2_dp = r2.getSampleDataRW(0);
-        double* source_dp = source.getSampleDataRW(0);
-        double* q_dp = q.getSampleDataRW(0);
-    
-        SystemMatrix_MatrixVector(-1., m_transport_problem->mass_matrix,
-                                  r2_dp, 1., source_dp);
-        checkPasoError();
-
-        // insert 0 rows into transport matrix
-        m_transport_problem->transport_matrix->nullifyRows(q_dp, 0.);
-        checkPasoError();
-
-        // insert 0 rows and 1 in main diagonal into mass matrix
-        m_transport_problem->mass_matrix->nullifyRowsAndCols(q_dp, q_dp, 1.);
-        checkPasoError();
-
-        source.copyWithMask(escript::Data(0.,q.getDataPointShape(),q.getFunctionSpace()),q);
-    } else {
-        r.expand();
-        source.expand();
-        q.expand();
-        r.requireWrite();
-        source.requireWrite();
-        q.requireWrite();
-        double* r_dp = r.getSampleDataRW(0);
-        double* source_dp = source.getSampleDataRW(0);
-        double* q_dp = q.getSampleDataRW(0);
-        m_transport_problem->setUpConstraint(q_dp);
-        checkPasoError();
-        m_transport_problem->insertConstraint(r_dp, source_dp);
-        checkPasoError();
-    }
-}
-
-double TransportProblemAdapter::getSafeTimeStepSize() const
-{
-    const double dt = m_transport_problem->getSafeTimeStepSize();
-    checkPasoError();
-    return dt;
-}
-
-double TransportProblemAdapter::getUnlimitedTimeStepSize() const
-{
-    return LARGE_POSITIVE_FLOAT;
-}
-
-int TransportProblemAdapter::getTransportTypeId(int solver, int preconditioner,
-                                                int package, bool symmetry,
-                                                const esysUtils::JMPI& mpiInfo)
-{
-    return TransportProblem::getTypeId(
-            SystemMatrixAdapter::mapOptionToPaso(solver),
-            SystemMatrixAdapter::mapOptionToPaso(preconditioner),
-            SystemMatrixAdapter::mapOptionToPaso(package), symmetry, mpiInfo);
-}
-
-
-}  // end of namespace
-
diff --git a/pasowrap/src/TransportProblemAdapter.h b/pasowrap/src/TransportProblemAdapter.h
deleted file mode 100644
index 9e58dc4..0000000
--- a/pasowrap/src/TransportProblemAdapter.h
+++ /dev/null
@@ -1,136 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-/* This file was extracted from finley's CPPAdapter then modified */
-
-#ifndef __PASOWRAP_TRANSPORTPROBLEMADAPTER_H__
-#define __PASOWRAP_TRANSPORTPROBLEMADAPTER_H__
-
-#include "system_dep.h"
-
-#include "paso/Transport.h"
-#include "paso/Options.h"
-
-#include "PasoException.h"
-
-#include "escript/AbstractTransportProblem.h"
-#include "escript/Data.h"
-#include "escript/UtilC.h"
-
-#include <boost/python/object.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/python/extract.hpp>
-
-namespace paso {
-
-PASOWRAP_DLL_API
-class TransportProblemAdapter : public escript::AbstractTransportProblem
-{
-
-/**
-   \brief
-   Wrapper for paso::TransportProblem. 
-
-   Description:
-   Wrapper for paso::TransportProblem.
-*/
-
-public:
-
-  /**
-     /brief
-     Default Constructor for TransportProblemAdapter.
-     NB: Only throws an exception.
-  */
-  TransportProblemAdapter();
-
-  /**
-     /brief
-     Constructor for TransportProblemAdapter.
-  */
-  TransportProblemAdapter(TransportProblem_ptr transport_problem,
-                          int block_size,
-                          const escript::FunctionSpace& functionspace);
-
-  /**
-     \brief
-     Empty destructor for TransportProblemAdapter.
-  */
-  ~TransportProblemAdapter() {}
-
-  /**
-     \brief
-     Returns the pointer to the transport problem.
-  */
-  TransportProblem_ptr getPaso_TransportProblem() const;
-
-  /**
-  *  \brief resets the transport operator typically as they have been updated.
-  */
-  virtual void resetTransport() const;
-
-  /**
-  *      \brief returns a save time step size.
-  */
-  virtual double getSafeTimeStepSize() const;
-
-  /**
-  *      \brief \brief returns the value for unlimited time step size.
-  */
-  virtual double getUnlimitedTimeStepSize() const;
-
-  /**
-     \brief
-     returns the identifier of the transport problem type to be used
-     when a particular solver, preconditioner and package is used
-  */
-  static int getTransportTypeId(const int solver, const int preconditioner,
-          const int package, const bool symmetry, const esysUtils::JMPI& mpiInfo);
-
- protected:
-
- private:
-
-    /**
-    * \brief
-    * sets solution out by time step dt.
-    */
-    virtual void setToSolution(escript::Data& out, escript::Data& u0,
-                               escript::Data& source, double dt,
-                               boost::python::object& options) const;
-   
-
-   /**
-    * \brief
-    * copy constraint u_{,t}=r where q>0  into the problem 
-    * it is assumed that q and r are not empty and has appropriate shape
-    * and function space.
-    */
-    virtual void copyConstraint(escript::Data& source, escript::Data& q,
-                                escript::Data& r) const;
-
-
-   //
-   // shared pointer to the externally created transport_problem.
-   //
-   TransportProblem_ptr m_transport_problem;
-
-};
-
-} // end of namespace
-
-#endif // __PASOWRAP_TRANSPORTPROBLEMADAPTER_H__
-
diff --git a/pasowrap/src/pasowrapcpp.cpp b/pasowrap/src/pasowrapcpp.cpp
deleted file mode 100644
index 1e74e4a..0000000
--- a/pasowrap/src/pasowrapcpp.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include <paso/Paso.h>
-
-#include "SystemMatrixAdapter.h"
-#include "TransportProblemAdapter.h"
-
-#include "PasoException.h"   
-#include "esysUtils/esysExceptionTranslator.h"
-
-#include "escript/AbstractContinuousDomain.h"
-
-#include <boost/python.hpp>
-#include <boost/python/module.hpp>
-#include <boost/python/def.hpp>
-#include <boost/python/detail/defaults_gen.hpp>
-#include <boost/version.hpp>
-
-using namespace boost::python;
-
-BOOST_PYTHON_MODULE(pasowrapcpp)
-{
-// This feature was added in boost v1.34
-#if ((BOOST_VERSION/100)%1000 > 34) || (BOOST_VERSION/100000 >1)
-  // params are: bool show_user_defined, bool show_py_signatures, bool show_cpp_signatures
-  docstring_options docopt(true, true, false);
-#endif
-
-
-  register_exception_translator<paso::PasoException>(&(esysUtils::RuntimeErrorTranslator));
-
-  class_<paso::SystemMatrixAdapter, bases<escript::AbstractSystemMatrix> >
-      ("OperatorAdapter","A concrete class representing an operator. For more details, please see the c++ documentation.", no_init)
-      .def("print_matrix_info",&paso::SystemMatrixAdapter::Print_Matrix_Info,(arg("full")=false),"prints information about a system matrix")
-      .def("nullifyRowsAndCols",&paso::SystemMatrixAdapter::nullifyRowsAndCols)
-      .def("resetValues",&paso::SystemMatrixAdapter::resetValues, "resets the matrix entries")
-      .def("saveMM",&paso::SystemMatrixAdapter::saveMM,args("fileName"), 
-"writes the matrix to a file using the Matrix Market file format")
-      .def("saveHB",&paso::SystemMatrixAdapter::saveHB, args("filename"),
-"writes the matrix to a file using the Harwell-Boeing file format");
-
-  class_<paso::TransportProblemAdapter, bases<escript::AbstractTransportProblem> >
-      ("TransportProblemAdapter","",no_init)
-      .def("getSafeTimeStepSize",&paso::TransportProblemAdapter::getSafeTimeStepSize)
-      .def("getUnlimitedTimeStepSize",&paso::TransportProblemAdapter::getUnlimitedTimeStepSize)
-      .def("resetTransport",&paso::TransportProblemAdapter::resetTransport,
-"resets the transport operator typically as they have been updated");
-}
diff --git a/pasowrap/src/system_dep.h b/pasowrap/src/system_dep.h
deleted file mode 100644
index 063613e..0000000
--- a/pasowrap/src/system_dep.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-
-/**
-\file pasowrap/src/system_dep.h
-\ingroup Other
- */
-/*
-   @(#) system_dep.h
-*/
-
-#ifndef pasowrap_system_dep_h
-#define pasowrap_system_dep_h
-
-#include <cmath>
-
-#define PASOWRAP_DLL_API
-
-#ifdef _WIN32
-
-#   ifndef PASOWRAP_STATIC_LIB
-#      undef PASOWRAP_DLL_API
-#      ifdef PASOWRAP_EXPORTS
-#         define PASOWRAP_DLL_API __declspec(dllexport)
-#      else
-#         define PASOWRAP_DLL_API __declspec(dllimport)
-#      endif
-#   endif
-#endif
-
-#endif
-
diff --git a/pycad/test/python/SConscript b/pycad/test/python/SConscript
index 775b93f..1678c08 100644
--- a/pycad/test/python/SConscript
+++ b/pycad/test/python/SConscript
@@ -48,7 +48,8 @@ if env['usempi']:
     Depends(program, env['prefix']+"/lib/pythonMPI")
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("ESCRIPT_TEST_DATA_ROOT","$BATCH_ROOT/pycad/test/python"),("ESCRIPT_WORKDIR","$BUILD_DIR/pycad/test/python")),"$BATCH_ROOT/pycad/test/python","$BATCH_ROOT/pycad/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("pycad", "$PYTHONRUNNER ", (("ESCRIPT_TEST_DATA_ROOT","$BATCH_ROOT/pycad/test/python"),("ESCRIPT_WORKDIR","$BUILD_DIR/pycad/test/python")), "$BATCH_ROOT/pycad/test/python", "$BATCH_ROOT/pycad/test/python", testruns)
+tgroup.makeDir("$BUILD_DIR/pycad/test/python")
 TestGroups.append(tgroup)
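For reference, the updated grouptest helper used above now takes the group name as its first argument and exposes makeDir() for creating the working directory. A minimal sketch of the same registration pattern for a hypothetical module "mymodule" (the paths and the testruns list are assumptions for illustration, not part of this commit):

    from grouptest import GroupTest
    testruns = ['run_sometest.py']   # assumed list of test scripts
    tgroup = GroupTest("mymodule", "$PYTHONRUNNER ",
                       (("ESCRIPT_TEST_DATA_ROOT", "$BATCH_ROOT/mymodule/test/python"),
                        ("ESCRIPT_WORKDIR", "$BUILD_DIR/mymodule/test/python")),
                       "$BATCH_ROOT/mymodule/test/python",
                       "$BATCH_ROOT/mymodule/test/python",
                       testruns)
    tgroup.makeDir("$BUILD_DIR/mymodule/test/python")
    TestGroups.append(tgroup)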
 
diff --git a/pythonMPI/src/SConscript b/pythonMPI/src/SConscript
index ec9f8d5..16e2deb 100644
--- a/pythonMPI/src/SConscript
+++ b/pythonMPI/src/SConscript
@@ -20,14 +20,13 @@ sources = ['ScriptMPI.cpp']
 sources_redirect = ['ScriptMPIredirect.cpp']
 
 local_env = env.Clone()
-local_env.Prepend(LIBS = ['esysUtils'])
+local_env.PrependUnique(LIBS = ['escript'])
 
-prog = local_env.Program('pythonMPI', sources)
-prog_redirect = local_env.Program('pythonMPIredirect', sources_redirect)
+progs = local_env.Program('pythonMPI', sources)
+progs += local_env.Program('pythonMPIredirect', sources_redirect)
 
-lib_inst1 = local_env.Install(local_env['libinstall'], prog)
-lib_inst2 = local_env.Install(local_env['libinstall'], prog_redirect)
+inst = local_env.Install(local_env['libinstall'], progs)
 
-env.Alias('build_pythonMPI', [prog, prog_redirect])
-env.Alias('install_pythonMPI', [lib_inst1, lib_inst2])
+env.Alias('build_pythonMPI', progs)
+env.Alias('install_pythonMPI', inst)
 
diff --git a/pythonMPI/src/ScriptMPI.cpp b/pythonMPI/src/ScriptMPI.cpp
index ee39f26..ae45b63 100644
--- a/pythonMPI/src/ScriptMPI.cpp
+++ b/pythonMPI/src/ScriptMPI.cpp
@@ -19,11 +19,12 @@
 #include <iostream>
 #include <stdexcept>
 
-#include "esysUtils/Esys_MPI.h"
+#include <escript/EsysMPI.h>
 
 #ifdef ESYS_MPI
 
-int main( int argc, char **argv ) {
+int main( int argc, char **argv )
+{
   int status = 0;
   int provided;
   try
@@ -125,7 +126,8 @@ int main( int argc, char **argv ) {
 
 #else
 
-int main( int argc, char **argv ) {
+int main( int argc, char **argv )
+{
 	printf( "Esys must be compiled with ESYS_MPI defined to make the MPI version available\n\n" );
 	return 0;
 }
diff --git a/pythonMPI/src/ScriptMPIredirect.cpp b/pythonMPI/src/ScriptMPIredirect.cpp
index bcf03e6..e6938b1 100644
--- a/pythonMPI/src/ScriptMPIredirect.cpp
+++ b/pythonMPI/src/ScriptMPIredirect.cpp
@@ -18,11 +18,11 @@
 #include <iostream>
 #include <stdexcept>
 
-#include "esysUtils/Esys_MPI.h"
+#include <escript/EsysMPI.h>
 
 #ifdef ESYS_MPI
 
-int main( int argc, char **argv )
+int main(int argc, char **argv)
 {
     int status = 0;
     int provided;
@@ -126,7 +126,7 @@ int main( int argc, char **argv )
 
 #else
 
-int main( int argc, char **argv )
+int main(int argc, char **argv)
 {
     std::cout << "Escript must be compiled with ESYS_MPI defined to make the MPI version available" << std::endl;
     return 0;
diff --git a/pasowrap/py_src/SConscript b/ripley/SConscript
similarity index 56%
copy from pasowrap/py_src/SConscript
copy to ripley/SConscript
index 05a35ee..1e2ca78 100644
--- a/pasowrap/py_src/SConscript
+++ b/ripley/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,19 +13,18 @@
 #
 ##############################################################################
 
+Import('env')
+if 'ripley' in env['domains']:
+    if not env['paso'] and not env['trilinos']:
+        print("Ripley requires a solver library! Please either enable Paso or Trilinos.")
+        env.Exit(1)
 
-import os
-Import('*')
-
-local_env = env.Clone()
-
-# get the source file names
-sources = Glob('*.py')
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-# compile
-pyc = local_env.PyCompile(sources)
+    # configure python module
+    env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# install
-py_inst = local_env.Install(local_env['pyinstall']+'/pasowrap', pyc)
-env.Alias('install_pasowrap_py', py_inst)
+    # configure unit tests
+    env.SConscript('test/SConscript', duplicate=0)
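The new top-level ripley/SConscript gates the whole domain on env['domains'] and refuses to build without at least one solver library before descending into the src, py_src and test subdirectories. A minimal sketch of the same gating pattern for a hypothetical domain "mydomain", assuming the same env flags ('domains', 'paso', 'trilinos') are available:

    Import('env')
    if 'mydomain' in env['domains']:
        if not env['paso'] and not env['trilinos']:
            print("mydomain requires a solver library! Enable Paso or Trilinos.")
            env.Exit(1)
        env.SConscript('src/SConscript', duplicate=0)                       # C++ library
        env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)  # python module
        env.SConscript('test/SConscript', duplicate=0)                      # unit tests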
 
diff --git a/ripley/py_src/SConscript b/ripley/py_src/SConscript
index a9a2ff6..f392058 100644
--- a/ripley/py_src/SConscript
+++ b/ripley/py_src/SConscript
@@ -14,9 +14,7 @@
 #
 ##############################################################################
 
-import os
 Import('*')
-
 local_env = env.Clone()
 
 # get the source file names
@@ -26,6 +24,6 @@ sources = Glob('*.py')
 pyc = local_env.PyCompile(sources)
 
 # install
-py_inst = local_env.Install(local_env['pyinstall']+'/ripley', pyc)
-env.Alias('install_ripley_py', py_inst)
+py_inst = local_env.Install(Dir('ripley', local_env['pyinstall']), pyc)
+env.Alias('install_ripley', py_inst)
 
diff --git a/ripley/src/Brick.cpp b/ripley/src/Brick.cpp
index 9bd3e6b..747576b 100644
--- a/ripley/src/Brick.cpp
+++ b/ripley/src/Brick.cpp
@@ -14,13 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-#include <esysUtils/esysFileWriter.h>
-#include <esysUtils/EsysRandom.h>
-
-#include <paso/SystemMatrix.h>
-
 #include <ripley/Brick.h>
 #include <ripley/DefaultAssembler3D.h>
 #include <ripley/LameAssembler3D.h>
@@ -28,11 +21,19 @@
 #include <ripley/blocktools.h>
 #include <ripley/domainhelpers.h>
 
-#ifdef USE_NETCDF
+#include <escript/FileWriter.h>
+#include <escript/index.h>
+#include <escript/Random.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #ifdef ESYS_MPI
 #include <pmpio.h>
@@ -47,8 +48,10 @@
 
 namespace bp = boost::python;
 namespace bm = boost::math;
-using esysUtils::FileWriter;
 using escript::AbstractSystemMatrix;
+using escript::FileWriter;
+using escript::NotImplementedError;
+using escript::ValueError;
 using std::vector;
 using std::string;
 using std::min;
@@ -84,7 +87,7 @@ Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
                 "limit may be raised in future releases.");
 
     if (n0 <= 0 || n1 <= 0 || n2 <= 0)
-        throw RipleyException("Number of elements in each spatial dimension "
+        throw ValueError("Number of elements in each spatial dimension "
                 "must be positive");
 
     // ignore subdivision parameters for serial run
@@ -107,7 +110,7 @@ Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
             }
             epr[i] = -1; // can no longer be max
             if (ranks % d[i] != 0) {
-                throw RipleyException("Invalid number of spatial subdivisions");
+                throw ValueError("Invalid number of spatial subdivisions");
             }
             //remove
             ranks /= d[i];
@@ -129,7 +132,7 @@ Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
     // ensure number of subdivisions is valid and nodes can be distributed
     // among number of ranks
     if (d0*d1*d2 != m_mpiInfo->size){
-        throw RipleyException("Invalid number of spatial subdivisions");
+        throw ValueError("Invalid number of spatial subdivisions");
     }
     if (warn) {
         std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
@@ -143,28 +146,59 @@ Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
     m_dx[1] = l1/n1;
     m_dx[2] = l2/n2;
 
+    warn = false;
     if ((n0+1)%d0 > 0) {
-        n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
-        l0=m_dx[0]*n0;
-        std::cout << "Warning: Adjusted number of elements and length. N0="
-            << n0 << ", l0=" << l0 << std::endl;
+        switch (getDecompositionPolicy()) {
+            case DECOMP_EXPAND:
+                l0 = m_dx[0]*n0; // fall through
+            case DECOMP_ADD_ELEMENTS:
+                n0 = (dim_t)round((float)(n0+1)/d0+0.5)*d0-1; // fall through
+            case DECOMP_STRICT:
+                warn = true;
+                break;
+        }
     }
     if ((n1+1)%d1 > 0) {
-        n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
-        l1=m_dx[1]*n1;
-        std::cout << "Warning: Adjusted number of elements and length. N1="
-            << n1 << ", l1=" << l1 << std::endl;
+        switch (getDecompositionPolicy()) {
+            case DECOMP_EXPAND:
+                l1 = m_dx[1]*n1; // fall through
+            case DECOMP_ADD_ELEMENTS:
+                n1 = (dim_t)round((float)(n1+1)/d1+0.5)*d1-1; // fall through
+            case DECOMP_STRICT:
+                warn = true;
+                break;
+        }
     }
     if ((n2+1)%d2 > 0) {
-        n2=(dim_t)round((float)(n2+1)/d2+0.5)*d2-1;
-        l2=m_dx[2]*n2;
-        std::cout << "Warning: Adjusted number of elements and length. N2="
-            << n2 << ", l2=" << l2 << std::endl;
+        switch (getDecompositionPolicy()) {
+            case DECOMP_EXPAND:
+                l2 = m_dx[2]*n2; // fall through
+            case DECOMP_ADD_ELEMENTS:
+                n2 = (dim_t)round((float)(n2+1)/d2+0.5)*d2-1; // fall through
+            case DECOMP_STRICT:
+                warn = true;
+                break;
+        }
     }
 
     if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
-        throw RipleyException("Too few elements for the number of ranks");
+        throw ValueError("Too few elements for the number of ranks");
 
+    if (warn) {
+        if (getDecompositionPolicy() == DECOMP_STRICT) {
+            throw ValueError("Unable to decompose domain to the number of "
+                    "MPI ranks without adding elements and the policy "
+                    "is set to STRICT. Use setDecompositionPolicy() "
+                    "to allow adding elements.");
+        } else {
+            std::cout << "Warning: Domain setup has been adjusted as follows "
+                    "to allow decomposition into " << m_mpiInfo->size
+                    << " MPI ranks:" << std::endl
+                    << "    N0=" << n0 << ", l0=" << l0 << std::endl
+                    << "    N1=" << n1 << ", l1=" << l1 << std::endl
+                    << "    N2=" << n2 << ", l2=" << l1 << std::endl;
+        }
+    }
     m_gNE[0] = n0;
     m_gNE[1] = n1;
     m_gNE[2] = n2;
@@ -248,7 +282,7 @@ bool Brick::operator==(const escript::AbstractDomain& other) const
 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
             const ReaderParameters& params) const
 {
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
     // check destination function space
     dim_t myN0, myN1, myN2;
     if (out.getFunctionSpace().getTypeCode() == Nodes) {
@@ -261,19 +295,19 @@ void Brick::readNcGrid(escript::Data& out, string filename, string varname,
         myN1 = m_NE[1];
         myN2 = m_NE[2];
     } else
-        throw RipleyException("readNcGrid(): invalid function space for output data object");
+        throw ValueError("readNcGrid(): invalid function space for output data object");
 
     if (params.first.size() != 3)
-        throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
+        throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
 
     if (params.numValues.size() != 3)
-        throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
+        throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
 
     if (params.multiplier.size() != 3)
-        throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
+        throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
     for (size_t i=0; i<params.multiplier.size(); i++)
         if (params.multiplier[i]<1)
-            throw RipleyException("readNcGrid(): all multipliers must be positive");
+            throw ValueError("readNcGrid(): all multipliers must be positive");
 
     // check file existence and size
     NcFile f(filename.c_str(), NcFile::ReadOnly);
@@ -392,10 +426,10 @@ void Brick::readNcGrid(escript::Data& out, string filename, string varname,
 #endif
 }
 
-#ifdef USE_BOOSTIO
 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
                            const ReaderParameters& params) const
 {
+#ifdef ESYS_HAVE_BOOST_IO
     // the mapping is not universally correct but should work on our
     // supported platforms
     switch (params.dataType) {
@@ -409,10 +443,12 @@ void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
             readBinaryGridZippedImpl<double>(out, filename, params);
             break;
         default:
-            throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
+            throw ValueError("readBinaryGridZipped(): invalid or unsupported datatype");
     }
-}
+#else
+    throw RipleyException("readBinaryGridZipped(): not compiled with zip support");
 #endif
+}
 
 void Brick::readBinaryGrid(escript::Data& out, string filename,
                            const ReaderParameters& params) const
@@ -430,7 +466,7 @@ void Brick::readBinaryGrid(escript::Data& out, string filename,
             readBinaryGridImpl<double>(out, filename, params);
             break;
         default:
-            throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
+            throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
     }
 }
 
@@ -450,26 +486,26 @@ void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
         myN1 = m_NE[1];
         myN2 = m_NE[2];
     } else
-        throw RipleyException("readBinaryGrid(): invalid function space for output data object");
+        throw ValueError("readBinaryGrid(): invalid function space for output data object");
 
     if (params.first.size() != 3)
-        throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
+        throw ValueError("readBinaryGrid(): argument 'first' must have 3 entries");
 
     if (params.numValues.size() != 3)
-        throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
+        throw ValueError("readBinaryGrid(): argument 'numValues' must have 3 entries");
 
     if (params.multiplier.size() != 3)
-        throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
+        throw ValueError("readBinaryGrid(): argument 'multiplier' must have 3 entries");
     for (size_t i=0; i<params.multiplier.size(); i++)
         if (params.multiplier[i]<1)
-            throw RipleyException("readBinaryGrid(): all multipliers must be positive");
+            throw ValueError("readBinaryGrid(): all multipliers must be positive");
     if (params.reverse[0] != 0 || params.reverse[1] != 0)
         throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
 
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw RipleyException("readBinaryGrid(): cannot open file");
+        throw RipleyException("readBinaryGrid(): cannot open file " + filename);
     }
     f.seekg(0, std::ios::end);
     const int numComp = out.getDataPointSize();
@@ -590,7 +626,7 @@ void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
     f.close();
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 template<typename ValueType>
 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
                                const ReaderParameters& params) const
@@ -607,24 +643,24 @@ void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
         myN1 = m_NE[1];
         myN2 = m_NE[2];
     } else
-        throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
+        throw ValueError("readBinaryGridFromZipped(): invalid function space for output data object");
 
     if (params.first.size() != 3)
-        throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
+        throw ValueError("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
 
     if (params.numValues.size() != 3)
-        throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
+        throw ValueError("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
 
     if (params.multiplier.size() != 3)
-        throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
+        throw ValueError("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
     for (size_t i=0; i<params.multiplier.size(); i++)
         if (params.multiplier[i]<1)
-            throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
+            throw ValueError("readBinaryGridFromZipped(): all multipliers must be positive");
 
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw RipleyException("readBinaryGridFromZipped(): cannot open file");
+        throw RipleyException("readBinaryGridFromZipped(): cannot open file " + filename);
     }
     f.seekg(0, std::ios::end);
     const int numComp = out.getDataPointSize();
@@ -725,7 +761,7 @@ void Brick::writeBinaryGrid(const escript::Data& in, string filename,
             writeBinaryGridImpl<double>(in, filename, byteOrder);
             break;
         default:
-            throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
+            throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
     }
 }
 
@@ -818,7 +854,7 @@ void Brick::write(const std::string& filename) const
 
 void Brick::dump(const string& fileName) const
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     string fn(fileName);
     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
         fn+=".silo";
@@ -961,7 +997,7 @@ void Brick::dump(const string& fileName) const
         DBClose(dbfile);
     }
 
-#else // USE_SILO
+#else // ESYS_HAVE_SILO
     throw RipleyException("dump: no Silo support");
 #endif
 }
@@ -989,7 +1025,7 @@ const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
 
     std::stringstream msg;
     msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 bool Brick::ownSample(int fsType, index_t id) const
@@ -1042,13 +1078,13 @@ bool Brick::ownSample(int fsType, index_t id) const
 
     std::stringstream msg;
     msg << "ownSample: invalid function space type " << fsType;
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 RankVector Brick::getOwnerVector(int fsType) const
 {
     RankVector owner;
-    const Esys_MPI_rank rank = m_mpiInfo->rank;
+    const int rank = m_mpiInfo->rank;
 
     if (fsType == Elements || fsType == ReducedElements) {
         owner.assign(getNumElements(), rank);
@@ -1149,7 +1185,7 @@ RankVector Brick::getOwnerVector(int fsType) const
             }
         }
     } else {
-        throw RipleyException("getOwnerVector: only valid for element types");
+        throw ValueError("getOwnerVector: only valid for element types");
     }
 
     return owner;
@@ -1330,7 +1366,7 @@ void Brick::setToNormal(escript::Data& out) const
         std::stringstream msg;
         msg << "setToNormal: invalid function space type "
             << out.getFunctionSpace().getTypeCode();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 }
 
@@ -1427,7 +1463,7 @@ void Brick::setToSize(escript::Data& out) const
         std::stringstream msg;
         msg << "setToSize: invalid function space type "
             << out.getFunctionSpace().getTypeCode();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 }
 
@@ -1453,9 +1489,9 @@ void Brick::assembleCoordinates(escript::Data& arg) const
 {
     int numDim = m_numDim;
     if (!arg.isDataPointShapeEqual(1, &numDim))
-        throw RipleyException("setToX: Invalid Data object shape");
+        throw ValueError("setToX: Invalid Data object shape");
     if (!arg.numSamplesEqual(1, getNumNodes()))
-        throw RipleyException("setToX: Illegal number of samples in Data object");
+        throw ValueError("setToX: Illegal number of samples in Data object");
 
     const dim_t NN0 = m_NN[0];
     const dim_t NN1 = m_NN[1];
@@ -2252,29 +2288,18 @@ void Brick::nodesToDOF(escript::Data& out, const escript::Data& in) const
     }
 }
 
+#ifdef ESYS_HAVE_TRILINOS
 //protected
-void Brick::dofToNodes(escript::Data& out, const escript::Data& in) const
+esys_trilinos::const_TrilinosGraph_ptr Brick::getTrilinosGraph() const
 {
-    const dim_t numComp = in.getDataPointSize();
-    paso::Coupler_ptr coupler(new paso::Coupler(m_connector, numComp));
-    // expand data object if necessary to be able to grab the whole data
-    const_cast<escript::Data*>(&in)->expand();
-    coupler->startCollect(in.getDataRO());
-
-    const dim_t numDOF = getNumDOF();
-    const dim_t numNodes = getNumNodes();
-    out.requireWrite();
-    const double* buffer = coupler->finishCollect();
-
-#pragma omp parallel for
-    for (index_t i=0; i<numNodes; i++) {
-        const double* src=(m_dofMap[i]<numDOF ?
-                in.getSampleDataRO(m_dofMap[i])
-                : &buffer[(m_dofMap[i]-numDOF)*numComp]);
-        std::copy(src, src+numComp, out.getSampleDataRW(i));
+    if (m_graph.is_null()) {
+        m_graph = createTrilinosGraph(m_dofId, m_nodeId);
     }
+    return m_graph;
 }
+#endif
 
+#ifdef ESYS_HAVE_PASO
 //protected
 paso::SystemMatrixPattern_ptr Brick::getPasoMatrixPattern(
                                                     bool reducedRowOrder,
@@ -2284,12 +2309,13 @@ paso::SystemMatrixPattern_ptr Brick::getPasoMatrixPattern(
         return m_pattern;
 
     // first call to this method -> create the pattern, then return it
+    paso::Connector_ptr conn(getPasoConnector());
     const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
     const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
     const dim_t nDOF2 = (m_gNE[2]+1)/m_NX[2];
     const dim_t numDOF = nDOF0*nDOF1*nDOF2;
-    const dim_t numShared = m_connector->send->numSharedComponents;
-    const index_t* sendShared = m_connector->send->shared;
+    const dim_t numShared = conn->send->numSharedComponents;
+    const index_t* sendShared = conn->send->shared;
     const int x = m_mpiInfo->rank%m_NX[0];
     const int y = m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
     const int z = m_mpiInfo->rank/(m_NX[0]*m_NX[1]);
@@ -2298,13 +2324,13 @@ paso::SystemMatrixPattern_ptr Brick::getPasoMatrixPattern(
     vector<IndexVector> colIndices(numDOF);
     vector<IndexVector> rowIndices(numShared);
 
-    for (dim_t i=0; i < m_connector->send->numNeighbors; i++) {
-        const dim_t start = m_connector->send->offsetInShared[i];
-        const dim_t end = m_connector->send->offsetInShared[i+1];
+    for (dim_t i=0; i < conn->send->neighbour.size(); i++) {
+        const dim_t start = conn->send->offsetInShared[i];
+        const dim_t end = conn->send->offsetInShared[i+1];
         // location of neighbour rank relative to this rank
-        const int xDiff = m_connector->send->neighbor[i]%m_NX[0] - x;
-        const int yDiff = m_connector->send->neighbor[i]%(m_NX[0]*m_NX[1])/m_NX[0] - y;
-        const int zDiff = m_connector->send->neighbor[i]/(m_NX[0]*m_NX[1]) - z;
+        const int xDiff = conn->send->neighbour[i]%m_NX[0] - x;
+        const int yDiff = conn->send->neighbour[i]%(m_NX[0]*m_NX[1])/m_NX[0] - y;
+        const int zDiff = conn->send->neighbour[i]/(m_NX[0]*m_NX[1]) - z;
         
         if (xDiff==0 && yDiff==0) {
             // sharing front or back plane
@@ -2425,13 +2451,13 @@ paso::SystemMatrixPattern_ptr Brick::getPasoMatrixPattern(
     paso::Pattern_ptr rowPattern = createPasoPattern(rowIndices, numDOF);
 
     // allocate paso distribution
-    paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,
-            const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0));
+    escript::Distribution_ptr distribution(new escript::Distribution(
+                                               m_mpiInfo, m_nodeDistribution));
 
     // finally create the system matrix pattern
     m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
             distribution, distribution, mainPattern, colPattern, rowPattern,
-            m_connector, m_connector));
+            conn, conn));
 
     // useful debug output
     /*
@@ -2477,6 +2503,7 @@ paso::SystemMatrixPattern_ptr Brick::getPasoMatrixPattern(
 
     return m_pattern;
 }
+#endif // ESYS_HAVE_PASO
 
 //private
 void Brick::populateSampleIds()
@@ -2727,30 +2754,62 @@ void Brick::populateSampleIds()
     populateDofMap();
 }
 
-//private
-vector<IndexVector> Brick::getConnections() const
+//protected
+vector<IndexVector> Brick::getConnections(bool includeShared) const
 {
     // returns a vector v of size numDOF where v[i] is a vector with indices
-    // of DOFs connected to i (up to 27 in 3D)
-    const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
-    const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
-    const dim_t nDOF2 = (m_gNE[2]+1)/m_NX[2];
-    const dim_t M = nDOF0*nDOF1*nDOF2;
-    vector<IndexVector> indices(M);
-
+    // of DOFs connected to i (up to 27 in 3D).
+    // In other words this method returns the occupied (local) matrix columns
+    // for all (local) matrix rows.
+    // If includeShared==true then connections to non-owned DOFs are also
+    // returned (i.e. indices of the column couplings)
+    const dim_t nDOF0 = getNumDOFInAxis(0);
+    const dim_t nDOF1 = getNumDOFInAxis(1);
+    const dim_t nDOF2 = getNumDOFInAxis(2);
+    const dim_t numMatrixRows = nDOF0*nDOF1*nDOF2;
+    vector<IndexVector> indices(numMatrixRows);
+
+    if (includeShared) {
+        const index_t left = getFirstInDim(0);
+        const index_t bottom = getFirstInDim(1);
+        const index_t front = getFirstInDim(2);
+        const dim_t NN0 = m_NN[0];
+        const dim_t NN1 = m_NN[1];
+        const dim_t NN2 = m_NN[2];
+#pragma omp parallel for
+        for (index_t i=0; i < numMatrixRows; i++) {
+            const index_t x = left + i % nDOF0;
+            const index_t y = bottom + i % (nDOF0*nDOF1)/nDOF0;
+            const index_t z = front + i / (nDOF0*nDOF1);
+            // loop through potential neighbours and add to index if positions
+            // are within bounds
+            for (int i2=z-1; i2<z+2; i2++) {
+                for (int i1=y-1; i1<y+2; i1++) {
+                    for (int i0=x-1; i0<x+2; i0++) {
+                        if (i0>=0 && i1>=0 && i2>=0
+                                && i0<NN0 && i1<NN1 && i2<NN2) {
+                            indices[i].push_back(m_dofMap[i2*NN0*NN1+i1*NN0+i0]);
+                        }
+                    }
+                }
+            }
+            sort(indices[i].begin(), indices[i].end());
+        }
+    } else {
 #pragma omp parallel for
-    for (index_t i=0; i < M; i++) {
-        const index_t x = i % nDOF0;
-        const index_t y = i % (nDOF0*nDOF1)/nDOF0;
-        const index_t z = i / (nDOF0*nDOF1);
-        // loop through potential neighbours and add to index if positions are
-        // within bounds
-        for (int i2=z-1; i2<z+2; i2++) {
-            for (int i1=y-1; i1<y+2; i1++) {
-                for (int i0=x-1; i0<x+2; i0++) {
-                    if (i0>=0 && i1>=0 && i2>=0
-                            && i0<nDOF0 && i1<nDOF1 && i2<nDOF2) {
-                        indices[i].push_back(i2*nDOF0*nDOF1 + i1*nDOF0 + i0);
+        for (index_t i=0; i < numMatrixRows; i++) {
+            const index_t x = i % nDOF0;
+            const index_t y = i % (nDOF0*nDOF1)/nDOF0;
+            const index_t z = i / (nDOF0*nDOF1);
+            // loop through potential neighbours and add to index if positions
+            // are within bounds
+            for (int i2=z-1; i2<z+2; i2++) {
+                for (int i1=y-1; i1<y+2; i1++) {
+                    for (int i0=x-1; i0<x+2; i0++) {
+                        if (i0>=0 && i1>=0 && i2>=0
+                                && i0<nDOF0 && i1<nDOF1 && i2<nDOF2) {
+                            indices[i].push_back(i2*nDOF0*nDOF1+i1*nDOF0+i0);
+                        }
                     }
                 }
             }
@@ -2762,12 +2821,12 @@ vector<IndexVector> Brick::getConnections() const
 //private
 void Brick::populateDofMap()
 {
-    const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
-    const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
-    const dim_t nDOF2 = (m_gNE[2]+1)/m_NX[2];
-    const index_t left = (m_offset[0]==0 ? 0 : 1);
-    const index_t bottom = (m_offset[1]==0 ? 0 : 1);
-    const index_t front = (m_offset[2]==0 ? 0 : 1);
+    const dim_t nDOF0 = getNumDOFInAxis(0);
+    const dim_t nDOF1 = getNumDOFInAxis(1);
+    const dim_t nDOF2 = getNumDOFInAxis(2);
+    const index_t left = getFirstInDim(0);
+    const index_t bottom = getFirstInDim(1);
+    const index_t front = getFirstInDim(2);
 
     // populate node->DOF mapping with own degrees of freedom.
     // The rest is assigned in the loop further down
@@ -2906,30 +2965,17 @@ void Brick::populateDofMap()
         }
     }
 
-    // TODO: paso::SharedComponents should take vectors to avoid this
-    Esys_MPI_rank* neighPtr = NULL;
-    index_t* sendPtr = NULL;
-    index_t* recvPtr = NULL;
-    if (neighbour.size() > 0) {
-        neighPtr = &neighbour[0];
-        sendPtr = &sendShared[0];
-        recvPtr = &recvShared[0];
-    }
-    // create connector
-    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, sendPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, recvPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+#ifdef ESYS_HAVE_PASO
+    createPasoConnector(neighbour, offsetInShared, offsetInShared, sendShared,
+                        recvShared);
+#endif
 
     // useful debug output
     /*
     std::cout << "--- rcv_shcomp ---" << std::endl;
-    std::cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << std::endl;
+    std::cout << "numDOF=" << numDOF << ", numNeighbours=" << neighbour.size() << std::endl;
     for (size_t i=0; i<neighbour.size(); i++) {
-        std::cout << "neighbor[" << i << "]=" << neighbour[i]
+        std::cout << "neighbour[" << i << "]=" << neighbour[i]
             << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << std::endl;
     }
     for (size_t i=0; i<recvShared.size(); i++) {
@@ -2947,8 +2993,9 @@ void Brick::populateDofMap()
 }
 
 //private
+template<typename Scalar>
 void Brick::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
-         const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
+         const vector<Scalar>& EM_S, const vector<Scalar>& EM_F, bool addS,
          bool addF, index_t firstNode, int nEq, int nComp) const
 {
     IndexVector rowIndex(8);
@@ -2961,20 +3008,30 @@ void Brick::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
     rowIndex[6] = m_dofMap[firstNode+m_NN[0]*(m_NN[1]+1)];
     rowIndex[7] = m_dofMap[firstNode+m_NN[0]*(m_NN[1]+1)+1];
     if (addF) {
-        double *F_p=F.getSampleDataRW(0);
-        for (index_t i=0; i<rowIndex.size(); i++) {
-            if (rowIndex[i]<getNumDOF()) {
-                for (int eq=0; eq<nEq; eq++) {
+        Scalar* F_p = F.getSampleDataRW(0, static_cast<Scalar>(0));
+        for (index_t i = 0; i < rowIndex.size(); i++) {
+            if (rowIndex[i] < getNumDOF()) {
+                for (int eq = 0; eq < nEq; eq++) {
                     F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
                 }
             }
         }
     }
     if (addS) {
-        addToSystemMatrix(S, rowIndex, nEq, EM_S);
+        addToSystemMatrix<Scalar>(S, rowIndex, nEq, EM_S);
     }
 }
 
+template
+void Brick::addToMatrixAndRHS<real_t>(AbstractSystemMatrix* S, escript::Data& F,
+         const vector<real_t>& EM_S, const vector<real_t>& EM_F, bool addS,
+         bool addF, index_t firstNode, int nEq, int nComp) const;
+
+template
+void Brick::addToMatrixAndRHS<cplx_t>(AbstractSystemMatrix* S, escript::Data& F,
+         const vector<cplx_t>& EM_S, const vector<cplx_t>& EM_F, bool addS,
+         bool addF, index_t firstNode, int nEq, int nComp) const;
+
 //protected
 void Brick::interpolateNodesOnElements(escript::Data& out,
                                        const escript::Data& in,
@@ -3345,11 +3402,11 @@ namespace
  */
 escript::Data Brick::randomFill(const escript::DataTypes::ShapeType& shape,
                                 const escript::FunctionSpace& what,
-                                long seed, const boost::python::tuple& filter) const
+                                long seed, const bp::tuple& filter) const
 {
     int numvals=escript::DataTypes::noValues(shape);
     if (len(filter) > 0 && numvals != 1) {
-        throw RipleyException("Ripley only supports filters for scalar data.");
+        throw NotImplementedError("Ripley only supports filters for scalar data.");
     }
     escript::Data res = randomFillWorker(shape, seed, filter);
     if (res.getFunctionSpace()!=what) {
@@ -3398,7 +3455,7 @@ that ripley has.
 */
 escript::Data Brick::randomFillWorker(
                         const escript::DataTypes::ShapeType& shape, long seed,
-                        const boost::python::tuple& filter) const
+                        const bp::tuple& filter) const
 {
     unsigned int radius=0;  // these are only used by gaussian
     double sigma=0.5;
@@ -3410,20 +3467,20 @@ escript::Data Brick::randomFillWorker(
     } else if (len(filter) == 3) {
         bp::extract<string> ex(filter[0]);
         if (!ex.check() || (ex() != "gaussian")) {
-            throw RipleyException("Unsupported random filter for Brick.");
+            throw ValueError("Unsupported random filter for Brick.");
         }
         bp::extract<unsigned int> ex1(filter[1]);
         if (!ex1.check()) {
-            throw RipleyException("Radius of gaussian filter must be a positive integer.");
+            throw ValueError("Radius of gaussian filter must be a positive integer.");
         }
         radius=ex1();
         sigma=0.5;
         bp::extract<double> ex2(filter[2]);
         if (!ex2.check() || (sigma=ex2()) <= 0) {
-            throw RipleyException("Sigma must be a positive floating point number.");
+            throw ValueError("Sigma must be a positive floating point number.");
         }
     } else {
-        throw RipleyException("Unsupported random filter");
+        throw ValueError("Unsupported random filter");
     }
 
     // number of points in the internal region
@@ -3438,23 +3495,23 @@ escript::Data Brick::randomFillWorker(
     // That is, would not cross multiple ranks in MPI
 
     if (2*radius>=internal[0]-4) {
-        throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
+        throw ValueError("Radius of gaussian filter is too large for X dimension of a rank");
     }
     if (2*radius>=internal[1]-4) {
-        throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
+        throw ValueError("Radius of gaussian filter is too large for Y dimension of a rank");
     }
     if (2*radius>=internal[2]-4) {
-        throw RipleyException("Radius of gaussian filter is too large for Z dimension of a rank");
+        throw ValueError("Radius of gaussian filter is too large for Z dimension of a rank");
     }
 
     double* src=new double[ext[0]*ext[1]*ext[2]*numvals];
-    esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*ext[2]*numvals);
+    escript::randomFillArray(seed, src, ext[0]*ext[1]*ext[2]*numvals);
 
 #ifdef ESYS_MPI
     if ((internal[0]<5) || (internal[1]<5) || (internal[2]<5)) {
         // since the dimensions are equal for all ranks, this exception
         // will be thrown on all ranks
-        throw RipleyException("Random Data in Ripley requires at least five elements per side per rank.");
+        throw ValueError("Random Data in Ripley requires at least five elements per side per rank.");
     }
     dim_t X=m_mpiInfo->rank%m_NX[0];
     dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
@@ -3470,9 +3527,9 @@ escript::Data Brick::randomFillWorker(
     basex=X*m_gNE[0]/m_NX[0];
     basey=Y*m_gNE[1]/m_NX[1];
     basez=Z*m_gNE[2]/m_NX[2];
-std::cout << "basex=" << basex << " basey=" << basey << " basez=" << basez << std::endl;
+    std::cout << "basex=" << basex << " basey=" << basey << " basez=" << basez << std::endl;
 #endif
-    esysUtils::patternFillArray(1, ext[0],ext[1],ext[2], src, 4, basex, basey, basez, numvals);
+    escript::patternFillArray(1, ext[0],ext[1],ext[2], src, 4, basex, basey, basez, numvals);
 */
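The filter handling above boils down to a few sanity checks: the tuple must describe a gaussian filter, sigma must be positive, and twice the radius must fit inside one rank's internal region in every dimension. A minimal sketch of those checks in plain C++ (checkGaussianFilter is a toy name and std::invalid_argument stands in for ValueError; this is not the boost::python plumbing used above):

    #include <stdexcept>

    // internal[] is the per-rank number of internal points, as in the checks above
    void checkGaussianFilter(int radius, double sigma, const int internal[3])
    {
        if (radius <= 0)
            throw std::invalid_argument("Radius of gaussian filter must be a positive integer.");
        if (sigma <= 0.)
            throw std::invalid_argument("Sigma must be a positive floating point number.");
        for (int d = 0; d < 3; d++) {
            if (2*radius >= internal[d] - 4)
                throw std::invalid_argument("Radius of gaussian filter is too large for one rank");
        }
    }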
 
 #ifdef ESYS_MPI
@@ -3536,7 +3593,7 @@ std::cout << "basex=" << basex << " basey=" << basey << " basez=" << basez << st
         escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
         escript::Data resdat(0, shape, fs , true);
         // don't need to check for exwrite because we just made it
-        escript::DataVector& dv=resdat.getExpandedVectorReference();
+        escript::DataTypes::RealVectorType& dv=resdat.getExpandedVectorReference();
 
         // now we need to copy values over
         for (size_t z=0; z < internal[2]; ++z) {
@@ -3554,7 +3611,7 @@ std::cout << "basex=" << basex << " basey=" << basey << " basez=" << basez << st
         escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
         escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
         // don't need to check for exwrite because we just made it
-        escript::DataVector& dv=resdat.getExpandedVectorReference();
+        escript::DataTypes::RealVectorType& dv=resdat.getExpandedVectorReference();
         double* convolution=get3DGauss(radius, sigma);
 
         for (size_t z=0;z<(internal[2]);++z) {
@@ -3578,10 +3635,12 @@ dim_t Brick::findNode(const double *coords) const
     //is the found element even owned by this rank
     // (inside owned or shared elements but will map to an owned element)
     for (int dim = 0; dim < m_numDim; dim++) {
+        // allows for a point outside mapping onto a node
         double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
-                - m_dx[dim]/2.; //allows for point outside mapping onto node
+                - m_dx[dim]/2. + escript::DataTypes::real_t_eps();
+
         double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
-                + m_dx[dim]/2.;
+                + m_dx[dim]/2. - escript::DataTypes::real_t_eps();
         if (min > coords[dim] || max < coords[dim]) {
             return NOT_MINE;
         }
@@ -3631,8 +3690,21 @@ dim_t Brick::findNode(const double *coords) const
 
 Assembler_ptr Brick::createAssembler(string type, const DataMap& constants) const
 {
+    bool isComplex = false;
+    DataMap::const_iterator it;
+    for (it = constants.begin(); it != constants.end(); it++) {
+        if (!it->second.isEmpty() && it->second.isComplex()) {
+            isComplex = true;
+            break;
+        }
+    }
+
     if (type.compare("DefaultAssembler") == 0) {
-        return Assembler_ptr(new DefaultAssembler3D(shared_from_this(), m_dx, m_NE, m_NN));
+        if (isComplex) {
+            return Assembler_ptr(new DefaultAssembler3D<cplx_t>(shared_from_this(), m_dx, m_NE, m_NN));
+        } else {
+            return Assembler_ptr(new DefaultAssembler3D<real_t>(shared_from_this(), m_dx, m_NE, m_NN));
+        }
     } else if (type.compare("WaveAssembler") == 0) {
         return Assembler_ptr(new WaveAssembler3D(shared_from_this(), m_dx, m_NE, m_NN, constants));
     } else if (type.compare("LameAssembler") == 0) {
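createAssembler now decides between a real and a complex DefaultAssembler3D by scanning the supplied coefficients: one non-empty complex Data entry switches the whole assembler to cplx_t. A minimal sketch of that selection pattern with toy stand-ins (ToyData, ToyAssembler3D and makeDefaultAssembler are illustrative names, not the ripley API):

    #include <complex>
    #include <map>
    #include <memory>
    #include <string>

    struct ToyData {                       // stands in for escript::Data here
        bool empty;
        bool complexValued;
        bool isEmpty()   const { return empty; }
        bool isComplex() const { return complexValued; }
    };
    typedef std::map<std::string, ToyData> ToyDataMap;

    struct AssemblerBase { virtual ~AssemblerBase() {} };
    template<class Scalar> struct ToyAssembler3D : AssemblerBase {};

    std::shared_ptr<AssemblerBase> makeDefaultAssembler(const ToyDataMap& constants)
    {
        bool isComplex = false;
        for (ToyDataMap::const_iterator it = constants.begin();
                it != constants.end(); ++it) {
            if (!it->second.isEmpty() && it->second.isComplex()) {
                isComplex = true;          // one complex coefficient selects the complex path
                break;
            }
        }
        if (isComplex)
            return std::shared_ptr<AssemblerBase>(
                    new ToyAssembler3D<std::complex<double> >());
        return std::shared_ptr<AssemblerBase>(new ToyAssembler3D<double>());
    }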
diff --git a/ripley/src/Brick.h b/ripley/src/Brick.h
index b9b99df..f79bc8c 100644
--- a/ripley/src/Brick.h
+++ b/ripley/src/Brick.h
@@ -17,7 +17,6 @@
 #ifndef __RIPLEY_BRICK_H__
 #define __RIPLEY_BRICK_H__
 
-#include <paso/Coupler.h>
 #include <ripley/RipleyDomain.h>
 
 namespace ripley {
@@ -28,7 +27,7 @@ namespace ripley {
 */
 class RIPLEY_DLL_API Brick: public RipleyDomain
 {
-    friend class DefaultAssembler3D;
+    template<class Scalar> friend class DefaultAssembler3D;
     friend class WaveAssembler3D;
     friend class LameAssembler3D;
 public:
@@ -88,10 +87,8 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
 
-#ifdef USE_BOOSTIO
     virtual void readBinaryGridFromZipped(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#endif
 
     /**
     */
@@ -192,6 +189,8 @@ public:
                                      long seed,
                                      const boost::python::tuple& filter) const;
 
+    virtual Assembler_ptr createAssembler(std::string type,
+                                          const DataMap& options) const;
     /**
        \brief
        returns the lengths of the domain
@@ -216,35 +215,49 @@ protected:
     virtual dim_t getNumElements() const;
     virtual dim_t getNumFaceElements() const;
     virtual dim_t getNumDOF() const;
+    virtual dim_t getNumDOFInAxis(unsigned axis) const;
+    virtual dim_t getFirstInDim(unsigned axis) const;
+
     virtual IndexVector getDiagonalIndices(bool upperOnly) const;
     virtual void assembleCoordinates(escript::Data& arg) const;
-    virtual void assembleGradient(escript::Data& out, const escript::Data& in) const;
-    virtual void assembleIntegrate(DoubleVector& integrals, const escript::Data& arg) const;
+    virtual void assembleGradient(escript::Data& out,
+                                  const escript::Data& in) const;
+    virtual void assembleIntegrate(DoubleVector& integrals,
+                                   const escript::Data& arg) const;
+    virtual std::vector<IndexVector> getConnections(bool includeShared=false) const;
+#ifdef ESYS_HAVE_TRILINOS
+    virtual esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph() const;
+#endif
+
+#ifdef ESYS_HAVE_PASO
     virtual paso::SystemMatrixPattern_ptr getPasoMatrixPattern(
                              bool reducedRowOrder, bool reducedColOrder) const;
+#endif
     virtual void interpolateNodesOnElements(escript::Data& out,
                                   const escript::Data& in, bool reduced) const;
     virtual void interpolateNodesOnFaces(escript::Data& out,
                                          const escript::Data& in,
                                          bool reduced) const;
     virtual void nodesToDOF(escript::Data& out, const escript::Data& in) const;
-    virtual void dofToNodes(escript::Data& out, const escript::Data& in) const;
     virtual dim_t getDofOfNode(dim_t node) const;
-    Assembler_ptr createAssembler(std::string type, const DataMap& constants) const;
 
     void populateSampleIds();
     void populateDofMap();
-    std::vector<IndexVector> getConnections() const;
+
+    template<typename Scalar>
     void addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
-           const DoubleVector& EM_S, const DoubleVector& EM_F,
+           const std::vector<Scalar>& EM_S, const std::vector<Scalar>& EM_F,
            bool addS, bool addF, index_t firstNode, int nEq=1, int nComp=1) const;
 
     template<typename ValueType>
     void readBinaryGridImpl(escript::Data& out, const std::string& filename,
                             const ReaderParameters& params) const;
+#ifdef ESYS_HAVE_BOOST_IO
     template<typename ValueType>
-    void readBinaryGridZippedImpl(escript::Data& out, const std::string& filename,
-                            const ReaderParameters& params) const;
+    void readBinaryGridZippedImpl(escript::Data& out,
+                                  const std::string& filename,
+                                  const ReaderParameters& params) const;
+#endif
     template<typename ValueType>
     void writeBinaryGridImpl(const escript::Data& in,
                              const std::string& filename, int byteOrder) const;
@@ -299,15 +312,18 @@ protected:
     // vector with first node id on each rank
     IndexVector m_nodeDistribution;
 
-    // vector that maps each node to a DOF index (used for the coupler)
+    // vector that maps each node to a DOF index
     IndexVector m_dofMap;
 
-    // Paso connector used by the system matrix and to interpolate DOF to
-    // nodes
-    paso::Connector_ptr m_connector;
-
+#ifdef ESYS_HAVE_PASO
     // the Paso System Matrix pattern
     mutable paso::SystemMatrixPattern_ptr m_pattern;
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// Trilinos graph structure, cached for efficiency
+    mutable esys_trilinos::const_TrilinosGraph_ptr m_graph;
+#endif
 };
 
 ////////////////////////////// inline methods ////////////////////////////////
@@ -323,8 +339,8 @@ inline dim_t Brick::getNumDataPointsGlobal() const
 
 inline double Brick::getLocalCoordinate(index_t index, int dim) const
 {
-    EsysAssert((dim>=0 && dim<3), "'dim' out of bounds");
-    EsysAssert((index>=0 && index<m_NN[dim]), "'index' out of bounds");
+    ESYS_ASSERT(dim>=0 && dim<3, "'dim' out of bounds");
+    ESYS_ASSERT(index>=0 && index<m_NN[dim], "'index' out of bounds");
     return m_origin[dim]+m_dx[dim]*(m_offset[dim]+index);
 }
 
@@ -343,6 +359,13 @@ inline dim_t Brick::getNumDOF() const
 }
 
 //protected
+inline dim_t Brick::getNumDOFInAxis(unsigned axis) const
+{
+    ESYS_ASSERT(axis < m_numDim, "Invalid axis");
+    return (m_gNE[axis]+1)/m_NX[axis];
+}
+
+//protected
 inline dim_t Brick::getNumNodes() const
 {
     return m_NN[0]*m_NN[1]*m_NN[2];
@@ -361,6 +384,12 @@ inline dim_t Brick::getNumFaceElements() const
             + m_faceCount[3] + m_faceCount[4] + m_faceCount[5];
 }
 
+//protected
+inline index_t Brick::getFirstInDim(unsigned axis) const
+{
+    return m_offset[axis] == 0 ? 0 : 1;
+}
+
 } // end of namespace ripley
 
 #endif // __RIPLEY_BRICK_H__
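The friend declaration in the header is now templated (template<class Scalar> friend class DefaultAssembler3D;), so every specialisation of the assembler, real or complex, can reach the domain's protected helpers. A minimal sketch of that friend-template declaration with toy names (ToyDomain and ToyAssembler are illustrative only):

    #include <complex>

    template<class Scalar> class ToyAssembler;   // forward declaration

    class ToyDomain {
        // every specialisation of ToyAssembler becomes a friend
        template<class Scalar> friend class ToyAssembler;
    protected:
        int protectedHelper() const { return 42; }
    };

    template<class Scalar>
    class ToyAssembler {
    public:
        int poke(const ToyDomain& d) const { return d.protectedHelper(); }
    };

    int main() {
        ToyDomain d;
        ToyAssembler<double> ra;
        ToyAssembler<std::complex<double> > ca;
        return (ra.poke(d) == 42 && ca.poke(d) == 42) ? 0 : 1;
    }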
diff --git a/ripley/src/DefaultAssembler2D.cpp b/ripley/src/DefaultAssembler2D.cpp
index d1cda6f..5c4afe8 100644
--- a/ripley/src/DefaultAssembler2D.cpp
+++ b/ripley/src/DefaultAssembler2D.cpp
@@ -14,20 +14,21 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <ripley/DefaultAssembler2D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/DataTypes.h>
+#include <escript/index.h>
+
 using namespace std;
 using escript::AbstractSystemMatrix;
 using escript::Data;
 
 namespace ripley {
 
-void DefaultAssembler2D::collateFunctionSpaceTypes(vector<int>& fsTypes, 
-                                                   const DataMap& coefs) const
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::collateFunctionSpaceTypes(
+                             vector<int>& fsTypes, const DataMap& coefs) const
 {
     if (isNotEmpty("A", coefs))
         fsTypes.push_back(coefs.find("A")->second.getFunctionSpace().getTypeCode());
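DefaultAssembler2D is now a class template, so each member is defined out of class with a template<class Scalar> prefix and a DefaultAssembler2D<Scalar>:: qualifier, as in the hunk above. A minimal sketch of that layout with toy names; the explicit instantiations for the real and complex scalar types would presumably sit at the end of the .cpp (they are not part of this excerpt):

    #include <complex>
    #include <vector>

    template<class Scalar>
    class ToyAssembler2D {
    public:
        void collate(std::vector<int>& fsTypes) const;
    };

    // every member definition carries the template<class Scalar> prefix
    // and the ToyAssembler2D<Scalar>:: qualifier
    template<class Scalar>
    void ToyAssembler2D<Scalar>::collate(std::vector<int>& fsTypes) const
    {
        fsTypes.push_back(0);              // placeholder body
    }

    // with the definitions kept in the .cpp, the needed specialisations
    // are instantiated explicitly
    template class ToyAssembler2D<double>;
    template class ToyAssembler2D<std::complex<double> >;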
@@ -47,7 +48,8 @@ void DefaultAssembler2D::collateFunctionSpaceTypes(vector<int>& fsTypes,
 // wrappers
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESingle(AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
@@ -60,7 +62,9 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
 
 }
 
-void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySingle(
+                                        AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const 
 {
     const Data& d = unpackData("d", coefs);
@@ -68,7 +72,9 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
     assemblePDEBoundarySingle(mat, rhs, d, y);
 }
 
-void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESingleReduced(
+                                        AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
@@ -80,7 +86,8 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
     assemblePDESingleReduced(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySingleReduced(
                                         AbstractSystemMatrix* mat, Data& rhs,
                                         const DataMap& coefs) const
 {
@@ -89,7 +96,8 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
     assemblePDEBoundarySingleReduced(mat, rhs, d, y);
 }
 
-void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESystem(AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
@@ -101,7 +109,9 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
     assemblePDESystem(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySystem(
+                                        AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& d = unpackData("d", coefs);
@@ -109,7 +119,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
     assemblePDEBoundarySystem(mat, rhs, d, y);
 }
 
-void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESystemReduced(
+                                        AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs); 
@@ -121,7 +133,8 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
     assemblePDESystemReduced(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySystemReduced(
                                         AbstractSystemMatrix* mat, Data& rhs,
                                         const DataMap& coefs) const
 {
@@ -134,7 +147,8 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
 // PDE SINGLE
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESingle(AbstractSystemMatrix* mat,
                                       Data& rhs, const Data& A, const Data& B,
                                       const Data& C, const Data& D,
                                       const Data& X, const Data& Y) const
@@ -172,100 +186,101 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool addEM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4, 0);
-        vector<double> EM_F(4, 0);
+        vector<Scalar> EM_S(4*4, zero);
+        vector<Scalar> EM_F(4, zero);
 
-        for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
+        for (index_t k1_0 = 0; k1_0 < 2; k1_0++) { // colouring
 #pragma omp for
-            for (index_t k1=k1_0; k1<NE1; k1+=2) {
-                for (index_t k0=0; k0<NE0; ++k0)  {
+            for (index_t k1 = k1_0; k1 < NE1; k1+=2) {
+                for (index_t k0 = 0; k0 < NE0; ++k0)  {
                     const index_t e = k0 + NE0*k1;
                     if (addEM_S)
-                        fill(EM_S.begin(), EM_S.end(), 0);
+                        fill(EM_S.begin(), EM_S.end(), zero);
                     if (addEM_F)
-                        fill(EM_F.begin(), EM_F.end(), 0);
+                        fill(EM_F.begin(), EM_F.end(), zero);
                     ///////////////
                     // process A //
                     ///////////////
                     if (!A.isEmpty()) {
-                        const double* A_p = A.getSampleDataRO(e);
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
                         if (A.actsExpanded()) {
-                            const double A_00_0 = A_p[INDEX3(0,0,0,2,2)];
-                            const double A_01_0 = A_p[INDEX3(0,1,0,2,2)];
-                            const double A_10_0 = A_p[INDEX3(1,0,0,2,2)];
-                            const double A_11_0 = A_p[INDEX3(1,1,0,2,2)];
-                            const double A_00_1 = A_p[INDEX3(0,0,1,2,2)];
-                            const double A_01_1 = A_p[INDEX3(0,1,1,2,2)];
-                            const double A_10_1 = A_p[INDEX3(1,0,1,2,2)];
-                            const double A_11_1 = A_p[INDEX3(1,1,1,2,2)];
-                            const double A_00_2 = A_p[INDEX3(0,0,2,2,2)];
-                            const double A_01_2 = A_p[INDEX3(0,1,2,2,2)];
-                            const double A_10_2 = A_p[INDEX3(1,0,2,2,2)];
-                            const double A_11_2 = A_p[INDEX3(1,1,2,2,2)];
-                            const double A_00_3 = A_p[INDEX3(0,0,3,2,2)];
-                            const double A_01_3 = A_p[INDEX3(0,1,3,2,2)];
-                            const double A_10_3 = A_p[INDEX3(1,0,3,2,2)];
-                            const double A_11_3 = A_p[INDEX3(1,1,3,2,2)];
-                            const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                            const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                            const double tmp2 = w4*(A_00_2 + A_00_3);
-                            const double tmp3 = w0*(A_00_0 + A_00_1);
-                            const double tmp4 = w5*(A_01_2 - A_10_3);
-                            const double tmp5 = w2*(-A_01_1 + A_10_0);
-                            const double tmp6 = w5*(A_01_3 + A_10_0);
-                            const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                            const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                            const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                            const double tmp10 = w2*(-A_01_0 - A_10_3);
-                            const double tmp11 = w4*(A_00_0 + A_00_1);
-                            const double tmp12 = w0*(A_00_2 + A_00_3);
-                            const double tmp13 = w5*(A_01_1 - A_10_0);
-                            const double tmp14 = w2*(-A_01_2 + A_10_3);
-                            const double tmp15 = w7*(A_11_0 + A_11_2);
-                            const double tmp16 = w4*(-A_00_2 - A_00_3);
-                            const double tmp17 = w0*(-A_00_0 - A_00_1);
-                            const double tmp18 = w5*(A_01_3 + A_10_3);
-                            const double tmp19 = w8*(A_11_1 + A_11_3);
-                            const double tmp20 = w2*(-A_01_0 - A_10_0);
-                            const double tmp21 = w7*(A_11_1 + A_11_3);
-                            const double tmp22 = w4*(-A_00_0 - A_00_1);
-                            const double tmp23 = w0*(-A_00_2 - A_00_3);
-                            const double tmp24 = w5*(A_01_0 + A_10_0);
-                            const double tmp25 = w8*(A_11_0 + A_11_2);
-                            const double tmp26 = w2*(-A_01_3 - A_10_3);
-                            const double tmp27 = w5*(-A_01_1 - A_10_2);
-                            const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                            const double tmp29 = w2*(A_01_2 + A_10_1);
-                            const double tmp30 = w7*(-A_11_1 - A_11_3);
-                            const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                            const double tmp32 = w5*(-A_01_0 + A_10_2);
-                            const double tmp33 = w8*(-A_11_0 - A_11_2);
-                            const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                            const double tmp35 = w2*(A_01_3 - A_10_1);
-                            const double tmp36 = w5*(A_01_0 + A_10_3);
-                            const double tmp37 = w2*(-A_01_3 - A_10_0);
-                            const double tmp38 = w7*(-A_11_0 - A_11_2);
-                            const double tmp39 = w5*(-A_01_3 + A_10_1);
-                            const double tmp40 = w8*(-A_11_1 - A_11_3);
-                            const double tmp41 = w2*(A_01_0 - A_10_2);
-                            const double tmp42 = w5*(A_01_1 - A_10_3);
-                            const double tmp43 = w2*(-A_01_2 + A_10_0);
-                            const double tmp44 = w5*(A_01_2 - A_10_0);
-                            const double tmp45 = w2*(-A_01_1 + A_10_3);
-                            const double tmp46 = w5*(-A_01_0 + A_10_1);
-                            const double tmp47 = w2*(A_01_3 - A_10_2);
-                            const double tmp48 = w5*(-A_01_1 - A_10_1);
-                            const double tmp49 = w2*(A_01_2 + A_10_2);
-                            const double tmp50 = w5*(-A_01_3 + A_10_2);
-                            const double tmp51 = w2*(A_01_0 - A_10_1);
-                            const double tmp52 = w5*(-A_01_2 - A_10_1);
-                            const double tmp53 = w2*(A_01_1 + A_10_2);
-                            const double tmp54 = w5*(-A_01_2 - A_10_2);
-                            const double tmp55 = w2*(A_01_1 + A_10_1);
+                            const Scalar A_00_0 = A_p[INDEX3(0,0,0,2,2)];
+                            const Scalar A_01_0 = A_p[INDEX3(0,1,0,2,2)];
+                            const Scalar A_10_0 = A_p[INDEX3(1,0,0,2,2)];
+                            const Scalar A_11_0 = A_p[INDEX3(1,1,0,2,2)];
+                            const Scalar A_00_1 = A_p[INDEX3(0,0,1,2,2)];
+                            const Scalar A_01_1 = A_p[INDEX3(0,1,1,2,2)];
+                            const Scalar A_10_1 = A_p[INDEX3(1,0,1,2,2)];
+                            const Scalar A_11_1 = A_p[INDEX3(1,1,1,2,2)];
+                            const Scalar A_00_2 = A_p[INDEX3(0,0,2,2,2)];
+                            const Scalar A_01_2 = A_p[INDEX3(0,1,2,2,2)];
+                            const Scalar A_10_2 = A_p[INDEX3(1,0,2,2,2)];
+                            const Scalar A_11_2 = A_p[INDEX3(1,1,2,2,2)];
+                            const Scalar A_00_3 = A_p[INDEX3(0,0,3,2,2)];
+                            const Scalar A_01_3 = A_p[INDEX3(0,1,3,2,2)];
+                            const Scalar A_10_3 = A_p[INDEX3(1,0,3,2,2)];
+                            const Scalar A_11_3 = A_p[INDEX3(1,1,3,2,2)];
+                            const Scalar tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                            const Scalar tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                            const Scalar tmp2 = w4*(A_00_2 + A_00_3);
+                            const Scalar tmp3 = w0*(A_00_0 + A_00_1);
+                            const Scalar tmp4 = w5*(A_01_2 - A_10_3);
+                            const Scalar tmp5 = w2*(-A_01_1 + A_10_0);
+                            const Scalar tmp6 = w5*(A_01_3 + A_10_0);
+                            const Scalar tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                            const Scalar tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                            const Scalar tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                            const Scalar tmp10 = w2*(-A_01_0 - A_10_3);
+                            const Scalar tmp11 = w4*(A_00_0 + A_00_1);
+                            const Scalar tmp12 = w0*(A_00_2 + A_00_3);
+                            const Scalar tmp13 = w5*(A_01_1 - A_10_0);
+                            const Scalar tmp14 = w2*(-A_01_2 + A_10_3);
+                            const Scalar tmp15 = w7*(A_11_0 + A_11_2);
+                            const Scalar tmp16 = w4*(-A_00_2 - A_00_3);
+                            const Scalar tmp17 = w0*(-A_00_0 - A_00_1);
+                            const Scalar tmp18 = w5*(A_01_3 + A_10_3);
+                            const Scalar tmp19 = w8*(A_11_1 + A_11_3);
+                            const Scalar tmp20 = w2*(-A_01_0 - A_10_0);
+                            const Scalar tmp21 = w7*(A_11_1 + A_11_3);
+                            const Scalar tmp22 = w4*(-A_00_0 - A_00_1);
+                            const Scalar tmp23 = w0*(-A_00_2 - A_00_3);
+                            const Scalar tmp24 = w5*(A_01_0 + A_10_0);
+                            const Scalar tmp25 = w8*(A_11_0 + A_11_2);
+                            const Scalar tmp26 = w2*(-A_01_3 - A_10_3);
+                            const Scalar tmp27 = w5*(-A_01_1 - A_10_2);
+                            const Scalar tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                            const Scalar tmp29 = w2*(A_01_2 + A_10_1);
+                            const Scalar tmp30 = w7*(-A_11_1 - A_11_3);
+                            const Scalar tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                            const Scalar tmp32 = w5*(-A_01_0 + A_10_2);
+                            const Scalar tmp33 = w8*(-A_11_0 - A_11_2);
+                            const Scalar tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                            const Scalar tmp35 = w2*(A_01_3 - A_10_1);
+                            const Scalar tmp36 = w5*(A_01_0 + A_10_3);
+                            const Scalar tmp37 = w2*(-A_01_3 - A_10_0);
+                            const Scalar tmp38 = w7*(-A_11_0 - A_11_2);
+                            const Scalar tmp39 = w5*(-A_01_3 + A_10_1);
+                            const Scalar tmp40 = w8*(-A_11_1 - A_11_3);
+                            const Scalar tmp41 = w2*(A_01_0 - A_10_2);
+                            const Scalar tmp42 = w5*(A_01_1 - A_10_3);
+                            const Scalar tmp43 = w2*(-A_01_2 + A_10_0);
+                            const Scalar tmp44 = w5*(A_01_2 - A_10_0);
+                            const Scalar tmp45 = w2*(-A_01_1 + A_10_3);
+                            const Scalar tmp46 = w5*(-A_01_0 + A_10_1);
+                            const Scalar tmp47 = w2*(A_01_3 - A_10_2);
+                            const Scalar tmp48 = w5*(-A_01_1 - A_10_1);
+                            const Scalar tmp49 = w2*(A_01_2 + A_10_2);
+                            const Scalar tmp50 = w5*(-A_01_3 + A_10_2);
+                            const Scalar tmp51 = w2*(A_01_0 - A_10_1);
+                            const Scalar tmp52 = w5*(-A_01_2 - A_10_1);
+                            const Scalar tmp53 = w2*(A_01_1 + A_10_2);
+                            const Scalar tmp54 = w5*(-A_01_2 - A_10_2);
+                            const Scalar tmp55 = w2*(A_01_1 + A_10_1);
                             EM_S[INDEX2(0,0,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
                             EM_S[INDEX2(0,1,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
                             EM_S[INDEX2(0,2,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
@@ -283,62 +298,62 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
                             EM_S[INDEX2(3,2,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
                             EM_S[INDEX2(3,3,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
                         } else { // constant data
-                            const double A_00 = A_p[INDEX2(0,0,2)];
-                            const double A_01 = A_p[INDEX2(0,1,2)];
-                            const double A_10 = A_p[INDEX2(1,0,2)];
-                            const double A_11 = A_p[INDEX2(1,1,2)];
-                            const double tmp0 = 6*w1*(A_01 - A_10);
-                            const double tmp1 = 6*w1*(A_01 + A_10);
-                            const double tmp2 = 6*w1*(-A_01 - A_10);
-                            const double tmp3 = 6*w1*(-A_01 + A_10);
-                            EM_S[INDEX2(0,0,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
-                            EM_S[INDEX2(0,1,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
-                            EM_S[INDEX2(0,2,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
-                            EM_S[INDEX2(0,3,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
-                            EM_S[INDEX2(1,0,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
-                            EM_S[INDEX2(1,1,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
-                            EM_S[INDEX2(1,2,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
-                            EM_S[INDEX2(1,3,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
-                            EM_S[INDEX2(2,0,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
-                            EM_S[INDEX2(2,1,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
-                            EM_S[INDEX2(2,2,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
-                            EM_S[INDEX2(2,3,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
-                            EM_S[INDEX2(3,0,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
-                            EM_S[INDEX2(3,1,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
-                            EM_S[INDEX2(3,2,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
-                            EM_S[INDEX2(3,3,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
+                            const Scalar A_00 = A_p[INDEX2(0,0,2)];
+                            const Scalar A_01 = A_p[INDEX2(0,1,2)];
+                            const Scalar A_10 = A_p[INDEX2(1,0,2)];
+                            const Scalar A_11 = A_p[INDEX2(1,1,2)];
+                            const Scalar tmp0 = 6.*w1*(A_01 - A_10);
+                            const Scalar tmp1 = 6.*w1*(A_01 + A_10);
+                            const Scalar tmp2 = 6.*w1*(-A_01 - A_10);
+                            const Scalar tmp3 = 6.*w1*(-A_01 + A_10);
+                            EM_S[INDEX2(0,0,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp1;
+                            EM_S[INDEX2(0,1,4)]+=8.*A_00*w6 + 4.*A_11*w3 + tmp0;
+                            EM_S[INDEX2(0,2,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp3;
+                            EM_S[INDEX2(0,3,4)]+=4.*A_00*w6 - 4.*A_11*w3 + tmp2;
+                            EM_S[INDEX2(1,0,4)]+=8.*A_00*w6 + 4.*A_11*w3 + tmp3;
+                            EM_S[INDEX2(1,1,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp2;
+                            EM_S[INDEX2(1,2,4)]+=4.*A_00*w6 - 4.*A_11*w3 + tmp1;
+                            EM_S[INDEX2(1,3,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp0;
+                            EM_S[INDEX2(2,0,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp0;
+                            EM_S[INDEX2(2,1,4)]+=4.*A_00*w6 - 4.*A_11*w3 + tmp1;
+                            EM_S[INDEX2(2,2,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp2;
+                            EM_S[INDEX2(2,3,4)]+=8.*A_00*w6 + 4.*A_11*w3 + tmp3;
+                            EM_S[INDEX2(3,0,4)]+=4.*A_00*w6 - 4.*A_11*w3 + tmp2;
+                            EM_S[INDEX2(3,1,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp3;
+                            EM_S[INDEX2(3,2,4)]+=8.*A_00*w6 + 4.*A_11*w3 + tmp0;
+                            EM_S[INDEX2(3,3,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp1;
                         }
                     }
                     ///////////////
                     // process B //
                     ///////////////
                     if (!B.isEmpty()) {
-                        const double* B_p=B.getSampleDataRO(e);
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
                         if (B.actsExpanded()) {
-                            const double B_0_0 = B_p[INDEX2(0,0,2)];
-                            const double B_1_0 = B_p[INDEX2(1,0,2)];
-                            const double B_0_1 = B_p[INDEX2(0,1,2)];
-                            const double B_1_1 = B_p[INDEX2(1,1,2)];
-                            const double B_0_2 = B_p[INDEX2(0,2,2)];
-                            const double B_1_2 = B_p[INDEX2(1,2,2)];
-                            const double B_0_3 = B_p[INDEX2(0,3,2)];
-                            const double B_1_3 = B_p[INDEX2(1,3,2)];
-                            const double tmp0 = w11*(B_1_0 + B_1_1);
-                            const double tmp1 = w14*(B_1_2 + B_1_3);
-                            const double tmp2 = w15*(-B_0_1 - B_0_3);
-                            const double tmp3 = w10*(-B_0_0 - B_0_2);
-                            const double tmp4 = w11*(B_1_2 + B_1_3);
-                            const double tmp5 = w14*(B_1_0 + B_1_1);
-                            const double tmp6 = w11*(-B_1_2 - B_1_3);
-                            const double tmp7 = w14*(-B_1_0 - B_1_1);
-                            const double tmp8 = w11*(-B_1_0 - B_1_1);
-                            const double tmp9 = w14*(-B_1_2 - B_1_3);
-                            const double tmp10 = w10*(-B_0_1 - B_0_3);
-                            const double tmp11 = w15*(-B_0_0 - B_0_2);
-                            const double tmp12 = w15*(B_0_0 + B_0_2);
-                            const double tmp13 = w10*(B_0_1 + B_0_3);
-                            const double tmp14 = w10*(B_0_0 + B_0_2);
-                            const double tmp15 = w15*(B_0_1 + B_0_3);
+                            const Scalar B_0_0 = B_p[INDEX2(0,0,2)];
+                            const Scalar B_1_0 = B_p[INDEX2(1,0,2)];
+                            const Scalar B_0_1 = B_p[INDEX2(0,1,2)];
+                            const Scalar B_1_1 = B_p[INDEX2(1,1,2)];
+                            const Scalar B_0_2 = B_p[INDEX2(0,2,2)];
+                            const Scalar B_1_2 = B_p[INDEX2(1,2,2)];
+                            const Scalar B_0_3 = B_p[INDEX2(0,3,2)];
+                            const Scalar B_1_3 = B_p[INDEX2(1,3,2)];
+                            const Scalar tmp0 = w11*(B_1_0 + B_1_1);
+                            const Scalar tmp1 = w14*(B_1_2 + B_1_3);
+                            const Scalar tmp2 = w15*(-B_0_1 - B_0_3);
+                            const Scalar tmp3 = w10*(-B_0_0 - B_0_2);
+                            const Scalar tmp4 = w11*(B_1_2 + B_1_3);
+                            const Scalar tmp5 = w14*(B_1_0 + B_1_1);
+                            const Scalar tmp6 = w11*(-B_1_2 - B_1_3);
+                            const Scalar tmp7 = w14*(-B_1_0 - B_1_1);
+                            const Scalar tmp8 = w11*(-B_1_0 - B_1_1);
+                            const Scalar tmp9 = w14*(-B_1_2 - B_1_3);
+                            const Scalar tmp10 = w10*(-B_0_1 - B_0_3);
+                            const Scalar tmp11 = w15*(-B_0_0 - B_0_2);
+                            const Scalar tmp12 = w15*(B_0_0 + B_0_2);
+                            const Scalar tmp13 = w10*(B_0_1 + B_0_3);
+                            const Scalar tmp14 = w10*(B_0_0 + B_0_2);
+                            const Scalar tmp15 = w15*(B_0_1 + B_0_3);
                             EM_S[INDEX2(0,0,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
                             EM_S[INDEX2(0,1,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
                             EM_S[INDEX2(0,2,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
@@ -356,56 +371,56 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
                             EM_S[INDEX2(3,2,4)]+=-B_0_0*w15 - B_0_1*w13 - B_0_2*w12 - B_0_3*w10 + tmp6 + tmp7;
                             EM_S[INDEX2(3,3,4)]+=-B_0_0*w13 - B_0_1*w15 - B_0_2*w10 - B_0_3*w12 - B_1_0*w17 - B_1_1*w11 - B_1_2*w14 - B_1_3*w16;
                         } else { // constant data
-                            const double B_0 = B_p[0];
-                            const double B_1 = B_p[1];
-                            EM_S[INDEX2(0,0,4)]+= 2*B_0*w18 + 2*B_1*w19;
-                            EM_S[INDEX2(0,1,4)]+= 2*B_0*w18 +   B_1*w19;
-                            EM_S[INDEX2(0,2,4)]+=   B_0*w18 + 2*B_1*w19;
-                            EM_S[INDEX2(0,3,4)]+=   B_0*w18 +   B_1*w19;
-                            EM_S[INDEX2(1,0,4)]+=-2*B_0*w18 +   B_1*w19;
-                            EM_S[INDEX2(1,1,4)]+=-2*B_0*w18 + 2*B_1*w19;
-                            EM_S[INDEX2(1,2,4)]+=  -B_0*w18 +   B_1*w19;
-                            EM_S[INDEX2(1,3,4)]+=  -B_0*w18 + 2*B_1*w19;
-                            EM_S[INDEX2(2,0,4)]+=   B_0*w18 - 2*B_1*w19;
-                            EM_S[INDEX2(2,1,4)]+=   B_0*w18 -   B_1*w19;
-                            EM_S[INDEX2(2,2,4)]+= 2*B_0*w18 - 2*B_1*w19;
-                            EM_S[INDEX2(2,3,4)]+= 2*B_0*w18 -   B_1*w19;
-                            EM_S[INDEX2(3,0,4)]+=  -B_0*w18 -   B_1*w19;
-                            EM_S[INDEX2(3,1,4)]+=  -B_0*w18 - 2*B_1*w19;
-                            EM_S[INDEX2(3,2,4)]+=-2*B_0*w18 -   B_1*w19;
-                            EM_S[INDEX2(3,3,4)]+=-2*B_0*w18 - 2*B_1*w19;
+                            const Scalar B_0 = B_p[0];
+                            const Scalar B_1 = B_p[1];
+                            EM_S[INDEX2(0,0,4)]+= 2.*B_0*w18 + 2.*B_1*w19;
+                            EM_S[INDEX2(0,1,4)]+= 2.*B_0*w18 +    B_1*w19;
+                            EM_S[INDEX2(0,2,4)]+=    B_0*w18 + 2.*B_1*w19;
+                            EM_S[INDEX2(0,3,4)]+=    B_0*w18 +    B_1*w19;
+                            EM_S[INDEX2(1,0,4)]+=-2.*B_0*w18 +    B_1*w19;
+                            EM_S[INDEX2(1,1,4)]+=-2.*B_0*w18 + 2.*B_1*w19;
+                            EM_S[INDEX2(1,2,4)]+=   -B_0*w18 +    B_1*w19;
+                            EM_S[INDEX2(1,3,4)]+=   -B_0*w18 + 2.*B_1*w19;
+                            EM_S[INDEX2(2,0,4)]+=    B_0*w18 - 2.*B_1*w19;
+                            EM_S[INDEX2(2,1,4)]+=    B_0*w18 -    B_1*w19;
+                            EM_S[INDEX2(2,2,4)]+= 2.*B_0*w18 - 2.*B_1*w19;
+                            EM_S[INDEX2(2,3,4)]+= 2.*B_0*w18 -    B_1*w19;
+                            EM_S[INDEX2(3,0,4)]+=   -B_0*w18 -    B_1*w19;
+                            EM_S[INDEX2(3,1,4)]+=   -B_0*w18 - 2.*B_1*w19;
+                            EM_S[INDEX2(3,2,4)]+=-2.*B_0*w18 -    B_1*w19;
+                            EM_S[INDEX2(3,3,4)]+=-2.*B_0*w18 - 2.*B_1*w19;
                         }
                     }
                     ///////////////
                     // process C //
                     ///////////////
                     if (!C.isEmpty()) {
-                        const double* C_p=C.getSampleDataRO(e);
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
                         if (C.actsExpanded()) {
-                            const double C_0_0 = C_p[INDEX2(0,0,2)];
-                            const double C_1_0 = C_p[INDEX2(1,0,2)];
-                            const double C_0_1 = C_p[INDEX2(0,1,2)];
-                            const double C_1_1 = C_p[INDEX2(1,1,2)];
-                            const double C_0_2 = C_p[INDEX2(0,2,2)];
-                            const double C_1_2 = C_p[INDEX2(1,2,2)];
-                            const double C_0_3 = C_p[INDEX2(0,3,2)];
-                            const double C_1_3 = C_p[INDEX2(1,3,2)];
-                            const double tmp0 = w11*(C_1_0 + C_1_1);
-                            const double tmp1 = w14*(C_1_2 + C_1_3);
-                            const double tmp2 = w15*(C_0_0 + C_0_2);
-                            const double tmp3 = w10*(C_0_1 + C_0_3);
-                            const double tmp4 = w11*(-C_1_0 - C_1_1);
-                            const double tmp5 = w14*(-C_1_2 - C_1_3);
-                            const double tmp6 = w11*(-C_1_2 - C_1_3);
-                            const double tmp7 = w14*(-C_1_0 - C_1_1);
-                            const double tmp8 = w11*(C_1_2 + C_1_3);
-                            const double tmp9 = w14*(C_1_0 + C_1_1);
-                            const double tmp10 = w10*(-C_0_1 - C_0_3);
-                            const double tmp11 = w15*(-C_0_0 - C_0_2);
-                            const double tmp12 = w15*(-C_0_1 - C_0_3);
-                            const double tmp13 = w10*(-C_0_0 - C_0_2);
-                            const double tmp14 = w10*(C_0_0 + C_0_2);
-                            const double tmp15 = w15*(C_0_1 + C_0_3);
+                            const Scalar C_0_0 = C_p[INDEX2(0,0,2)];
+                            const Scalar C_1_0 = C_p[INDEX2(1,0,2)];
+                            const Scalar C_0_1 = C_p[INDEX2(0,1,2)];
+                            const Scalar C_1_1 = C_p[INDEX2(1,1,2)];
+                            const Scalar C_0_2 = C_p[INDEX2(0,2,2)];
+                            const Scalar C_1_2 = C_p[INDEX2(1,2,2)];
+                            const Scalar C_0_3 = C_p[INDEX2(0,3,2)];
+                            const Scalar C_1_3 = C_p[INDEX2(1,3,2)];
+                            const Scalar tmp0 = w11*(C_1_0 + C_1_1);
+                            const Scalar tmp1 = w14*(C_1_2 + C_1_3);
+                            const Scalar tmp2 = w15*(C_0_0 + C_0_2);
+                            const Scalar tmp3 = w10*(C_0_1 + C_0_3);
+                            const Scalar tmp4 = w11*(-C_1_0 - C_1_1);
+                            const Scalar tmp5 = w14*(-C_1_2 - C_1_3);
+                            const Scalar tmp6 = w11*(-C_1_2 - C_1_3);
+                            const Scalar tmp7 = w14*(-C_1_0 - C_1_1);
+                            const Scalar tmp8 = w11*(C_1_2 + C_1_3);
+                            const Scalar tmp9 = w14*(C_1_0 + C_1_1);
+                            const Scalar tmp10 = w10*(-C_0_1 - C_0_3);
+                            const Scalar tmp11 = w15*(-C_0_0 - C_0_2);
+                            const Scalar tmp12 = w15*(-C_0_1 - C_0_3);
+                            const Scalar tmp13 = w10*(-C_0_0 - C_0_2);
+                            const Scalar tmp14 = w10*(C_0_0 + C_0_2);
+                            const Scalar tmp15 = w15*(C_0_1 + C_0_3);
                             EM_S[INDEX2(0,0,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
                             EM_S[INDEX2(0,1,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
                             EM_S[INDEX2(0,2,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
@@ -423,47 +438,47 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
                             EM_S[INDEX2(3,2,4)]+=C_0_0*w13 + C_0_1*w15 + C_0_2*w10 + C_0_3*w12 + tmp6 + tmp7;
                             EM_S[INDEX2(3,3,4)]+=-C_0_0*w13 - C_0_1*w15 - C_0_2*w10 - C_0_3*w12 - C_1_0*w17 - C_1_1*w11 - C_1_2*w14 - C_1_3*w16;
                         } else { // constant data
-                            const double C_0 = C_p[0];
-                            const double C_1 = C_p[1];
-                            EM_S[INDEX2(0,0,4)]+= 2*C_0*w18 + 2*C_1*w19;
-                            EM_S[INDEX2(0,1,4)]+=-2*C_0*w18 +   C_1*w19;
-                            EM_S[INDEX2(0,2,4)]+=   C_0*w18 - 2*C_1*w19;
-                            EM_S[INDEX2(0,3,4)]+=  -C_0*w18 -   C_1*w19;
-                            EM_S[INDEX2(1,0,4)]+= 2*C_0*w18 +   C_1*w19;
-                            EM_S[INDEX2(1,1,4)]+=-2*C_0*w18 + 2*C_1*w19;
-                            EM_S[INDEX2(1,2,4)]+=   C_0*w18 -   C_1*w19;
-                            EM_S[INDEX2(1,3,4)]+=  -C_0*w18 - 2*C_1*w19;
-                            EM_S[INDEX2(2,0,4)]+=   C_0*w18 + 2*C_1*w19;
-                            EM_S[INDEX2(2,1,4)]+=  -C_0*w18 +   C_1*w19;
-                            EM_S[INDEX2(2,2,4)]+= 2*C_0*w18 - 2*C_1*w19;
-                            EM_S[INDEX2(2,3,4)]+=-2*C_0*w18 -   C_1*w19;
-                            EM_S[INDEX2(3,0,4)]+=   C_0*w18 +   C_1*w19;
-                            EM_S[INDEX2(3,1,4)]+=  -C_0*w18 + 2*C_1*w19;
-                            EM_S[INDEX2(3,2,4)]+= 2*C_0*w18 -   C_1*w19;
-                            EM_S[INDEX2(3,3,4)]+=-2*C_0*w18 - 2*C_1*w19;
+                            const Scalar C_0 = C_p[0];
+                            const Scalar C_1 = C_p[1];
+                            EM_S[INDEX2(0,0,4)]+= 2.*C_0*w18 + 2.*C_1*w19;
+                            EM_S[INDEX2(0,1,4)]+=-2.*C_0*w18 +    C_1*w19;
+                            EM_S[INDEX2(0,2,4)]+=    C_0*w18 - 2.*C_1*w19;
+                            EM_S[INDEX2(0,3,4)]+=   -C_0*w18 -    C_1*w19;
+                            EM_S[INDEX2(1,0,4)]+= 2.*C_0*w18 +    C_1*w19;
+                            EM_S[INDEX2(1,1,4)]+=-2.*C_0*w18 + 2.*C_1*w19;
+                            EM_S[INDEX2(1,2,4)]+=    C_0*w18 -    C_1*w19;
+                            EM_S[INDEX2(1,3,4)]+=   -C_0*w18 - 2.*C_1*w19;
+                            EM_S[INDEX2(2,0,4)]+=    C_0*w18 + 2.*C_1*w19;
+                            EM_S[INDEX2(2,1,4)]+=   -C_0*w18 +    C_1*w19;
+                            EM_S[INDEX2(2,2,4)]+= 2.*C_0*w18 - 2.*C_1*w19;
+                            EM_S[INDEX2(2,3,4)]+=-2.*C_0*w18 -    C_1*w19;
+                            EM_S[INDEX2(3,0,4)]+=    C_0*w18 +    C_1*w19;
+                            EM_S[INDEX2(3,1,4)]+=   -C_0*w18 + 2.*C_1*w19;
+                            EM_S[INDEX2(3,2,4)]+= 2.*C_0*w18 -    C_1*w19;
+                            EM_S[INDEX2(3,3,4)]+=-2.*C_0*w18 - 2.*C_1*w19;
                         }
                     }
                     ///////////////
                     // process D //
                     ///////////////
                     if (!D.isEmpty()) {
-                        const double* D_p=D.getSampleDataRO(e);
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
                         if (D.actsExpanded()) {
-                            const double D_0 = D_p[0];
-                            const double D_1 = D_p[1];
-                            const double D_2 = D_p[2];
-                            const double D_3 = D_p[3];
-                            const double tmp0 = w21*(D_2 + D_3);
-                            const double tmp1 = w20*(D_0 + D_1);
-                            const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
-                            const double tmp3 = w21*(D_0 + D_1);
-                            const double tmp4 = w20*(D_2 + D_3);
-                            const double tmp5 = w22*(D_1 + D_2);
-                            const double tmp6 = w21*(D_0 + D_2);
-                            const double tmp7 = w20*(D_1 + D_3);
-                            const double tmp8 = w21*(D_1 + D_3);
-                            const double tmp9 = w20*(D_0 + D_2);
-                            const double tmp10 = w22*(D_0 + D_3);
+                            const Scalar D_0 = D_p[0];
+                            const Scalar D_1 = D_p[1];
+                            const Scalar D_2 = D_p[2];
+                            const Scalar D_3 = D_p[3];
+                            const Scalar tmp0 = w21*(D_2 + D_3);
+                            const Scalar tmp1 = w20*(D_0 + D_1);
+                            const Scalar tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
+                            const Scalar tmp3 = w21*(D_0 + D_1);
+                            const Scalar tmp4 = w20*(D_2 + D_3);
+                            const Scalar tmp5 = w22*(D_1 + D_2);
+                            const Scalar tmp6 = w21*(D_0 + D_2);
+                            const Scalar tmp7 = w20*(D_1 + D_3);
+                            const Scalar tmp8 = w21*(D_1 + D_3);
+                            const Scalar tmp9 = w20*(D_0 + D_2);
+                            const Scalar tmp10 = w22*(D_0 + D_3);
                             EM_S[INDEX2(0,0,4)]+=D_0*w23 + D_3*w24 + tmp5;
                             EM_S[INDEX2(0,1,4)]+=tmp0 + tmp1;
                             EM_S[INDEX2(0,2,4)]+=tmp8 + tmp9;
@@ -481,94 +496,94 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
                             EM_S[INDEX2(3,2,4)]+=tmp3 + tmp4;
                             EM_S[INDEX2(3,3,4)]+=D_0*w24 + D_3*w23 + tmp5;
                         } else { // constant data
-                            const double D_0 = D_p[0];
-                            EM_S[INDEX2(0,0,4)]+=16*D_0*w22;
-                            EM_S[INDEX2(0,1,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(0,2,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(0,3,4)]+=4*D_0*w22;
-                            EM_S[INDEX2(1,0,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(1,1,4)]+=16*D_0*w22;
-                            EM_S[INDEX2(1,2,4)]+=4*D_0*w22;
-                            EM_S[INDEX2(1,3,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(2,0,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(2,1,4)]+=4*D_0*w22;
-                            EM_S[INDEX2(2,2,4)]+=16*D_0*w22;
-                            EM_S[INDEX2(2,3,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(3,0,4)]+=4*D_0*w22;
-                            EM_S[INDEX2(3,1,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(3,2,4)]+=8*D_0*w22;
-                            EM_S[INDEX2(3,3,4)]+=16*D_0*w22;
+                            const Scalar D_0 = D_p[0];
+                            EM_S[INDEX2(0,0,4)]+=16.*D_0*w22;
+                            EM_S[INDEX2(0,1,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(0,2,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(0,3,4)]+= 4.*D_0*w22;
+                            EM_S[INDEX2(1,0,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(1,1,4)]+=16.*D_0*w22;
+                            EM_S[INDEX2(1,2,4)]+= 4.*D_0*w22;
+                            EM_S[INDEX2(1,3,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(2,0,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(2,1,4)]+= 4.*D_0*w22;
+                            EM_S[INDEX2(2,2,4)]+=16.*D_0*w22;
+                            EM_S[INDEX2(2,3,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(3,0,4)]+= 4.*D_0*w22;
+                            EM_S[INDEX2(3,1,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(3,2,4)]+= 8.*D_0*w22;
+                            EM_S[INDEX2(3,3,4)]+=16.*D_0*w22;
                         }
                     }
                     ///////////////
                     // process X //
                     ///////////////
                     if (!X.isEmpty()) {
-                        const double* X_p=X.getSampleDataRO(e);
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
                         if (X.actsExpanded()) {
-                            const double X_0_0 = X_p[INDEX2(0,0,2)];
-                            const double X_1_0 = X_p[INDEX2(1,0,2)];
-                            const double X_0_1 = X_p[INDEX2(0,1,2)];
-                            const double X_1_1 = X_p[INDEX2(1,1,2)];
-                            const double X_0_2 = X_p[INDEX2(0,2,2)];
-                            const double X_1_2 = X_p[INDEX2(1,2,2)];
-                            const double X_0_3 = X_p[INDEX2(0,3,2)];
-                            const double X_1_3 = X_p[INDEX2(1,3,2)];
-                            const double tmp0 = 6*w15*(X_0_2 + X_0_3);
-                            const double tmp1 = 6*w10*(X_0_0 + X_0_1);
-                            const double tmp2 = 6*w11*(X_1_0 + X_1_2);
-                            const double tmp3 = 6*w14*(X_1_1 + X_1_3);
-                            const double tmp4 = 6*w11*(X_1_1 + X_1_3);
-                            const double tmp5 = w25*(X_0_0 + X_0_1);
-                            const double tmp6 = w26*(X_0_2 + X_0_3);
-                            const double tmp7 = 6*w14*(X_1_0 + X_1_2);
-                            const double tmp8 = w27*(X_1_0 + X_1_2);
-                            const double tmp9 = w28*(X_1_1 + X_1_3);
-                            const double tmp10 = w25*(-X_0_2 - X_0_3);
-                            const double tmp11 = w26*(-X_0_0 - X_0_1);
-                            const double tmp12 = w27*(X_1_1 + X_1_3);
-                            const double tmp13 = w28*(X_1_0 + X_1_2);
-                            const double tmp14 = w25*(X_0_2 + X_0_3);
-                            const double tmp15 = w26*(X_0_0 + X_0_1);
+                            const Scalar X_0_0 = X_p[INDEX2(0,0,2)];
+                            const Scalar X_1_0 = X_p[INDEX2(1,0,2)];
+                            const Scalar X_0_1 = X_p[INDEX2(0,1,2)];
+                            const Scalar X_1_1 = X_p[INDEX2(1,1,2)];
+                            const Scalar X_0_2 = X_p[INDEX2(0,2,2)];
+                            const Scalar X_1_2 = X_p[INDEX2(1,2,2)];
+                            const Scalar X_0_3 = X_p[INDEX2(0,3,2)];
+                            const Scalar X_1_3 = X_p[INDEX2(1,3,2)];
+                            const Scalar tmp0 = 6.*w15*(X_0_2 + X_0_3);
+                            const Scalar tmp1 = 6.*w10*(X_0_0 + X_0_1);
+                            const Scalar tmp2 = 6.*w11*(X_1_0 + X_1_2);
+                            const Scalar tmp3 = 6.*w14*(X_1_1 + X_1_3);
+                            const Scalar tmp4 = 6.*w11*(X_1_1 + X_1_3);
+                            const Scalar tmp5 = w25*(X_0_0 + X_0_1);
+                            const Scalar tmp6 = w26*(X_0_2 + X_0_3);
+                            const Scalar tmp7 = 6.*w14*(X_1_0 + X_1_2);
+                            const Scalar tmp8 = w27*(X_1_0 + X_1_2);
+                            const Scalar tmp9 = w28*(X_1_1 + X_1_3);
+                            const Scalar tmp10 = w25*(-X_0_2 - X_0_3);
+                            const Scalar tmp11 = w26*(-X_0_0 - X_0_1);
+                            const Scalar tmp12 = w27*(X_1_1 + X_1_3);
+                            const Scalar tmp13 = w28*(X_1_0 + X_1_2);
+                            const Scalar tmp14 = w25*(X_0_2 + X_0_3);
+                            const Scalar tmp15 = w26*(X_0_0 + X_0_1);
                             EM_F[0]+=tmp0 + tmp1 + tmp2 + tmp3;
                             EM_F[1]+=tmp4 + tmp5 + tmp6 + tmp7;
                             EM_F[2]+=tmp10 + tmp11 + tmp8 + tmp9;
                             EM_F[3]+=tmp12 + tmp13 + tmp14 + tmp15;
                         } else { // constant data
-                            const double X_0 = X_p[0];
-                            const double X_1 = X_p[1];
-                            EM_F[0]+= 6*X_0*w18 + 6*X_1*w19;
-                            EM_F[1]+=-6*X_0*w18 + 6*X_1*w19;
-                            EM_F[2]+= 6*X_0*w18 - 6*X_1*w19;
-                            EM_F[3]+=-6*X_0*w18 - 6*X_1*w19;
+                            const Scalar X_0 = X_p[0];
+                            const Scalar X_1 = X_p[1];
+                            EM_F[0]+= 6.*X_0*w18 + 6.*X_1*w19;
+                            EM_F[1]+=-6.*X_0*w18 + 6.*X_1*w19;
+                            EM_F[2]+= 6.*X_0*w18 - 6.*X_1*w19;
+                            EM_F[3]+=-6.*X_0*w18 - 6.*X_1*w19;
                         }
                     }
                     ///////////////
                     // process Y //
                     ///////////////
                     if (!Y.isEmpty()) {
-                        const double* Y_p=Y.getSampleDataRO(e);
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                         if (Y.actsExpanded()) {
-                            const double Y_0 = Y_p[0];
-                            const double Y_1 = Y_p[1];
-                            const double Y_2 = Y_p[2];
-                            const double Y_3 = Y_p[3];
-                            const double tmp0 = 6*w22*(Y_1 + Y_2);
-                            const double tmp1 = 6*w22*(Y_0 + Y_3);
-                            EM_F[0]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
-                            EM_F[1]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
-                            EM_F[2]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
-                            EM_F[3]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
+                            const Scalar Y_0 = Y_p[0];
+                            const Scalar Y_1 = Y_p[1];
+                            const Scalar Y_2 = Y_p[2];
+                            const Scalar Y_3 = Y_p[3];
+                            const Scalar tmp0 = 6.*w22*(Y_1 + Y_2);
+                            const Scalar tmp1 = 6.*w22*(Y_0 + Y_3);
+                            EM_F[0]+=6.*Y_0*w20 + 6.*Y_3*w21 + tmp0;
+                            EM_F[1]+=6.*Y_1*w20 + 6.*Y_2*w21 + tmp1;
+                            EM_F[2]+=6.*Y_1*w21 + 6.*Y_2*w20 + tmp1;
+                            EM_F[3]+=6.*Y_0*w21 + 6.*Y_3*w20 + tmp0;
                         } else { // constant data
-                            EM_F[0]+=36*Y_p[0]*w22;
-                            EM_F[1]+=36*Y_p[0]*w22;
-                            EM_F[2]+=36*Y_p[0]*w22;
-                            EM_F[3]+=36*Y_p[0]*w22;
+                            EM_F[0]+=36.*Y_p[0]*w22;
+                            EM_F[1]+=36.*Y_p[0]*w22;
+                            EM_F[2]+=36.*Y_p[0]*w22;
+                            EM_F[3]+=36.*Y_p[0]*w22;
                         }
                     }
 
                     // add to matrix (if addEM_S) and RHS (if addEM_F)
-                    const index_t firstNode=m_NN[0]*k1+k0;
+                    const index_t firstNode = m_NN[0]*k1+k0;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
                                               addEM_F, firstNode);
                 } // end k0 loop
@@ -581,8 +596,10 @@ void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
 // PDE SINGLE BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
-                                Data& rhs, const Data& d, const Data& y) const
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySingle(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& d, const Data& y) const
 {
     const double SQRT3 = 1.73205080756887719318;
     const double w5 = m_dx[0]/12;
@@ -599,19 +616,20 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = !d.isEmpty();
     const bool addEM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4);
-        vector<double> EM_F(4);
+        vector<Scalar> EM_S(4*4);
+        vector<Scalar> EM_F(4);
 
         if (domain->m_faceOffset[0] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[1] = 0;
-                EM_F[3] = 0;
+                EM_F[1] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -621,33 +639,33 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(k1);
+                        const Scalar* d_p = d.getSampleDataRO(k1, zero);
                         if (d.actsExpanded()) {
-                            const double d_0 = d_p[0];
-                            const double d_1 = d_p[1];
-                            const double tmp0 = w2*(d_0 + d_1);
+                            const Scalar d_0 = d_p[0];
+                            const Scalar d_1 = d_p[1];
+                            const Scalar tmp0 = w2*(d_0 + d_1);
                             EM_S[INDEX2(0,0,4)] = d_0*w0 + d_1*w1;
                             EM_S[INDEX2(2,0,4)] = tmp0;
                             EM_S[INDEX2(0,2,4)] = tmp0;
                             EM_S[INDEX2(2,2,4)] = d_0*w1 + d_1*w0;
                         } else { // constant data
-                            EM_S[INDEX2(0,0,4)] = 4*d_p[0]*w2;
-                            EM_S[INDEX2(2,0,4)] = 2*d_p[0]*w2;
-                            EM_S[INDEX2(0,2,4)] = 2*d_p[0]*w2;
-                            EM_S[INDEX2(2,2,4)] = 4*d_p[0]*w2;
+                            EM_S[INDEX2(0,0,4)] = 4.*d_p[0]*w2;
+                            EM_S[INDEX2(2,0,4)] = 2.*d_p[0]*w2;
+                            EM_S[INDEX2(0,2,4)] = 2.*d_p[0]*w2;
+                            EM_S[INDEX2(2,2,4)] = 4.*d_p[0]*w2;
                         }
                     }
                     ///////////////
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(k1);
+                        const Scalar* y_p = y.getSampleDataRO(k1, zero);
                         if (y.actsExpanded()) {
                             EM_F[0] = w3*y_p[0] + w4*y_p[1];
                             EM_F[2] = w3*y_p[1] + w4*y_p[0];
                         } else { // constant data
-                            EM_F[0] = 6*w2*y_p[0];
-                            EM_F[2] = 6*w2*y_p[0];
+                            EM_F[0] = 6.*w2*y_p[0];
+                            EM_F[2] = 6.*w2*y_p[0];
                         }
                     }
                     const index_t firstNode=m_NN[0]*k1;
@@ -659,10 +677,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[1] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[0] = 0;
-                EM_F[2] = 0;
+                EM_F[0] = zero;
+                EM_F[2] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring            
@@ -673,36 +691,36 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
-                            const double d_0 = d_p[0];
-                            const double d_1 = d_p[1];
-                            const double tmp0 = w2*(d_0 + d_1);
+                            const Scalar d_0 = d_p[0];
+                            const Scalar d_1 = d_p[1];
+                            const Scalar tmp0 = w2*(d_0 + d_1);
                             EM_S[INDEX2(1,1,4)] = d_0*w0 + d_1*w1;
                             EM_S[INDEX2(3,1,4)] = tmp0;
                             EM_S[INDEX2(1,3,4)] = tmp0;
                             EM_S[INDEX2(3,3,4)] = d_0*w1 + d_1*w0;
                         } else { // constant data
-                            EM_S[INDEX2(1,1,4)] = 4*d_p[0]*w2;
-                            EM_S[INDEX2(3,1,4)] = 2*d_p[0]*w2;
-                            EM_S[INDEX2(1,3,4)] = 2*d_p[0]*w2;
-                            EM_S[INDEX2(3,3,4)] = 4*d_p[0]*w2;
+                            EM_S[INDEX2(1,1,4)] = 4.*d_p[0]*w2;
+                            EM_S[INDEX2(3,1,4)] = 2.*d_p[0]*w2;
+                            EM_S[INDEX2(1,3,4)] = 2.*d_p[0]*w2;
+                            EM_S[INDEX2(3,3,4)] = 4.*d_p[0]*w2;
                         }
                     }
                     ///////////////
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             EM_F[1] = w3*y_p[0] + w4*y_p[1];
                             EM_F[3] = w3*y_p[1] + w4*y_p[0];
                         } else { // constant data
-                            EM_F[1] = 6*w2*y_p[0];
-                            EM_F[3] = 6*w2*y_p[0];
+                            EM_F[1] = 6.*w2*y_p[0];
+                            EM_F[3] = 6.*w2*y_p[0];
                         }
                     }
-                    const index_t firstNode=m_NN[0]*(k1+1)-2;
+                    const index_t firstNode = m_NN[0]*(k1+1)-2;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
                                               addEM_F, firstNode);
                 }
@@ -711,10 +729,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[2] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[2] = 0;
-                EM_F[3] = 0;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
@@ -725,36 +743,36 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
-                            const double d_0 = d_p[0];
-                            const double d_1 = d_p[1];
-                            const double tmp0 = w5*(d_0 + d_1);
+                            const Scalar d_0 = d_p[0];
+                            const Scalar d_1 = d_p[1];
+                            const Scalar tmp0 = w5*(d_0 + d_1);
                             EM_S[INDEX2(0,0,4)] = d_0*w6 + d_1*w7;
                             EM_S[INDEX2(1,0,4)] = tmp0;
                             EM_S[INDEX2(0,1,4)] = tmp0;
                             EM_S[INDEX2(1,1,4)] = d_0*w7 + d_1*w6;
                         } else { // constant data
-                            EM_S[INDEX2(0,0,4)] = 4*d_p[0]*w5;
-                            EM_S[INDEX2(1,0,4)] = 2*d_p[0]*w5;
-                            EM_S[INDEX2(0,1,4)] = 2*d_p[0]*w5;
-                            EM_S[INDEX2(1,1,4)] = 4*d_p[0]*w5;
+                            EM_S[INDEX2(0,0,4)] = 4.*d_p[0]*w5;
+                            EM_S[INDEX2(1,0,4)] = 2.*d_p[0]*w5;
+                            EM_S[INDEX2(0,1,4)] = 2.*d_p[0]*w5;
+                            EM_S[INDEX2(1,1,4)] = 4.*d_p[0]*w5;
                         }
                     }
                     ///////////////
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             EM_F[0] = w8*y_p[0] + w9*y_p[1];
                             EM_F[1] = w8*y_p[1] + w9*y_p[0];
                         } else { // constant data
-                            EM_F[0] = 6*w5*y_p[0];
-                            EM_F[1] = 6*w5*y_p[0];
+                            EM_F[0] = 6.*w5*y_p[0];
+                            EM_F[1] = 6.*w5*y_p[0];
                         }
                     }
-                    const index_t firstNode=k0;
+                    const index_t firstNode = k0;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
                                               addEM_F, firstNode);
                 }
@@ -763,10 +781,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[3] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
             }
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
@@ -777,33 +795,33 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
-                            const double d_0 = d_p[0];
-                            const double d_1 = d_p[1];
-                            const double tmp0 = w5*(d_0 + d_1);
+                            const Scalar d_0 = d_p[0];
+                            const Scalar d_1 = d_p[1];
+                            const Scalar tmp0 = w5*(d_0 + d_1);
                             EM_S[INDEX2(2,2,4)] = d_0*w6 + d_1*w7;
                             EM_S[INDEX2(3,2,4)] = tmp0;
                             EM_S[INDEX2(2,3,4)] = tmp0;
                             EM_S[INDEX2(3,3,4)] = d_0*w7 + d_1*w6;
                         } else { // constant data
-                            EM_S[INDEX2(2,2,4)] = 4*d_p[0]*w5;
-                            EM_S[INDEX2(3,2,4)] = 2*d_p[0]*w5;
-                            EM_S[INDEX2(2,3,4)] = 2*d_p[0]*w5;
-                            EM_S[INDEX2(3,3,4)] = 4*d_p[0]*w5;
+                            EM_S[INDEX2(2,2,4)] = 4.*d_p[0]*w5;
+                            EM_S[INDEX2(3,2,4)] = 2.*d_p[0]*w5;
+                            EM_S[INDEX2(2,3,4)] = 2.*d_p[0]*w5;
+                            EM_S[INDEX2(3,3,4)] = 4.*d_p[0]*w5;
                         }
                     }
                     ///////////////
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             EM_F[2] = w8*y_p[0] + w9*y_p[1];
                             EM_F[3] = w8*y_p[1] + w9*y_p[0];
                         } else { // constant data
-                            EM_F[2] = 6*w5*y_p[0];
-                            EM_F[3] = 6*w5*y_p[0];
+                            EM_F[2] = 6.*w5*y_p[0];
+                            EM_F[3] = 6.*w5*y_p[0];
                         }
                     }
                     const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
@@ -819,7 +837,9 @@ void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 // PDE SINGLE REDUCED
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESingleReduced(
+                                    AbstractSystemMatrix* mat,
                                     Data& rhs, const Data& A, const Data& B,
                                     const Data& C, const Data& D,
                                     const Data& X, const Data& Y) const
@@ -834,12 +854,13 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool addEM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4, 0);
-        vector<double> EM_F(4, 0);
+        vector<Scalar> EM_S(4*4, zero);
+        vector<Scalar> EM_F(4, zero);
 
         for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -847,23 +868,23 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                 for (index_t k0=0; k0<NE0; ++k0)  {
                     const index_t e = k0 + NE0*k1;
                     if (addEM_S)
-                        fill(EM_S.begin(), EM_S.end(), 0);
+                        fill(EM_S.begin(), EM_S.end(), zero);
                     if (addEM_F)
-                        fill(EM_F.begin(), EM_F.end(), 0);
+                        fill(EM_F.begin(), EM_F.end(), zero);
                     ///////////////
                     // process A //
                     ///////////////
                     if (!A.isEmpty()) {
-                        const double* A_p=A.getSampleDataRO(e);
-                        const double A_00 = A_p[INDEX2(0,0,2)];
-                        const double A_10 = A_p[INDEX2(1,0,2)];
-                        const double A_01 = A_p[INDEX2(0,1,2)];
-                        const double A_11 = A_p[INDEX2(1,1,2)];
-                        const double tmp0 = (A_01 + A_10)*w0;
-                        const double tmp1 = A_00*w5;
-                        const double tmp2 = A_01*w0;
-                        const double tmp3 = A_10*w0;
-                        const double tmp4 = A_11*w4;
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
+                        const Scalar A_00 = A_p[INDEX2(0,0,2)];
+                        const Scalar A_10 = A_p[INDEX2(1,0,2)];
+                        const Scalar A_01 = A_p[INDEX2(0,1,2)];
+                        const Scalar A_11 = A_p[INDEX2(1,1,2)];
+                        const Scalar tmp0 = (A_01 + A_10)*w0;
+                        const Scalar tmp1 = A_00*w5;
+                        const Scalar tmp2 = A_01*w0;
+                        const Scalar tmp3 = A_10*w0;
+                        const Scalar tmp4 = A_11*w4;
                         EM_S[INDEX2(0,0,4)]+=tmp4 + tmp0 + tmp1;
                         EM_S[INDEX2(1,0,4)]+=tmp4 - tmp1 + tmp3 - tmp2;
                         EM_S[INDEX2(2,0,4)]+=tmp2 - tmp3 - tmp4 + tmp1;
@@ -885,9 +906,9 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     // process B //
                     ///////////////
                     if (!B.isEmpty()) {
-                        const double* B_p=B.getSampleDataRO(e);
-                        const double tmp0 = B_p[0]*w2;
-                        const double tmp1 = B_p[1]*w1;
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
+                        const Scalar tmp0 = B_p[0]*w2;
+                        const Scalar tmp1 = B_p[1]*w1;
                         EM_S[INDEX2(0,0,4)]+=-tmp0 - tmp1;
                         EM_S[INDEX2(1,0,4)]+= tmp0 - tmp1;
                         EM_S[INDEX2(2,0,4)]+= tmp1 - tmp0;
@@ -909,9 +930,9 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     // process C //
                     ///////////////
                     if (!C.isEmpty()) {
-                        const double* C_p=C.getSampleDataRO(e);
-                        const double tmp0 = C_p[0]*w2;
-                        const double tmp1 = C_p[1]*w1;
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
+                        const Scalar tmp0 = C_p[0]*w2;
+                        const Scalar tmp1 = C_p[1]*w1;
                         EM_S[INDEX2(0,0,4)]+=-tmp1 - tmp0;
                         EM_S[INDEX2(1,0,4)]+=-tmp1 - tmp0;
                         EM_S[INDEX2(2,0,4)]+=-tmp1 - tmp0;
@@ -933,7 +954,7 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     // process D //
                     ///////////////
                     if (!D.isEmpty()) {
-                        const double* D_p=D.getSampleDataRO(e);
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
                         EM_S[INDEX2(0,0,4)]+=D_p[0]*w3;
                         EM_S[INDEX2(1,0,4)]+=D_p[0]*w3;
                         EM_S[INDEX2(2,0,4)]+=D_p[0]*w3;
@@ -955,9 +976,9 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     // process X //
                     ///////////////
                     if (!X.isEmpty()) {
-                        const double* X_p=X.getSampleDataRO(e);
-                        const double wX0 = 4*X_p[0]*w2;
-                        const double wX1 = 4*X_p[1]*w1;
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
+                        const Scalar wX0 = 4.*X_p[0]*w2;
+                        const Scalar wX1 = 4.*X_p[1]*w1;
                         EM_F[0]+=-wX0 - wX1;
                         EM_F[1]+=-wX1 + wX0;
                         EM_F[2]+=-wX0 + wX1;
@@ -967,11 +988,11 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     // process Y //
                     ///////////////
                     if (!Y.isEmpty()) {
-                        const double* Y_p=Y.getSampleDataRO(e);
-                        EM_F[0]+=4*Y_p[0]*w3;
-                        EM_F[1]+=4*Y_p[0]*w3;
-                        EM_F[2]+=4*Y_p[0]*w3;
-                        EM_F[3]+=4*Y_p[0]*w3;
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                        EM_F[0]+=4.*Y_p[0]*w3;
+                        EM_F[1]+=4.*Y_p[0]*w3;
+                        EM_F[2]+=4.*Y_p[0]*w3;
+                        EM_F[3]+=4.*Y_p[0]*w3;
                     }
 
                     // add to matrix (if addEM_S) and RHS (if addEM_F)
@@ -988,7 +1009,8 @@ void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
 // PDE SINGLE REDUCED BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySingleReduced(
                                         AbstractSystemMatrix* mat, Data& rhs,
                                         const Data& d, const Data& y) const
 {
@@ -998,19 +1020,20 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
     const int NE1 = m_NE[1];
     const bool addEM_S = !d.isEmpty();
     const bool addEM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4, 0);
-        vector<double> EM_F(4, 0);
+        vector<Scalar> EM_S(4*4, zero);
+        vector<Scalar> EM_F(4, zero);
 
         if (domain->m_faceOffset[0] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[1] = 0;
-                EM_F[3] = 0;
+                EM_F[1] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -1020,7 +1043,7 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(k1);
+                        const Scalar* d_p = d.getSampleDataRO(k1, zero);
                         EM_S[INDEX2(0,0,4)] = d_p[0]*w1;
                         EM_S[INDEX2(2,0,4)] = d_p[0]*w1;
                         EM_S[INDEX2(0,2,4)] = d_p[0]*w1;
@@ -1030,9 +1053,9 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(k1);
-                        EM_F[0] = 2*w1*y_p[0];
-                        EM_F[2] = 2*w1*y_p[0];
+                        const Scalar* y_p = y.getSampleDataRO(k1, zero);
+                        EM_F[0] = 2.*w1*y_p[0];
+                        EM_F[2] = 2.*w1*y_p[0];
                     }
                     const index_t firstNode=m_NN[0]*k1;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
@@ -1043,10 +1066,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[1] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[0] = 0;
-                EM_F[2] = 0;
+                EM_F[0] = zero;
+                EM_F[2] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring            
@@ -1057,7 +1080,7 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         EM_S[INDEX2(1,1,4)] = d_p[0]*w1;
                         EM_S[INDEX2(3,1,4)] = d_p[0]*w1;
                         EM_S[INDEX2(1,3,4)] = d_p[0]*w1;
@@ -1067,9 +1090,9 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
-                        EM_F[1] = 2*w1*y_p[0];
-                        EM_F[3] = 2*w1*y_p[0];
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
+                        EM_F[1] = 2.*w1*y_p[0];
+                        EM_F[3] = 2.*w1*y_p[0];
                     }
                     const index_t firstNode=m_NN[0]*(k1+1)-2;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
@@ -1080,10 +1103,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[2] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[2] = 0;
-                EM_F[3] = 0;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
@@ -1094,7 +1117,7 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         EM_S[INDEX2(0,0,4)] = d_p[0]*w0;
                         EM_S[INDEX2(1,0,4)] = d_p[0]*w0;
                         EM_S[INDEX2(0,1,4)] = d_p[0]*w0;
@@ -1104,11 +1127,11 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
-                        EM_F[0] = 2*w0*y_p[0];
-                        EM_F[1] = 2*w0*y_p[0];
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
+                        EM_F[0] = 2.*w0*y_p[0];
+                        EM_F[1] = 2.*w0*y_p[0];
                     }
-                    const index_t firstNode=k0;
+                    const index_t firstNode = k0;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
                                               addEM_F, firstNode);
                 }
@@ -1117,10 +1140,10 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[3] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
             }
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
@@ -1131,7 +1154,7 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         EM_S[INDEX2(2,2,4)] = d_p[0]*w0;
                         EM_S[INDEX2(3,2,4)] = d_p[0]*w0;
                         EM_S[INDEX2(2,3,4)] = d_p[0]*w0;
@@ -1141,9 +1164,9 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
-                        EM_F[2] = 2*w0*y_p[0];
-                        EM_F[3] = 2*w0*y_p[0];
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
+                        EM_F[2] = 2.*w0*y_p[0];
+                        EM_F[3] = 2.*w0*y_p[0];
                     }
                     const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
                     domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
@@ -1158,7 +1181,8 @@ void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
 // PDE SYSTEM
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESystem(AbstractSystemMatrix* mat,
                                      Data& rhs, const Data& A, const Data& B,
                                      const Data& C, const Data& D,
                                      const Data& X, const Data& Y) const
@@ -1203,12 +1227,13 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool addEM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4*numEq*numComp, 0);
-        vector<double> EM_F(4*numEq, 0);
+        vector<Scalar> EM_S(4*4*numEq*numComp, zero);
+        vector<Scalar> EM_F(4*numEq, zero);
 
         for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -1216,89 +1241,89 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                 for (index_t k0=0; k0 < NE0; ++k0)  {
                     const index_t e = k0 + NE0*k1;
                     if (addEM_S)
-                        fill(EM_S.begin(), EM_S.end(), 0);
+                        fill(EM_S.begin(), EM_S.end(), zero);
                     if (addEM_F)
-                        fill(EM_F.begin(), EM_F.end(), 0);
+                        fill(EM_F.begin(), EM_F.end(), zero);
                     ///////////////
                     // process A //
                     ///////////////
                     if (!A.isEmpty()) {
-                        const double* A_p = A.getSampleDataRO(e);
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
                         if (A.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,2,numComp,2)];
-                                    const double A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,2,numComp,2)];
-                                    const double A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,2,numComp,2)];
-                                    const double A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,2,numComp,2)];
-                                    const double A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,2,numComp,2)];
-                                    const double A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,2,numComp,2)];
-                                    const double A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,2,numComp,2)];
-                                    const double A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,2,numComp,2)];
-                                    const double A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,2,numComp,2)];
-                                    const double A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,2,numComp,2)];
-                                    const double A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,2,numComp,2)];
-                                    const double A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,2,numComp,2)];
-                                    const double A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,2,numComp,2)];
-                                    const double A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,2,numComp,2)];
-                                    const double A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,2,numComp,2)];
-                                    const double A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,2,numComp,2)];
-                                    const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                                    const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                                    const double tmp2 = w4*(A_00_2 + A_00_3);
-                                    const double tmp3 = w0*(A_00_0 + A_00_1);
-                                    const double tmp4 = w5*(A_01_2 - A_10_3);
-                                    const double tmp5 = w2*(-A_01_1 + A_10_0);
-                                    const double tmp6 = w5*(A_01_3 + A_10_0);
-                                    const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                                    const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                                    const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                                    const double tmp10 = w2*(-A_01_0 - A_10_3);
-                                    const double tmp11 = w4*(A_00_0 + A_00_1);
-                                    const double tmp12 = w0*(A_00_2 + A_00_3);
-                                    const double tmp13 = w5*(A_01_1 - A_10_0);
-                                    const double tmp14 = w2*(-A_01_2 + A_10_3);
-                                    const double tmp15 = w7*(A_11_0 + A_11_2);
-                                    const double tmp16 = w4*(-A_00_2 - A_00_3);
-                                    const double tmp17 = w0*(-A_00_0 - A_00_1);
-                                    const double tmp18 = w5*(A_01_3 + A_10_3);
-                                    const double tmp19 = w8*(A_11_1 + A_11_3);
-                                    const double tmp20 = w2*(-A_01_0 - A_10_0);
-                                    const double tmp21 = w7*(A_11_1 + A_11_3);
-                                    const double tmp22 = w4*(-A_00_0 - A_00_1);
-                                    const double tmp23 = w0*(-A_00_2 - A_00_3);
-                                    const double tmp24 = w5*(A_01_0 + A_10_0);
-                                    const double tmp25 = w8*(A_11_0 + A_11_2);
-                                    const double tmp26 = w2*(-A_01_3 - A_10_3);
-                                    const double tmp27 = w5*(-A_01_1 - A_10_2);
-                                    const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                                    const double tmp29 = w2*(A_01_2 + A_10_1);
-                                    const double tmp30 = w7*(-A_11_1 - A_11_3);
-                                    const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                                    const double tmp32 = w5*(-A_01_0 + A_10_2);
-                                    const double tmp33 = w8*(-A_11_0 - A_11_2);
-                                    const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                                    const double tmp35 = w2*(A_01_3 - A_10_1);
-                                    const double tmp36 = w5*(A_01_0 + A_10_3);
-                                    const double tmp37 = w2*(-A_01_3 - A_10_0);
-                                    const double tmp38 = w7*(-A_11_0 - A_11_2);
-                                    const double tmp39 = w5*(-A_01_3 + A_10_1);
-                                    const double tmp40 = w8*(-A_11_1 - A_11_3);
-                                    const double tmp41 = w2*(A_01_0 - A_10_2);
-                                    const double tmp42 = w5*(A_01_1 - A_10_3);
-                                    const double tmp43 = w2*(-A_01_2 + A_10_0);
-                                    const double tmp44 = w5*(A_01_2 - A_10_0);
-                                    const double tmp45 = w2*(-A_01_1 + A_10_3);
-                                    const double tmp46 = w5*(-A_01_0 + A_10_1);
-                                    const double tmp47 = w2*(A_01_3 - A_10_2);
-                                    const double tmp48 = w5*(-A_01_1 - A_10_1);
-                                    const double tmp49 = w2*(A_01_2 + A_10_2);
-                                    const double tmp50 = w5*(-A_01_3 + A_10_2);
-                                    const double tmp51 = w2*(A_01_0 - A_10_1);
-                                    const double tmp52 = w5*(-A_01_2 - A_10_1);
-                                    const double tmp53 = w2*(A_01_1 + A_10_2);
-                                    const double tmp54 = w5*(-A_01_2 - A_10_2);
-                                    const double tmp55 = w2*(A_01_1 + A_10_1);
+                                    const Scalar A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,2,numComp,2)];
+                                    const Scalar A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,2,numComp,2)];
+                                    const Scalar A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,2,numComp,2)];
+                                    const Scalar A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,2,numComp,2)];
+                                    const Scalar A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,2,numComp,2)];
+                                    const Scalar A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,2,numComp,2)];
+                                    const Scalar A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,2,numComp,2)];
+                                    const Scalar A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,2,numComp,2)];
+                                    const Scalar A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,2,numComp,2)];
+                                    const Scalar A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,2,numComp,2)];
+                                    const Scalar A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,2,numComp,2)];
+                                    const Scalar A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,2,numComp,2)];
+                                    const Scalar A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,2,numComp,2)];
+                                    const Scalar A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,2,numComp,2)];
+                                    const Scalar A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,2,numComp,2)];
+                                    const Scalar A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,2,numComp,2)];
+                                    const Scalar tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                                    const Scalar tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                                    const Scalar tmp2 = w4*(A_00_2 + A_00_3);
+                                    const Scalar tmp3 = w0*(A_00_0 + A_00_1);
+                                    const Scalar tmp4 = w5*(A_01_2 - A_10_3);
+                                    const Scalar tmp5 = w2*(-A_01_1 + A_10_0);
+                                    const Scalar tmp6 = w5*(A_01_3 + A_10_0);
+                                    const Scalar tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                                    const Scalar tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                                    const Scalar tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                                    const Scalar tmp10 = w2*(-A_01_0 - A_10_3);
+                                    const Scalar tmp11 = w4*(A_00_0 + A_00_1);
+                                    const Scalar tmp12 = w0*(A_00_2 + A_00_3);
+                                    const Scalar tmp13 = w5*(A_01_1 - A_10_0);
+                                    const Scalar tmp14 = w2*(-A_01_2 + A_10_3);
+                                    const Scalar tmp15 = w7*(A_11_0 + A_11_2);
+                                    const Scalar tmp16 = w4*(-A_00_2 - A_00_3);
+                                    const Scalar tmp17 = w0*(-A_00_0 - A_00_1);
+                                    const Scalar tmp18 = w5*(A_01_3 + A_10_3);
+                                    const Scalar tmp19 = w8*(A_11_1 + A_11_3);
+                                    const Scalar tmp20 = w2*(-A_01_0 - A_10_0);
+                                    const Scalar tmp21 = w7*(A_11_1 + A_11_3);
+                                    const Scalar tmp22 = w4*(-A_00_0 - A_00_1);
+                                    const Scalar tmp23 = w0*(-A_00_2 - A_00_3);
+                                    const Scalar tmp24 = w5*(A_01_0 + A_10_0);
+                                    const Scalar tmp25 = w8*(A_11_0 + A_11_2);
+                                    const Scalar tmp26 = w2*(-A_01_3 - A_10_3);
+                                    const Scalar tmp27 = w5*(-A_01_1 - A_10_2);
+                                    const Scalar tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                                    const Scalar tmp29 = w2*(A_01_2 + A_10_1);
+                                    const Scalar tmp30 = w7*(-A_11_1 - A_11_3);
+                                    const Scalar tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                                    const Scalar tmp32 = w5*(-A_01_0 + A_10_2);
+                                    const Scalar tmp33 = w8*(-A_11_0 - A_11_2);
+                                    const Scalar tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                                    const Scalar tmp35 = w2*(A_01_3 - A_10_1);
+                                    const Scalar tmp36 = w5*(A_01_0 + A_10_3);
+                                    const Scalar tmp37 = w2*(-A_01_3 - A_10_0);
+                                    const Scalar tmp38 = w7*(-A_11_0 - A_11_2);
+                                    const Scalar tmp39 = w5*(-A_01_3 + A_10_1);
+                                    const Scalar tmp40 = w8*(-A_11_1 - A_11_3);
+                                    const Scalar tmp41 = w2*(A_01_0 - A_10_2);
+                                    const Scalar tmp42 = w5*(A_01_1 - A_10_3);
+                                    const Scalar tmp43 = w2*(-A_01_2 + A_10_0);
+                                    const Scalar tmp44 = w5*(A_01_2 - A_10_0);
+                                    const Scalar tmp45 = w2*(-A_01_1 + A_10_3);
+                                    const Scalar tmp46 = w5*(-A_01_0 + A_10_1);
+                                    const Scalar tmp47 = w2*(A_01_3 - A_10_2);
+                                    const Scalar tmp48 = w5*(-A_01_1 - A_10_1);
+                                    const Scalar tmp49 = w2*(A_01_2 + A_10_2);
+                                    const Scalar tmp50 = w5*(-A_01_3 + A_10_2);
+                                    const Scalar tmp51 = w2*(A_01_0 - A_10_1);
+                                    const Scalar tmp52 = w5*(-A_01_2 - A_10_1);
+                                    const Scalar tmp53 = w2*(A_01_1 + A_10_2);
+                                    const Scalar tmp54 = w5*(-A_01_2 - A_10_2);
+                                    const Scalar tmp55 = w2*(A_01_1 + A_10_1);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
@@ -1320,30 +1345,30 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)];
-                                    const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)];
-                                    const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)];
-                                    const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)];
-                                    const double tmp0 = 6*w1*(A_01 - A_10);
-                                    const double tmp1 = 6*w1*(A_01 + A_10);
-                                    const double tmp2 = 6*w1*(-A_01 - A_10);
-                                    const double tmp3 = 6*w1*(-A_01 + A_10);
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
-                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
-                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
-                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
-                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
-                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
-                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
-                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
-                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
-                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
-                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
-                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
-                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
+                                    const Scalar A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)];
+                                    const Scalar A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)];
+                                    const Scalar A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)];
+                                    const Scalar A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)];
+                                    const Scalar tmp0 = 6.*w1*(A_01 - A_10);
+                                    const Scalar tmp1 = 6.*w1*(A_01 + A_10);
+                                    const Scalar tmp2 = 6.*w1*(-A_01 - A_10);
+                                    const Scalar tmp3 = 6.*w1*(-A_01 + A_10);
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp1;
+                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 8.*A_00*w6 + 4.*A_11*w3 + tmp0;
+                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp3;
+                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= 4.*A_00*w6 - 4.*A_11*w3 + tmp2;
+                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 8.*A_00*w6 + 4.*A_11*w3 + tmp3;
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp2;
+                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= 4.*A_00*w6 - 4.*A_11*w3 + tmp1;
+                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp0;
+                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp0;
+                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= 4.*A_00*w6 - 4.*A_11*w3 + tmp1;
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp2;
+                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 8.*A_00*w6 + 4.*A_11*w3 + tmp3;
+                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= 4.*A_00*w6 - 4.*A_11*w3 + tmp2;
+                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-4.*A_00*w6 - 8.*A_11*w3 + tmp3;
+                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 8.*A_00*w6 + 4.*A_11*w3 + tmp0;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-8.*A_00*w6 + 8.*A_11*w3 + tmp1;
                                 }
                             }
                         }
@@ -1352,34 +1377,34 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                     // process B //
                     ///////////////
                     if (!B.isEmpty()) {
-                        const double* B_p=B.getSampleDataRO(e);
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
                         if (B.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
-                                    const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
-                                    const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
-                                    const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
-                                    const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
-                                    const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
-                                    const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
-                                    const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
-                                    const double tmp0 = w11*(B_1_0 + B_1_1);
-                                    const double tmp1 = w14*(B_1_2 + B_1_3);
-                                    const double tmp2 = w15*(-B_0_1 - B_0_3);
-                                    const double tmp3 = w10*(-B_0_0 - B_0_2);
-                                    const double tmp4 = w11*(B_1_2 + B_1_3);
-                                    const double tmp5 = w14*(B_1_0 + B_1_1);
-                                    const double tmp6 = w11*(-B_1_2 - B_1_3);
-                                    const double tmp7 = w14*(-B_1_0 - B_1_1);
-                                    const double tmp8 = w11*(-B_1_0 - B_1_1);
-                                    const double tmp9 = w14*(-B_1_2 - B_1_3);
-                                    const double tmp10 = w10*(-B_0_1 - B_0_3);
-                                    const double tmp11 = w15*(-B_0_0 - B_0_2);
-                                    const double tmp12 = w15*(B_0_0 + B_0_2);
-                                    const double tmp13 = w10*(B_0_1 + B_0_3);
-                                    const double tmp14 = w10*(B_0_0 + B_0_2);
-                                    const double tmp15 = w15*(B_0_1 + B_0_3);
+                                    const Scalar B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
+                                    const Scalar B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
+                                    const Scalar B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
+                                    const Scalar B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
+                                    const Scalar B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
+                                    const Scalar B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
+                                    const Scalar B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
+                                    const Scalar B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
+                                    const Scalar tmp0 = w11*(B_1_0 + B_1_1);
+                                    const Scalar tmp1 = w14*(B_1_2 + B_1_3);
+                                    const Scalar tmp2 = w15*(-B_0_1 - B_0_3);
+                                    const Scalar tmp3 = w10*(-B_0_0 - B_0_2);
+                                    const Scalar tmp4 = w11*(B_1_2 + B_1_3);
+                                    const Scalar tmp5 = w14*(B_1_0 + B_1_1);
+                                    const Scalar tmp6 = w11*(-B_1_2 - B_1_3);
+                                    const Scalar tmp7 = w14*(-B_1_0 - B_1_1);
+                                    const Scalar tmp8 = w11*(-B_1_0 - B_1_1);
+                                    const Scalar tmp9 = w14*(-B_1_2 - B_1_3);
+                                    const Scalar tmp10 = w10*(-B_0_1 - B_0_3);
+                                    const Scalar tmp11 = w15*(-B_0_0 - B_0_2);
+                                    const Scalar tmp12 = w15*(B_0_0 + B_0_2);
+                                    const Scalar tmp13 = w10*(B_0_1 + B_0_3);
+                                    const Scalar tmp14 = w10*(B_0_0 + B_0_2);
+                                    const Scalar tmp15 = w15*(B_0_1 + B_0_3);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
@@ -1401,24 +1426,24 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
-                                    const double wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wB0 + 2*wB1;
-                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2*wB0 +   wB1;
-                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=   wB0 + 2*wB1;
-                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=   wB0 +   wB1;
-                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2*wB0 +   wB1;
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wB0 + 2*wB1;
-                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=  -wB0 +   wB1;
-                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=  -wB0 + 2*wB1;
-                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=   wB0 - 2*wB1;
-                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=   wB0 -   wB1;
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wB0 - 2*wB1;
-                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2*wB0 -   wB1;
-                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=  -wB0 -   wB1;
-                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=  -wB0 - 2*wB1;
-                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2*wB0 -   wB1;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wB0 - 2*wB1;
+                                    const Scalar wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
+                                    const Scalar wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2.*wB0 + 2.*wB1;
+                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2.*wB0 +    wB1;
+                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=    wB0 + 2.*wB1;
+                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=    wB0 +    wB1;
+                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2.*wB0 +    wB1;
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2.*wB0 + 2.*wB1;
+                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=   -wB0 +    wB1;
+                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=   -wB0 + 2.*wB1;
+                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=    wB0 - 2.*wB1;
+                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=    wB0 -    wB1;
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2.*wB0 - 2.*wB1;
+                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2.*wB0 -    wB1;
+                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=   -wB0 -    wB1;
+                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=   -wB0 - 2.*wB1;
+                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2.*wB0 -    wB1;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2.*wB0 - 2.*wB1;
                                 }
                             }
                         }
@@ -1427,34 +1452,34 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                     // process C //
                     ///////////////
                     if (!C.isEmpty()) {
-                        const double* C_p=C.getSampleDataRO(e);
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
                         if (C.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
-                                    const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
-                                    const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
-                                    const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
-                                    const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
-                                    const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
-                                    const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
-                                    const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
-                                    const double tmp0 = w11*(C_1_0 + C_1_1);
-                                    const double tmp1 = w14*(C_1_2 + C_1_3);
-                                    const double tmp2 = w15*(C_0_0 + C_0_2);
-                                    const double tmp3 = w10*(C_0_1 + C_0_3);
-                                    const double tmp4 = w11*(-C_1_0 - C_1_1);
-                                    const double tmp5 = w14*(-C_1_2 - C_1_3);
-                                    const double tmp6 = w11*(-C_1_2 - C_1_3);
-                                    const double tmp7 = w14*(-C_1_0 - C_1_1);
-                                    const double tmp8 = w11*(C_1_2 + C_1_3);
-                                    const double tmp9 = w14*(C_1_0 + C_1_1);
-                                    const double tmp10 = w10*(-C_0_1 - C_0_3);
-                                    const double tmp11 = w15*(-C_0_0 - C_0_2);
-                                    const double tmp12 = w15*(-C_0_1 - C_0_3);
-                                    const double tmp13 = w10*(-C_0_0 - C_0_2);
-                                    const double tmp14 = w10*(C_0_0 + C_0_2);
-                                    const double tmp15 = w15*(C_0_1 + C_0_3);
+                                    const Scalar C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
+                                    const Scalar C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
+                                    const Scalar C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
+                                    const Scalar C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
+                                    const Scalar C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
+                                    const Scalar C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
+                                    const Scalar C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
+                                    const Scalar C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
+                                    const Scalar tmp0 = w11*(C_1_0 + C_1_1);
+                                    const Scalar tmp1 = w14*(C_1_2 + C_1_3);
+                                    const Scalar tmp2 = w15*(C_0_0 + C_0_2);
+                                    const Scalar tmp3 = w10*(C_0_1 + C_0_3);
+                                    const Scalar tmp4 = w11*(-C_1_0 - C_1_1);
+                                    const Scalar tmp5 = w14*(-C_1_2 - C_1_3);
+                                    const Scalar tmp6 = w11*(-C_1_2 - C_1_3);
+                                    const Scalar tmp7 = w14*(-C_1_0 - C_1_1);
+                                    const Scalar tmp8 = w11*(C_1_2 + C_1_3);
+                                    const Scalar tmp9 = w14*(C_1_0 + C_1_1);
+                                    const Scalar tmp10 = w10*(-C_0_1 - C_0_3);
+                                    const Scalar tmp11 = w15*(-C_0_0 - C_0_2);
+                                    const Scalar tmp12 = w15*(-C_0_1 - C_0_3);
+                                    const Scalar tmp13 = w10*(-C_0_0 - C_0_2);
+                                    const Scalar tmp14 = w10*(C_0_0 + C_0_2);
+                                    const Scalar tmp15 = w15*(C_0_1 + C_0_3);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
@@ -1476,24 +1501,24 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
-                                    const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wC0 + 2*wC1;
-                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2*wC0 +   wC1;
-                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=   wC0 - 2*wC1;
-                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=  -wC0 -   wC1;
-                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2*wC0 +   wC1;
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wC0 + 2*wC1;
-                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=   wC0 -   wC1;
-                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=  -wC0 - 2*wC1;
-                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=   wC0 + 2*wC1;
-                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=  -wC0 +   wC1;
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wC0 - 2*wC1;
-                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2*wC0 -   wC1;
-                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=   wC0 +   wC1;
-                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=  -wC0 + 2*wC1;
-                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2*wC0 -   wC1;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wC0 - 2*wC1;
+                                    const Scalar wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
+                                    const Scalar wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2.*wC0 + 2.*wC1;
+                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2.*wC0 +    wC1;
+                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=    wC0 - 2.*wC1;
+                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=   -wC0 -    wC1;
+                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2.*wC0 +    wC1;
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2.*wC0 + 2.*wC1;
+                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=    wC0 -    wC1;
+                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=   -wC0 - 2.*wC1;
+                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=    wC0 + 2.*wC1;
+                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=   -wC0 +    wC1;
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2.*wC0 - 2.*wC1;
+                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2.*wC0 -    wC1;
+                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=    wC0 +    wC1;
+                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=   -wC0 + 2.*wC1;
+                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2.*wC0 -    wC1;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2.*wC0 - 2.*wC1;
                                 }
                             }
                         }
@@ -1502,25 +1527,25 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                     // process D //
                     ///////////////
                     if (!D.isEmpty()) {
-                        const double* D_p=D.getSampleDataRO(e);
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
                         if (D.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
-                                    const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
-                                    const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
-                                    const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
-                                    const double tmp0 = w21*(D_2 + D_3);
-                                    const double tmp1 = w20*(D_0 + D_1);
-                                    const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
-                                    const double tmp3 = w21*(D_0 + D_1);
-                                    const double tmp4 = w20*(D_2 + D_3);
-                                    const double tmp5 = w22*(D_1 + D_2);
-                                    const double tmp6 = w21*(D_0 + D_2);
-                                    const double tmp7 = w20*(D_1 + D_3);
-                                    const double tmp8 = w21*(D_1 + D_3);
-                                    const double tmp9 = w20*(D_0 + D_2);
-                                    const double tmp10 = w22*(D_0 + D_3);
+                                    const Scalar D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
+                                    const Scalar D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
+                                    const Scalar D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
+                                    const Scalar D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
+                                    const Scalar tmp0 = w21*(D_2 + D_3);
+                                    const Scalar tmp1 = w20*(D_0 + D_1);
+                                    const Scalar tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
+                                    const Scalar tmp3 = w21*(D_0 + D_1);
+                                    const Scalar tmp4 = w20*(D_2 + D_3);
+                                    const Scalar tmp5 = w22*(D_1 + D_2);
+                                    const Scalar tmp6 = w21*(D_0 + D_2);
+                                    const Scalar tmp7 = w20*(D_1 + D_3);
+                                    const Scalar tmp8 = w21*(D_1 + D_3);
+                                    const Scalar tmp9 = w20*(D_0 + D_2);
+                                    const Scalar tmp10 = w22*(D_0 + D_3);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=D_0*w23 + D_3*w24 + tmp5;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp8 + tmp9;
@@ -1542,23 +1567,23 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double D_0 = D_p[INDEX2(k, m, numEq)];
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16*D_0*w22;
-                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*D_0*w22;
-                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16*D_0*w22;
-                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*D_0*w22;
-                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*D_0*w22;
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16*D_0*w22;
-                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*D_0*w22;
-                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*D_0*w22;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16*D_0*w22;
+                                    const Scalar D_0 = D_p[INDEX2(k, m, numEq)];
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16.*D_0*w22;
+                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= 4.*D_0*w22;
+                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16.*D_0*w22;
+                                    EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= 4.*D_0*w22;
+                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= 4.*D_0*w22;
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16.*D_0*w22;
+                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= 4.*D_0*w22;
+                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 8.*D_0*w22;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16.*D_0*w22;
                                 }
                             }
                         }
@@ -1567,33 +1592,33 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                     // process X //
                     ///////////////
                     if (!X.isEmpty()) {
-                        const double* X_p=X.getSampleDataRO(e);
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
                         if (X.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double X_0_0 = X_p[INDEX3(k,0,0,numEq,2)];
-                                const double X_1_0 = X_p[INDEX3(k,1,0,numEq,2)];
-                                const double X_0_1 = X_p[INDEX3(k,0,1,numEq,2)];
-                                const double X_1_1 = X_p[INDEX3(k,1,1,numEq,2)];
-                                const double X_0_2 = X_p[INDEX3(k,0,2,numEq,2)];
-                                const double X_1_2 = X_p[INDEX3(k,1,2,numEq,2)];
-                                const double X_0_3 = X_p[INDEX3(k,0,3,numEq,2)];
-                                const double X_1_3 = X_p[INDEX3(k,1,3,numEq,2)];
-                                const double tmp0 = 6*w15*(X_0_2 + X_0_3);
-                                const double tmp1 = 6*w10*(X_0_0 + X_0_1);
-                                const double tmp2 = 6*w11*(X_1_0 + X_1_2);
-                                const double tmp3 = 6*w14*(X_1_1 + X_1_3);
-                                const double tmp4 = 6*w11*(X_1_1 + X_1_3);
-                                const double tmp5 = w25*(X_0_0 + X_0_1);
-                                const double tmp6 = w26*(X_0_2 + X_0_3);
-                                const double tmp7 = 6*w14*(X_1_0 + X_1_2);
-                                const double tmp8 = w27*(X_1_0 + X_1_2);
-                                const double tmp9 = w28*(X_1_1 + X_1_3);
-                                const double tmp10 = w25*(-X_0_2 - X_0_3);
-                                const double tmp11 = w26*(-X_0_0 - X_0_1);
-                                const double tmp12 = w27*(X_1_1 + X_1_3);
-                                const double tmp13 = w28*(X_1_0 + X_1_2);
-                                const double tmp14 = w25*(X_0_2 + X_0_3);
-                                const double tmp15 = w26*(X_0_0 + X_0_1);
+                                const Scalar X_0_0 = X_p[INDEX3(k,0,0,numEq,2)];
+                                const Scalar X_1_0 = X_p[INDEX3(k,1,0,numEq,2)];
+                                const Scalar X_0_1 = X_p[INDEX3(k,0,1,numEq,2)];
+                                const Scalar X_1_1 = X_p[INDEX3(k,1,1,numEq,2)];
+                                const Scalar X_0_2 = X_p[INDEX3(k,0,2,numEq,2)];
+                                const Scalar X_1_2 = X_p[INDEX3(k,1,2,numEq,2)];
+                                const Scalar X_0_3 = X_p[INDEX3(k,0,3,numEq,2)];
+                                const Scalar X_1_3 = X_p[INDEX3(k,1,3,numEq,2)];
+                                const Scalar tmp0 = 6.*w15*(X_0_2 + X_0_3);
+                                const Scalar tmp1 = 6.*w10*(X_0_0 + X_0_1);
+                                const Scalar tmp2 = 6.*w11*(X_1_0 + X_1_2);
+                                const Scalar tmp3 = 6.*w14*(X_1_1 + X_1_3);
+                                const Scalar tmp4 = 6.*w11*(X_1_1 + X_1_3);
+                                const Scalar tmp5 =    w25*(X_0_0 + X_0_1);
+                                const Scalar tmp6 =    w26*(X_0_2 + X_0_3);
+                                const Scalar tmp7 = 6.*w14*(X_1_0 + X_1_2);
+                                const Scalar tmp8 =    w27*(X_1_0 + X_1_2);
+                                const Scalar tmp9 =    w28*(X_1_1 + X_1_3);
+                                const Scalar tmp10 =   w25*(-X_0_2 - X_0_3);
+                                const Scalar tmp11 =   w26*(-X_0_0 - X_0_1);
+                                const Scalar tmp12 =   w27*(X_1_1 + X_1_3);
+                                const Scalar tmp13 =   w28*(X_1_0 + X_1_2);
+                                const Scalar tmp14 =   w25*(X_0_2 + X_0_3);
+                                const Scalar tmp15 =   w26*(X_0_0 + X_0_1);
                                 EM_F[INDEX2(k,0,numEq)]+=tmp0 + tmp1 + tmp2 + tmp3;
                                 EM_F[INDEX2(k,1,numEq)]+=tmp4 + tmp5 + tmp6 + tmp7;
                                 EM_F[INDEX2(k,2,numEq)]+=tmp10 + tmp11 + tmp8 + tmp9;
@@ -1601,12 +1626,12 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                             }
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
-                                const double wX0 = X_p[INDEX2(k, 0, numEq)]*w18;
-                                const double wX1 = X_p[INDEX2(k, 1, numEq)]*w19;
-                                EM_F[INDEX2(k,0,numEq)]+= 6*wX0 + 6*wX1;
-                                EM_F[INDEX2(k,1,numEq)]+=-6*wX0 + 6*wX1;
-                                EM_F[INDEX2(k,2,numEq)]+= 6*wX0 - 6*wX1;
-                                EM_F[INDEX2(k,3,numEq)]+=-6*wX0 - 6*wX1;
+                                const Scalar wX0 = X_p[INDEX2(k, 0, numEq)]*w18;
+                                const Scalar wX1 = X_p[INDEX2(k, 1, numEq)]*w19;
+                                EM_F[INDEX2(k,0,numEq)]+= 6.*wX0 + 6.*wX1;
+                                EM_F[INDEX2(k,1,numEq)]+=-6.*wX0 + 6.*wX1;
+                                EM_F[INDEX2(k,2,numEq)]+= 6.*wX0 - 6.*wX1;
+                                EM_F[INDEX2(k,3,numEq)]+=-6.*wX0 - 6.*wX1;
                             }
                         }
                     }
@@ -1614,26 +1639,26 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
                     // process Y //
                     ///////////////
                     if (!Y.isEmpty()) {
-                        const double* Y_p=Y.getSampleDataRO(e);
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                         if (Y.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
-                                const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
-                                const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
-                                const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
-                                const double tmp0 = 6*w22*(Y_1 + Y_2);
-                                const double tmp1 = 6*w22*(Y_0 + Y_3);
-                                EM_F[INDEX2(k,0,numEq)]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
-                                EM_F[INDEX2(k,1,numEq)]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
-                                EM_F[INDEX2(k,2,numEq)]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
-                                EM_F[INDEX2(k,3,numEq)]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
+                                const Scalar Y_0 = Y_p[INDEX2(k, 0, numEq)];
+                                const Scalar Y_1 = Y_p[INDEX2(k, 1, numEq)];
+                                const Scalar Y_2 = Y_p[INDEX2(k, 2, numEq)];
+                                const Scalar Y_3 = Y_p[INDEX2(k, 3, numEq)];
+                                const Scalar tmp0 = 6.*w22*(Y_1 + Y_2);
+                                const Scalar tmp1 = 6.*w22*(Y_0 + Y_3);
+                                EM_F[INDEX2(k,0,numEq)]+=6.*Y_0*w20 + 6.*Y_3*w21 + tmp0;
+                                EM_F[INDEX2(k,1,numEq)]+=6.*Y_1*w20 + 6.*Y_2*w21 + tmp1;
+                                EM_F[INDEX2(k,2,numEq)]+=6.*Y_1*w21 + 6.*Y_2*w20 + tmp1;
+                                EM_F[INDEX2(k,3,numEq)]+=6.*Y_0*w21 + 6.*Y_3*w20 + tmp0;
                             }
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,0,numEq)]+=36*Y_p[k]*w22;
-                                EM_F[INDEX2(k,1,numEq)]+=36*Y_p[k]*w22;
-                                EM_F[INDEX2(k,2,numEq)]+=36*Y_p[k]*w22;
-                                EM_F[INDEX2(k,3,numEq)]+=36*Y_p[k]*w22;
+                                EM_F[INDEX2(k,0,numEq)]+=36.*Y_p[k]*w22;
+                                EM_F[INDEX2(k,1,numEq)]+=36.*Y_p[k]*w22;
+                                EM_F[INDEX2(k,2,numEq)]+=36.*Y_p[k]*w22;
+                                EM_F[INDEX2(k,3,numEq)]+=36.*Y_p[k]*w22;
                             }
                         }
                     }
@@ -1652,8 +1677,10 @@ void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
 // PDE SYSTEM BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
-                               Data& rhs, const Data& d, const Data& y) const
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySystem(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& d, const Data& y) const
 {
     dim_t numEq, numComp;
     if (!mat) {
@@ -1677,18 +1704,19 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = !d.isEmpty();
     const bool addEM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4*numEq*numComp, 0);
-        vector<double> EM_F(4*numEq, 0);
+        vector<Scalar> EM_S(4*4*numEq*numComp, zero);
+        vector<Scalar> EM_F(4*numEq, zero);
 
         if (domain->m_faceOffset[0] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -1698,13 +1726,13 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                    const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                    const double tmp0 = w2*(d_0 + d_1);
+                                    const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                    const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                    const Scalar tmp0 = w2*(d_0 + d_1);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = d_0*w0 + d_1*w1;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,4)] = tmp0;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,4)] = tmp0;
@@ -1714,11 +1742,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX2(k, m, numEq)];
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = 4*d_0*w2;
-                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)] = 2*d_0*w2;
-                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)] = 2*d_0*w2;
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = 4*d_0*w2;
+                                    const Scalar d_0 = d_p[INDEX2(k, m, numEq)];
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = 4.*d_0*w2;
+                                    EM_S[INDEX4(k,m,2,0,numEq,numComp,4)] = 2.*d_0*w2;
+                                    EM_S[INDEX4(k,m,0,2,numEq,numComp,4)] = 2.*d_0*w2;
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = 4.*d_0*w2;
                                 }
                             }
                         }
@@ -1727,11 +1755,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                const double y_1 = y_p[INDEX2(k, 1, numEq)];
+                                const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
                                 EM_F[INDEX2(k,0,numEq)] = w3*y_0 + w4*y_1;
                                 EM_F[INDEX2(k,2,numEq)] = w3*y_1 + w4*y_0;
                             }
@@ -1751,9 +1779,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[1] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring            
 #pragma omp for
@@ -1763,13 +1791,13 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                    const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                    const double tmp0 = w2*(d_0 + d_1);
+                                    const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                    const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                    const Scalar tmp0 = w2*(d_0 + d_1);
                                     EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = d_0*w0 + d_1*w1;
                                     EM_S[INDEX4(k,m,3,1,numEq,numComp,4)] = tmp0;
                                     EM_S[INDEX4(k,m,1,3,numEq,numComp,4)] = tmp0;
@@ -1779,11 +1807,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX2(k, m, numEq)];
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = 4*d_0*w2;
-                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)] = 2*d_0*w2;
-                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)] = 2*d_0*w2;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)] = 4*d_0*w2;
+                                    const Scalar d_0 = d_p[INDEX2(k, m, numEq)];
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = 4.*d_0*w2;
+                                    EM_S[INDEX4(k,m,3,1,numEq,numComp,4)] = 2.*d_0*w2;
+                                    EM_S[INDEX4(k,m,1,3,numEq,numComp,4)] = 2.*d_0*w2;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)] = 4.*d_0*w2;
                                 }
                             }
                         }
@@ -1792,11 +1820,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                const double y_1 = y_p[INDEX2(k, 1, numEq)];
+                                const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
                                 EM_F[INDEX2(k,1,numEq)] = w3*y_0 + w4*y_1;
                                 EM_F[INDEX2(k,3,numEq)] = w3*y_1 + w4*y_0;
                             }
@@ -1816,9 +1844,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[2] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
 #pragma omp for
@@ -1828,13 +1856,13 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                    const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                    const double tmp0 = w5*(d_0 + d_1);
+                                    const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                    const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                    const Scalar tmp0 = w5*(d_0 + d_1);
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = d_0*w6 + d_1*w7;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,4)] = tmp0;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,4)] = tmp0;
@@ -1844,11 +1872,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX2(k, m, numEq)];
-                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = 4*d_0*w5;
-                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)] = 2*d_0*w5;
-                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)] = 2*d_0*w5;
-                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = 4*d_0*w5;
+                                    const Scalar d_0 = d_p[INDEX2(k, m, numEq)];
+                                    EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = 4.*d_0*w5;
+                                    EM_S[INDEX4(k,m,1,0,numEq,numComp,4)] = 2.*d_0*w5;
+                                    EM_S[INDEX4(k,m,0,1,numEq,numComp,4)] = 2.*d_0*w5;
+                                    EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = 4.*d_0*w5;
                                 }
                             }
                         }
@@ -1857,11 +1885,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                const double y_1 = y_p[INDEX2(k, 1, numEq)];
+                                const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
                                 EM_F[INDEX2(k,0,numEq)] = w8*y_0 + w9*y_1;
                                 EM_F[INDEX2(k,1,numEq)] = w8*y_1 + w9*y_0;
                             }
@@ -1881,9 +1909,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[3] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
 #pragma omp for
@@ -1893,13 +1921,13 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         if (d.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                    const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                    const double tmp0 = w5*(d_0 + d_1);
+                                    const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                    const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                    const Scalar tmp0 = w5*(d_0 + d_1);
                                     EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = d_0*w6 + d_1*w7;
                                     EM_S[INDEX4(k,m,3,2,numEq,numComp,4)] = tmp0;
                                     EM_S[INDEX4(k,m,2,3,numEq,numComp,4)] = tmp0;
@@ -1909,11 +1937,11 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double d_0 = d_p[INDEX2(k, m, numEq)];
-                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = 4*d_0*w5;
-                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)] = 2*d_0*w5;
-                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)] = 2*d_0*w5;
-                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)] = 4*d_0*w5;
+                                    const Scalar d_0 = d_p[INDEX2(k, m, numEq)];
+                                    EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = 4.*d_0*w5;
+                                    EM_S[INDEX4(k,m,3,2,numEq,numComp,4)] = 2.*d_0*w5;
+                                    EM_S[INDEX4(k,m,2,3,numEq,numComp,4)] = 2.*d_0*w5;
+                                    EM_S[INDEX4(k,m,3,3,numEq,numComp,4)] = 4.*d_0*w5;
                                 }
                             }
                         }
@@ -1922,18 +1950,18 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         if (y.actsExpanded()) {
                             for (index_t k=0; k<numEq; k++) {
-                                const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                const double y_1 = y_p[INDEX2(k, 1, numEq)];
+                                const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
                                 EM_F[INDEX2(k,2,numEq)] = w8*y_0 + w9*y_1;
                                 EM_F[INDEX2(k,3,numEq)] = w8*y_1 + w9*y_0;
                             }
                         } else { // constant data
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,2,numEq)] = 6*w5*y_p[k];
-                                EM_F[INDEX2(k,3,numEq)] = 6*w5*y_p[k];
+                                EM_F[INDEX2(k,2,numEq)] = 6.*w5*y_p[k];
+                                EM_F[INDEX2(k,3,numEq)] = 6.*w5*y_p[k];
                             }
                         }
                     }
@@ -1950,7 +1978,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 // PDE SYSTEM REDUCED
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDESystemReduced(
+                                    AbstractSystemMatrix* mat,
                                     Data& rhs, const Data& A, const Data& B,
                                     const Data& C, const Data& D,
                                     const Data& X, const Data& Y) const
@@ -1973,33 +2003,34 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
     const int NE1 = m_NE[1];
     const bool addEM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool addEM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4*numEq*numComp, 0);
-        vector<double> EM_F(4*numEq, 0);
+        vector<Scalar> EM_S(4*4*numEq*numComp, zero);
+        vector<Scalar> EM_F(4*numEq, zero);
 
         for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
             for (index_t k1=k1_0; k1<NE1; k1+=2) {
                 for (index_t k0=0; k0<NE0; ++k0)  {
                     if (addEM_S)
-                        fill(EM_S.begin(), EM_S.end(), 0);
+                        fill(EM_S.begin(), EM_S.end(), zero);
                     if (addEM_F)
-                        fill(EM_F.begin(), EM_F.end(), 0);
+                        fill(EM_F.begin(), EM_F.end(), zero);
                     const index_t e = k0 + NE0*k1;
                     ///////////////
                     // process A //
                     ///////////////
                     if (!A.isEmpty()) {
-                        const double* A_p=A.getSampleDataRO(e);
+                        const Scalar* A_p = A.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double Aw00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]*w5;
-                                const double Aw01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]*w0;
-                                const double Aw10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]*w0;
-                                const double Aw11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]*w4;
+                                const Scalar Aw00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]*w5;
+                                const Scalar Aw01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]*w0;
+                                const Scalar Aw10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]*w0;
+                                const Scalar Aw11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]*w4;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= Aw00 + Aw01 + Aw10 + Aw11;
                                 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-Aw00 - Aw01 + Aw10 + Aw11;
                                 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= Aw00 + Aw01 - Aw10 - Aw11;
@@ -2023,11 +2054,11 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     // process B //
                     ///////////////
                     if (!B.isEmpty()) {
-                        const double* B_p=B.getSampleDataRO(e);
+                        const Scalar* B_p = B.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double wB0 = B_p[INDEX3(k,0,m, numEq, 2)]*w2;
-                                const double wB1 = B_p[INDEX3(k,1,m, numEq, 2)]*w1;
+                                const Scalar wB0 = B_p[INDEX3(k,0,m, numEq, 2)]*w2;
+                                const Scalar wB1 = B_p[INDEX3(k,1,m, numEq, 2)]*w1;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wB0 - wB1;
                                 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-wB0 - wB1;
                                 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-wB0 - wB1;
@@ -2051,11 +2082,11 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     // process C //
                     ///////////////
                     if (!C.isEmpty()) {
-                        const double* C_p=C.getSampleDataRO(e);
+                        const Scalar* C_p = C.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w2;
-                                const double wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w1;
+                                const Scalar wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w2;
+                                const Scalar wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w1;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wC0 - wC1;
                                 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-wC0 - wC1;
                                 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-wC0 - wC1;
@@ -2079,10 +2110,10 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     // process D //
                     ///////////////
                     if (!D.isEmpty()) {
-                        const double* D_p=D.getSampleDataRO(e);
+                        const Scalar* D_p = D.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double wD0 = D_p[INDEX2(k, m, numEq)]*w3;
+                                const Scalar wD0 = D_p[INDEX2(k, m, numEq)]*w3;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=wD0;
                                 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=wD0;
                                 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=wD0;
@@ -2106,10 +2137,10 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     // process X //
                     ///////////////
                     if (!X.isEmpty()) {
-                        const double* X_p=X.getSampleDataRO(e);
+                        const Scalar* X_p = X.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
-                            const double wX0 = 4*X_p[INDEX2(k, 0, numEq)]*w2;
-                            const double wX1 = 4*X_p[INDEX2(k, 1, numEq)]*w1;
+                            const Scalar wX0 = 4.*X_p[INDEX2(k, 0, numEq)]*w2;
+                            const Scalar wX1 = 4.*X_p[INDEX2(k, 1, numEq)]*w1;
                             EM_F[INDEX2(k,0,numEq)]+=-wX0 - wX1;
                             EM_F[INDEX2(k,1,numEq)]+= wX0 - wX1;
                             EM_F[INDEX2(k,2,numEq)]+=-wX0 + wX1;
@@ -2120,12 +2151,12 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     // process Y //
                     ///////////////
                     if (!Y.isEmpty()) {
-                        const double* Y_p=Y.getSampleDataRO(e);
+                        const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
-                            EM_F[INDEX2(k,0,numEq)]+=4*Y_p[k]*w3;
-                            EM_F[INDEX2(k,1,numEq)]+=4*Y_p[k]*w3;
-                            EM_F[INDEX2(k,2,numEq)]+=4*Y_p[k]*w3;
-                            EM_F[INDEX2(k,3,numEq)]+=4*Y_p[k]*w3;
+                            EM_F[INDEX2(k,0,numEq)]+=4.*Y_p[k]*w3;
+                            EM_F[INDEX2(k,1,numEq)]+=4.*Y_p[k]*w3;
+                            EM_F[INDEX2(k,2,numEq)]+=4.*Y_p[k]*w3;
+                            EM_F[INDEX2(k,3,numEq)]+=4.*Y_p[k]*w3;
                         }
                     }
 
@@ -2143,7 +2174,8 @@ void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
 // PDE SYSTEM REDUCED BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
+template<class Scalar>
+void DefaultAssembler2D<Scalar>::assemblePDEBoundarySystemReduced(
                                          AbstractSystemMatrix* mat, Data& rhs,
                                          const Data& d, const Data& y) const
 {
@@ -2160,18 +2192,19 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
     const int NE1 = m_NE[1];
     const bool addEM_S = !d.isEmpty();
     const bool addEM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(4*4*numEq*numComp, 0);
-        vector<double> EM_F(4*numEq, 0);
+        vector<Scalar> EM_S(4*4*numEq*numComp, zero);
+        vector<Scalar> EM_F(4*numEq, zero);
 
         if (domain->m_faceOffset[0] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -2180,10 +2213,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(k1);
+                        const Scalar* d_p = d.getSampleDataRO(k1, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
+                                const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)] = tmp0;
@@ -2195,7 +2228,7 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(k1);
+                        const Scalar* y_p = y.getSampleDataRO(k1, zero);
                         for (index_t k=0; k<numEq; k++) {
                             EM_F[INDEX2(k,0,numEq)] = 2*w1*y_p[k];
                             EM_F[INDEX2(k,2,numEq)] = 2*w1*y_p[k];
@@ -2210,9 +2243,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[1] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring            
 #pragma omp for
@@ -2222,10 +2255,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
+                                const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
                                 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)] = tmp0;
@@ -2237,10 +2270,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
-                            EM_F[INDEX2(k,1,numEq)] = 2*w1*y_p[k];
-                            EM_F[INDEX2(k,3,numEq)] = 2*w1*y_p[k];
+                            EM_F[INDEX2(k,1,numEq)] = 2.*w1*y_p[k];
+                            EM_F[INDEX2(k,3,numEq)] = 2.*w1*y_p[k];
                         }
                     }
                     const index_t firstNode=m_NN[0]*(k1+1)-2;
@@ -2252,9 +2285,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[2] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
 #pragma omp for
@@ -2264,10 +2297,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
+                                const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
                                 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)] = tmp0;
@@ -2279,10 +2312,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
-                            EM_F[INDEX2(k,0,numEq)] = 2*w0*y_p[k];
-                            EM_F[INDEX2(k,1,numEq)] = 2*w0*y_p[k];
+                            EM_F[INDEX2(k,0,numEq)] = 2.*w0*y_p[k];
+                            EM_F[INDEX2(k,1,numEq)] = 2.*w0*y_p[k];
                         }
                     }
                     const index_t firstNode=k0;
@@ -2294,9 +2327,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[3] > -1) {
             if (addEM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (addEM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
 #pragma omp for
@@ -2306,10 +2339,10 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process d //
                     ///////////////
                     if (addEM_S) {
-                        const double* d_p=d.getSampleDataRO(e);
+                        const Scalar* d_p = d.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             for (index_t m=0; m<numComp; m++) {
-                                const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
+                                const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
                                 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)] = tmp0;
                                 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)] = tmp0;
@@ -2321,7 +2354,7 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
                     // process y //
                     ///////////////
                     if (addEM_F) {
-                        const double* y_p=y.getSampleDataRO(e);
+                        const Scalar* y_p = y.getSampleDataRO(e, zero);
                         for (index_t k=0; k<numEq; k++) {
                             EM_F[INDEX2(k,2,numEq)] = 2*w0*y_p[k];
                             EM_F[INDEX2(k,3,numEq)] = 2*w0*y_p[k];
@@ -2336,6 +2369,9 @@ void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
     } // end of parallel section
 }
 
+// instantiate our two supported versions
+template class DefaultAssembler2D<escript::DataTypes::real_t>;
+template class DefaultAssembler2D<escript::DataTypes::cplx_t>;
 
 } // namespace ripley
 
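Two idioms recur in the DefaultAssembler2D.cpp hunks above: a Scalar-typed zero that is used both to clear the element matrices and vectors and as a dummy second argument to getSampleDataRO(), which the call sites suggest is there to select between real- and complex-valued sample buffers, and an explicit instantiation of the two supported scalar types at the end of the file so the templated member definitions can stay in the .cpp. The self-contained sketch below illustrates both patterns under those assumptions; ToyData and ToyAssembler are hypothetical stand-ins, not escript classes, and the real Data API may differ.

    // Editorial sketch (hypothetical names): a dummy Scalar argument steering
    // overload resolution to the real- or complex-valued buffer, plus explicit
    // instantiation of the two supported scalar types.
    #include <complex>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    typedef double real_t;
    typedef std::complex<double> cplx_t;

    struct ToyData {
        std::vector<real_t> re;
        std::vector<cplx_t> cx;
        // Overloads distinguished only by the type of the unused dummy argument,
        // mirroring the getSampleDataRO(e, zero) call pattern in the diff.
        const real_t* getSampleDataRO(std::size_t i, real_t) const { return &re[i]; }
        const cplx_t* getSampleDataRO(std::size_t i, cplx_t) const { return &cx[i]; }
    };

    template<class Scalar = real_t>
    struct ToyAssembler {
        Scalar sumSample(const ToyData& d, std::size_t i) const {
            const Scalar zero = static_cast<Scalar>(0);
            const Scalar* p = d.getSampleDataRO(i, zero); // picks the matching buffer
            return *p + zero;
        }
    };

    // Instantiate the two supported versions, mirroring the lines added at the
    // end of DefaultAssembler2D.cpp; in the real code this keeps the member
    // definitions out of the header.
    template struct ToyAssembler<real_t>;
    template struct ToyAssembler<cplx_t>;

    int main() {
        ToyData d;
        d.re.assign(3, 1.5);
        d.cx.assign(3, cplx_t(0.0, 2.0));
        std::cout << ToyAssembler<real_t>().sumSample(d, 0) << "\n";
        std::cout << ToyAssembler<cplx_t>().sumSample(d, 0) << "\n";
        return 0;
    }

Compiled on its own, the sketch prints the real sample for ToyAssembler<real_t> and the complex sample for ToyAssembler<cplx_t>, which is the behaviour the dummy-argument overloads would give the assembler without duplicating its loops for each scalar type.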
diff --git a/ripley/src/DefaultAssembler2D.h b/ripley/src/DefaultAssembler2D.h
index 25a892f..963951d 100644
--- a/ripley/src/DefaultAssembler2D.h
+++ b/ripley/src/DefaultAssembler2D.h
@@ -20,18 +20,18 @@
 
 namespace ripley {
 
-
+template<class Scalar = double>
 class DefaultAssembler2D : public AbstractAssembler
 {
 public:
-    DefaultAssembler2D(escript::const_Domain_ptr dom, const double *dx,
-                       const dim_t *NE, const dim_t *NN)
+    DefaultAssembler2D<Scalar>(escript::const_Domain_ptr dom, const double* dx,
+                               const dim_t* NE, const dim_t* NN)
         : AbstractAssembler(),
         m_dx(dx),
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Rectangle>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
     }
 
     ~DefaultAssembler2D() {}
@@ -123,7 +123,7 @@ public:
                                            const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Rectangle> domain;
+    POINTER_WRAPPER_CLASS(const Rectangle) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
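The header hunk above makes two changes worth noting: DefaultAssembler2D gains a Scalar template parameter defaulting to double, so the real-valued assembler stays the default choice, and the boost-specific spellings are replaced by the REFCOUNTNS and POINTER_WRAPPER_CLASS macros, which presumably let the build select a reference-counted pointer implementation in one place. Those macro definitions are not part of this diff; the sketch below is only a guess at their shape (the ESYS_USE_BOOST_SHARED_PTR flag and the exact expansions are assumptions), included to show how the member declaration and the static_pointer_cast call from the hunk would read under such a scheme.

    // Editorial sketch (hypothetical macro definitions; the real ones live
    // elsewhere in escript and may differ). One switch chooses between boost
    // and std smart pointers for the whole tree.
    #ifdef ESYS_USE_BOOST_SHARED_PTR              // hypothetical configuration flag
      #include <boost/shared_ptr.hpp>
      #define REFCOUNTNS boost
      #define POINTER_WRAPPER_CLASS(T) boost::shared_ptr<T>
    #else
      #include <memory>
      #define REFCOUNTNS std
      #define POINTER_WRAPPER_CLASS(T) std::shared_ptr<T>
    #endif

    #include <iostream>

    struct Rectangle { int nx = 4, ny = 4; };

    int main() {
        // Mirrors "POINTER_WRAPPER_CLASS(const Rectangle) domain;" and the
        // REFCOUNTNS::static_pointer_cast call from the hunks above.
        POINTER_WRAPPER_CLASS(Rectangle) r(new Rectangle());
        POINTER_WRAPPER_CLASS(const Rectangle) domain =
            REFCOUNTNS::static_pointer_cast<const Rectangle>(r);
        std::cout << domain->nx << " x " << domain->ny << "\n";
        return 0;
    }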
diff --git a/ripley/src/DefaultAssembler3D.cpp b/ripley/src/DefaultAssembler3D.cpp
index 1345707..f39f1dd 100644
--- a/ripley/src/DefaultAssembler3D.cpp
+++ b/ripley/src/DefaultAssembler3D.cpp
@@ -14,12 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <ripley/DefaultAssembler3D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/index.h>
+
 using namespace std;
 
 using escript::AbstractSystemMatrix;
@@ -27,7 +26,9 @@ using escript::Data;
 
 namespace ripley {
 
-void DefaultAssembler3D::collateFunctionSpaceTypes(vector<int>& fsTypes,
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::collateFunctionSpaceTypes(
+                                                   vector<int>& fsTypes,
                                                    const DataMap& coefs) const
 {
     if (isNotEmpty("A", coefs))
@@ -48,7 +49,8 @@ void DefaultAssembler3D::collateFunctionSpaceTypes(vector<int>& fsTypes,
 // wrappers
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESingle(AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
@@ -61,16 +63,20 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat,
 
 }
 
-void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
-                                        Data& rhs, const DataMap& coefs) const 
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySingle(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const DataMap& coefs) const 
 {
     const Data& d = unpackData("d", coefs);
     const Data& y = unpackData("y", coefs);
     assemblePDEBoundarySingle(mat, rhs, d, y);
 }
 
-void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
-                                        Data& rhs, const DataMap& coefs) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESingleReduced(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
     const Data& B = unpackData("B", coefs);
@@ -81,16 +87,18 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
     assemblePDESingleReduced(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
-                                        AbstractSystemMatrix* mat,
-                                        Data& rhs, const DataMap& coefs) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySingleReduced(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const DataMap& coefs) const
 {
     const Data& d = unpackData("d", coefs);
     const Data& y = unpackData("y", coefs);
     assemblePDEBoundarySingleReduced(mat, rhs, d, y);
 }
 
-void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat,
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESystem(AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
@@ -102,16 +110,20 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat,
     assemblePDESystem(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
-                                        Data& rhs, const DataMap& coefs) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySystem(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const DataMap& coefs) const
 {
     const Data& d = unpackData("d", coefs);
     const Data& y = unpackData("y", coefs);
     assemblePDEBoundarySystem(mat, rhs, d, y);
 }
 
-void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
-                                        Data& rhs, const DataMap& coefs) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESystemReduced(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const DataMap& coefs) const
 {
     const Data& A = unpackData("A", coefs);
     const Data& B = unpackData("B", coefs);
@@ -122,7 +134,8 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
     assemblePDESystemReduced(mat, rhs, A, B, C, D, X, Y);
 }
 
-void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySystemReduced(
                                         AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
@@ -135,10 +148,11 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 // PDE SINGLE
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
-                                           const Data& A, const Data& B,
-                                           const Data& C, const Data& D,
-                                           const Data& X, const Data& Y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESingle(AbstractSystemMatrix* mat,
+                                       Data& rhs, const Data& A, const Data& B,
+                                       const Data& C, const Data& D,
+                                       const Data& X, const Data& Y) const
 {
     const double SQRT3 = 1.73205080756887719318;
     const double w10 = -m_dx[0]/288;
@@ -212,17 +226,18 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
     const double w14 = w27*(-SQRT3 - 2);
     const double w28 = w27*(-4*SQRT3 + 7);
     const double w29 = w27*(4*SQRT3 + 7);
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool add_EM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8);
-        vector<double> EM_F(8);
+        vector<Scalar> EM_S(8*8);
+        vector<Scalar> EM_F(8);
 
         for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -231,598 +246,598 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                     for (index_t k0=0; k0<NE0; ++k0)  {
                         const index_t e = k0 + NE0*k1 + NE0*NE1*k2;
                         if (add_EM_S)
-                            fill(EM_S.begin(), EM_S.end(), 0);
+                            fill(EM_S.begin(), EM_S.end(), zero);
                         if (add_EM_F)
-                            fill(EM_F.begin(), EM_F.end(), 0);
+                            fill(EM_F.begin(), EM_F.end(), zero);
 
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double* A_p = A.getSampleDataRO(e);
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
                             if (A.actsExpanded()) {
-                                const double A_00_0 = A_p[INDEX3(0,0,0,3,3)];
-                                const double A_01_0 = A_p[INDEX3(0,1,0,3,3)];
-                                const double A_02_0 = A_p[INDEX3(0,2,0,3,3)];
-                                const double A_10_0 = A_p[INDEX3(1,0,0,3,3)];
-                                const double A_11_0 = A_p[INDEX3(1,1,0,3,3)];
-                                const double A_12_0 = A_p[INDEX3(1,2,0,3,3)];
-                                const double A_20_0 = A_p[INDEX3(2,0,0,3,3)];
-                                const double A_21_0 = A_p[INDEX3(2,1,0,3,3)];
-                                const double A_22_0 = A_p[INDEX3(2,2,0,3,3)];
-                                const double A_00_1 = A_p[INDEX3(0,0,1,3,3)];
-                                const double A_01_1 = A_p[INDEX3(0,1,1,3,3)];
-                                const double A_02_1 = A_p[INDEX3(0,2,1,3,3)];
-                                const double A_10_1 = A_p[INDEX3(1,0,1,3,3)];
-                                const double A_11_1 = A_p[INDEX3(1,1,1,3,3)];
-                                const double A_12_1 = A_p[INDEX3(1,2,1,3,3)];
-                                const double A_20_1 = A_p[INDEX3(2,0,1,3,3)];
-                                const double A_21_1 = A_p[INDEX3(2,1,1,3,3)];
-                                const double A_22_1 = A_p[INDEX3(2,2,1,3,3)];
-                                const double A_00_2 = A_p[INDEX3(0,0,2,3,3)];
-                                const double A_01_2 = A_p[INDEX3(0,1,2,3,3)];
-                                const double A_02_2 = A_p[INDEX3(0,2,2,3,3)];
-                                const double A_10_2 = A_p[INDEX3(1,0,2,3,3)];
-                                const double A_11_2 = A_p[INDEX3(1,1,2,3,3)];
-                                const double A_12_2 = A_p[INDEX3(1,2,2,3,3)];
-                                const double A_20_2 = A_p[INDEX3(2,0,2,3,3)];
-                                const double A_21_2 = A_p[INDEX3(2,1,2,3,3)];
-                                const double A_22_2 = A_p[INDEX3(2,2,2,3,3)];
-                                const double A_00_3 = A_p[INDEX3(0,0,3,3,3)];
-                                const double A_01_3 = A_p[INDEX3(0,1,3,3,3)];
-                                const double A_02_3 = A_p[INDEX3(0,2,3,3,3)];
-                                const double A_10_3 = A_p[INDEX3(1,0,3,3,3)];
-                                const double A_11_3 = A_p[INDEX3(1,1,3,3,3)];
-                                const double A_12_3 = A_p[INDEX3(1,2,3,3,3)];
-                                const double A_20_3 = A_p[INDEX3(2,0,3,3,3)];
-                                const double A_21_3 = A_p[INDEX3(2,1,3,3,3)];
-                                const double A_22_3 = A_p[INDEX3(2,2,3,3,3)];
-                                const double A_00_4 = A_p[INDEX3(0,0,4,3,3)];
-                                const double A_01_4 = A_p[INDEX3(0,1,4,3,3)];
-                                const double A_02_4 = A_p[INDEX3(0,2,4,3,3)];
-                                const double A_10_4 = A_p[INDEX3(1,0,4,3,3)];
-                                const double A_11_4 = A_p[INDEX3(1,1,4,3,3)];
-                                const double A_12_4 = A_p[INDEX3(1,2,4,3,3)];
-                                const double A_20_4 = A_p[INDEX3(2,0,4,3,3)];
-                                const double A_21_4 = A_p[INDEX3(2,1,4,3,3)];
-                                const double A_22_4 = A_p[INDEX3(2,2,4,3,3)];
-                                const double A_00_5 = A_p[INDEX3(0,0,5,3,3)];
-                                const double A_01_5 = A_p[INDEX3(0,1,5,3,3)];
-                                const double A_02_5 = A_p[INDEX3(0,2,5,3,3)];
-                                const double A_10_5 = A_p[INDEX3(1,0,5,3,3)];
-                                const double A_11_5 = A_p[INDEX3(1,1,5,3,3)];
-                                const double A_12_5 = A_p[INDEX3(1,2,5,3,3)];
-                                const double A_20_5 = A_p[INDEX3(2,0,5,3,3)];
-                                const double A_21_5 = A_p[INDEX3(2,1,5,3,3)];
-                                const double A_22_5 = A_p[INDEX3(2,2,5,3,3)];
-                                const double A_00_6 = A_p[INDEX3(0,0,6,3,3)];
-                                const double A_01_6 = A_p[INDEX3(0,1,6,3,3)];
-                                const double A_02_6 = A_p[INDEX3(0,2,6,3,3)];
-                                const double A_10_6 = A_p[INDEX3(1,0,6,3,3)];
-                                const double A_11_6 = A_p[INDEX3(1,1,6,3,3)];
-                                const double A_12_6 = A_p[INDEX3(1,2,6,3,3)];
-                                const double A_20_6 = A_p[INDEX3(2,0,6,3,3)];
-                                const double A_21_6 = A_p[INDEX3(2,1,6,3,3)];
-                                const double A_22_6 = A_p[INDEX3(2,2,6,3,3)];
-                                const double A_00_7 = A_p[INDEX3(0,0,7,3,3)];
-                                const double A_01_7 = A_p[INDEX3(0,1,7,3,3)];
-                                const double A_02_7 = A_p[INDEX3(0,2,7,3,3)];
-                                const double A_10_7 = A_p[INDEX3(1,0,7,3,3)];
-                                const double A_11_7 = A_p[INDEX3(1,1,7,3,3)];
-                                const double A_12_7 = A_p[INDEX3(1,2,7,3,3)];
-                                const double A_20_7 = A_p[INDEX3(2,0,7,3,3)];
-                                const double A_21_7 = A_p[INDEX3(2,1,7,3,3)];
-                                const double A_22_7 = A_p[INDEX3(2,2,7,3,3)];
-                                const double tmp0 = w18*(-A_12_7 + A_21_3);
-                                const double tmp1 = w13*(A_22_1 + A_22_2 + A_22_5 + A_22_6);
-                                const double tmp2 = w11*(-A_02_2 - A_02_5 + A_20_1 + A_20_6);
-                                const double tmp3 = w14*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
-                                const double tmp4 = w7*(A_22_0 + A_22_4);
-                                const double tmp5 = w10*(A_12_1 + A_12_6 - A_21_2 - A_21_5);
-                                const double tmp6 = w3*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
-                                const double tmp7 = w1*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
-                                const double tmp8 = w4*(A_12_0 - A_21_4);
-                                const double tmp9 = w15*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
-                                const double tmp10 = w0*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
-                                const double tmp11 = w16*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
-                                const double tmp12 = w9*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
-                                const double tmp13 = w12*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
-                                const double tmp14 = w5*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
-                                const double tmp15 = w8*(A_01_1 + A_01_2 + A_01_5 + A_01_6 + A_10_1 + A_10_2 + A_10_5 + A_10_6);
-                                const double tmp16 = w6*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
-                                const double tmp17 = w19*(A_22_3 + A_22_7);
-                                const double tmp18 = w17*(-A_02_7 + A_20_3);
-                                const double tmp19 = w2*(A_02_0 - A_20_4);
-                                const double tmp20 = w13*(-A_22_0 - A_22_1 - A_22_2 - A_22_3 - A_22_4 - A_22_5 - A_22_6 - A_22_7);
-                                const double tmp21 = w11*(-A_02_1 - A_02_3 - A_02_4 - A_02_6 + A_20_0 + A_20_2 + A_20_5 + A_20_7);
-                                const double tmp22 = w14*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                const double tmp23 = w20*(A_01_2 + A_10_1);
-                                const double tmp24 = w10*(A_12_2 + A_12_3 + A_12_4 + A_12_5 - A_21_0 - A_21_1 - A_21_6 - A_21_7);
-                                const double tmp25 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                                const double tmp26 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                                const double tmp27 = w15*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
-                                const double tmp28 = w0*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                                const double tmp29 = w16*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
-                                const double tmp30 = w9*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                const double tmp31 = w21*(A_01_5 + A_10_6);
-                                const double tmp32 = w12*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
-                                const double tmp33 = w5*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
-                                const double tmp34 = w8*(-A_01_1 - A_01_6 - A_10_2 - A_10_5);
-                                const double tmp35 = w6*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
-                                const double tmp36 = w20*(-A_01_6 + A_10_4);
-                                const double tmp37 = w18*(A_12_3 - A_21_1);
-                                const double tmp38 = w11*(-A_02_0 - A_02_2 - A_02_5 - A_02_7 - A_20_0 - A_20_2 - A_20_5 - A_20_7);
-                                const double tmp39 = w14*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                                const double tmp40 = w26*(A_11_4 + A_11_6);
-                                const double tmp41 = w0*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                const double tmp42 = w10*(-A_12_2 - A_12_5 + A_21_0 + A_21_7);
-                                const double tmp43 = w22*(A_11_0 + A_11_2 + A_11_5 + A_11_7);
-                                const double tmp44 = w1*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
-                                const double tmp45 = w25*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
-                                const double tmp46 = w4*(-A_12_4 + A_21_6);
-                                const double tmp47 = w15*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
-                                const double tmp48 = w21*(-A_01_1 + A_10_3);
-                                const double tmp49 = w16*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                                const double tmp50 = w5*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
-                                const double tmp51 = w12*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
-                                const double tmp52 = w24*(A_11_1 + A_11_3);
-                                const double tmp53 = w8*(A_01_2 + A_01_5 - A_10_0 - A_10_7);
-                                const double tmp54 = w6*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
-                                const double tmp55 = w23*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
-                                const double tmp56 = w18*(A_12_4 - A_21_6);
-                                const double tmp57 = w14*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                const double tmp58 = w26*(A_11_1 + A_11_3);
-                                const double tmp59 = w20*(-A_01_1 + A_10_3);
-                                const double tmp60 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                                const double tmp61 = w25*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
-                                const double tmp62 = w4*(-A_12_3 + A_21_1);
-                                const double tmp63 = w15*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
-                                const double tmp64 = w0*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                                const double tmp65 = w16*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
-                                const double tmp66 = w24*(A_11_4 + A_11_6);
-                                const double tmp67 = w21*(-A_01_6 + A_10_4);
-                                const double tmp68 = w12*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
-                                const double tmp69 = w5*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
-                                const double tmp70 = w6*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
-                                const double tmp71 = w23*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
-                                const double tmp72 = w20*(A_01_5 + A_10_6);
-                                const double tmp73 = w14*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                                const double tmp74 = w0*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                const double tmp75 = w3*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                const double tmp76 = w1*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
-                                const double tmp77 = w15*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
-                                const double tmp78 = w21*(A_01_2 + A_10_1);
-                                const double tmp79 = w16*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                                const double tmp80 = w9*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                                const double tmp81 = w12*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
-                                const double tmp82 = w5*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
-                                const double tmp83 = w6*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
-                                const double tmp84 = w6*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
-                                const double tmp85 = w11*(A_02_1 + A_02_6 - A_20_0 - A_20_7);
-                                const double tmp86 = w20*(A_01_3 - A_10_2);
-                                const double tmp87 = w10*(A_12_0 + A_12_1 + A_12_6 + A_12_7 + A_21_0 + A_21_1 + A_21_6 + A_21_7);
-                                const double tmp88 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                                const double tmp89 = w23*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
-                                const double tmp90 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                                const double tmp91 = w25*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
-                                const double tmp92 = w15*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
-                                const double tmp93 = w21*(A_01_4 - A_10_5);
-                                const double tmp94 = w16*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
-                                const double tmp95 = w28*(A_00_2 + A_00_3);
-                                const double tmp96 = w12*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
-                                const double tmp97 = w29*(A_00_4 + A_00_5);
-                                const double tmp98 = w5*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
-                                const double tmp99 = w8*(-A_01_0 - A_01_7 + A_10_1 + A_10_6);
-                                const double tmp100 = w9*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                const double tmp101 = w27*(A_00_0 + A_00_1 + A_00_6 + A_00_7);
-                                const double tmp102 = w17*(A_02_4 - A_20_5);
-                                const double tmp103 = w2*(-A_02_3 + A_20_2);
-                                const double tmp104 = w13*(A_22_0 + A_22_1 + A_22_2 + A_22_3 + A_22_4 + A_22_5 + A_22_6 + A_22_7);
-                                const double tmp105 = w6*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
-                                const double tmp106 = w22*(A_11_0 + A_11_1 + A_11_2 + A_11_3 + A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                const double tmp107 = w1*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
-                                const double tmp108 = w15*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
-                                const double tmp109 = w16*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
-                                const double tmp110 = w12*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
-                                const double tmp111 = w5*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
-                                const double tmp112 = w8*(-A_01_0 - A_01_3 - A_01_4 - A_01_7 - A_10_0 - A_10_3 - A_10_4 - A_10_7);
-                                const double tmp113 = w27*(A_00_0 + A_00_1 + A_00_2 + A_00_3 + A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                const double tmp114 = w11*(A_02_0 + A_02_2 + A_02_5 + A_02_7 - A_20_1 - A_20_3 - A_20_4 - A_20_6);
-                                const double tmp115 = w21*(-A_01_4 - A_10_7);
-                                const double tmp116 = w20*(-A_01_3 - A_10_0);
-                                const double tmp117 = w15*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
-                                const double tmp118 = w16*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
-                                const double tmp119 = w5*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
-                                const double tmp120 = w8*(A_01_0 + A_01_7 + A_10_3 + A_10_4);
-                                const double tmp121 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                                const double tmp122 = w18*(A_12_2 - A_21_6);
-                                const double tmp123 = w13*(A_22_0 + A_22_3 + A_22_4 + A_22_7);
-                                const double tmp124 = w11*(-A_02_0 - A_02_7 + A_20_3 + A_20_4);
-                                const double tmp125 = w7*(A_22_1 + A_22_5);
-                                const double tmp126 = w10*(-A_12_3 - A_12_4 + A_21_0 + A_21_7);
-                                const double tmp127 = w3*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
-                                const double tmp128 = w1*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
-                                const double tmp129 = w4*(-A_12_5 + A_21_1);
-                                const double tmp130 = w16*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
-                                const double tmp131 = w9*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
-                                const double tmp132 = w19*(A_22_2 + A_22_6);
-                                const double tmp133 = w17*(-A_02_2 + A_20_6);
-                                const double tmp134 = w2*(A_02_5 - A_20_1);
-                                const double tmp135 = w11*(A_02_1 + A_02_3 + A_02_4 + A_02_6 + A_20_1 + A_20_3 + A_20_4 + A_20_6);
-                                const double tmp136 = w1*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
-                                const double tmp137 = w15*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
-                                const double tmp138 = w16*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
-                                const double tmp139 = w5*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
-                                const double tmp140 = w18*(A_12_5 - A_21_1);
-                                const double tmp141 = w14*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
-                                const double tmp142 = w7*(A_22_2 + A_22_6);
-                                const double tmp143 = w1*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
-                                const double tmp144 = w4*(-A_12_2 + A_21_6);
-                                const double tmp145 = w15*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
-                                const double tmp146 = w0*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
-                                const double tmp147 = w16*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
-                                const double tmp148 = w5*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
-                                const double tmp149 = w19*(A_22_1 + A_22_5);
-                                const double tmp150 = w17*(-A_02_5 + A_20_1);
-                                const double tmp151 = w2*(A_02_2 - A_20_6);
-                                const double tmp152 = w18*(A_12_3 - A_21_7);
-                                const double tmp153 = w11*(A_02_1 + A_02_6 - A_20_2 - A_20_5);
-                                const double tmp154 = w10*(-A_12_2 - A_12_5 + A_21_1 + A_21_6);
-                                const double tmp155 = w4*(-A_12_4 + A_21_0);
-                                const double tmp156 = w15*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
-                                const double tmp157 = w5*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
-                                const double tmp158 = w17*(A_02_3 - A_20_7);
-                                const double tmp159 = w2*(-A_02_4 + A_20_0);
-                                const double tmp160 = w6*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
-                                const double tmp161 = w10*(-A_12_2 - A_12_3 - A_12_4 - A_12_5 - A_21_2 - A_21_3 - A_21_4 - A_21_5);
-                                const double tmp162 = w1*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
-                                const double tmp163 = w16*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
-                                const double tmp164 = w12*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
-                                const double tmp165 = w20*(A_01_6 + A_10_5);
-                                const double tmp166 = w10*(-A_12_0 - A_12_1 - A_12_6 - A_12_7 + A_21_2 + A_21_3 + A_21_4 + A_21_5);
-                                const double tmp167 = w15*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
-                                const double tmp168 = w21*(A_01_1 + A_10_2);
-                                const double tmp169 = w12*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
-                                const double tmp170 = w5*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
-                                const double tmp171 = w8*(-A_01_2 - A_01_5 - A_10_1 - A_10_6);
-                                const double tmp172 = w6*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
-                                const double tmp173 = w2*(A_02_1 + A_20_4);
-                                const double tmp174 = w11*(-A_02_3 - A_02_4 - A_20_1 - A_20_6);
-                                const double tmp175 = w14*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
-                                const double tmp176 = w22*(-A_11_0 - A_11_1 - A_11_2 - A_11_3 - A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                const double tmp177 = w1*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
-                                const double tmp178 = w25*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
-                                const double tmp179 = w15*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
-                                const double tmp180 = w0*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
-                                const double tmp181 = w16*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
-                                const double tmp182 = w12*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
-                                const double tmp183 = w5*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
-                                const double tmp184 = w8*(A_01_0 + A_01_3 + A_01_4 + A_01_7 - A_10_1 - A_10_2 - A_10_5 - A_10_6);
-                                const double tmp185 = w6*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
-                                const double tmp186 = w17*(-A_02_6 - A_20_3);
-                                const double tmp187 = w23*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
-                                const double tmp188 = w18*(A_12_4 - A_21_0);
-                                const double tmp189 = w7*(A_22_3 + A_22_7);
-                                const double tmp190 = w1*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
-                                const double tmp191 = w4*(-A_12_3 + A_21_7);
-                                const double tmp192 = w16*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
-                                const double tmp193 = w19*(A_22_0 + A_22_4);
-                                const double tmp194 = w17*(A_02_4 - A_20_0);
-                                const double tmp195 = w2*(-A_02_3 + A_20_7);
-                                const double tmp196 = w20*(-A_01_7 - A_10_4);
-                                const double tmp197 = w21*(-A_01_0 - A_10_3);
-                                const double tmp198 = w16*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                                const double tmp199 = w8*(A_01_3 + A_01_4 + A_10_0 + A_10_7);
-                                const double tmp200 = w1*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
-                                const double tmp201 = w27*(A_00_2 + A_00_3 + A_00_4 + A_00_5);
-                                const double tmp202 = w11*(-A_02_2 - A_02_5 + A_20_3 + A_20_4);
-                                const double tmp203 = w20*(A_01_0 - A_10_1);
-                                const double tmp204 = w23*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
-                                const double tmp205 = w25*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
-                                const double tmp206 = w21*(A_01_7 - A_10_6);
-                                const double tmp207 = w12*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
-                                const double tmp208 = w28*(A_00_0 + A_00_1);
-                                const double tmp209 = w29*(A_00_6 + A_00_7);
-                                const double tmp210 = w8*(-A_01_3 - A_01_4 + A_10_2 + A_10_5);
-                                const double tmp211 = w6*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
-                                const double tmp212 = w17*(-A_02_7 + A_20_6);
-                                const double tmp213 = w2*(A_02_0 - A_20_1);
-                                const double tmp214 = w13*(-A_22_1 - A_22_2 - A_22_5 - A_22_6);
-                                const double tmp215 = w22*(-A_11_0 - A_11_2 - A_11_5 - A_11_7);
-                                const double tmp216 = w8*(A_01_0 + A_01_7 + A_10_0 + A_10_7);
-                                const double tmp217 = w27*(-A_00_0 - A_00_1 - A_00_6 - A_00_7);
-                                const double tmp218 = w17*(-A_02_3 - A_20_3);
-                                const double tmp219 = w2*(A_02_4 + A_20_4);
-                                const double tmp220 = w11*(-A_02_1 - A_02_6 - A_20_1 - A_20_6);
-                                const double tmp221 = w26*(-A_11_4 - A_11_6);
-                                const double tmp222 = w10*(A_12_2 + A_12_5 + A_21_2 + A_21_5);
-                                const double tmp223 = w20*(-A_01_4 - A_10_4);
-                                const double tmp224 = w21*(-A_01_3 - A_10_3);
-                                const double tmp225 = w6*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
-                                const double tmp226 = w7*(-A_22_0 - A_22_4);
-                                const double tmp227 = w24*(-A_11_1 - A_11_3);
-                                const double tmp228 = w19*(-A_22_3 - A_22_7);
-                                const double tmp229 = w18*(-A_12_3 - A_21_3);
-                                const double tmp230 = w4*(A_12_4 + A_21_4);
-                                const double tmp231 = w28*(-A_00_4 - A_00_5);
-                                const double tmp232 = w12*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
-                                const double tmp233 = w29*(-A_00_2 - A_00_3);
-                                const double tmp234 = w20*(-A_01_5 + A_10_7);
-                                const double tmp235 = w18*(-A_12_0 + A_21_2);
-                                const double tmp236 = w26*(A_11_5 + A_11_7);
-                                const double tmp237 = w10*(A_12_1 + A_12_6 - A_21_3 - A_21_4);
-                                const double tmp238 = w22*(A_11_1 + A_11_3 + A_11_4 + A_11_6);
-                                const double tmp239 = w4*(A_12_7 - A_21_5);
-                                const double tmp240 = w15*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
-                                const double tmp241 = w21*(-A_01_2 + A_10_0);
-                                const double tmp242 = w5*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
-                                const double tmp243 = w12*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
-                                const double tmp244 = w24*(A_11_0 + A_11_2);
-                                const double tmp245 = w8*(A_01_1 + A_01_6 - A_10_3 - A_10_4);
-                                const double tmp246 = w6*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
-                                const double tmp247 = w11*(A_02_3 + A_02_4 - A_20_2 - A_20_5);
-                                const double tmp248 = w20*(-A_01_1 + A_10_0);
-                                const double tmp249 = w21*(-A_01_6 + A_10_7);
-                                const double tmp250 = w8*(A_01_2 + A_01_5 - A_10_3 - A_10_4);
-                                const double tmp251 = w17*(A_02_6 - A_20_7);
-                                const double tmp252 = w2*(-A_02_1 + A_20_0);
-                                const double tmp253 = w17*(-A_02_4 - A_20_4);
-                                const double tmp254 = w2*(A_02_3 + A_20_3);
-                                const double tmp255 = w26*(-A_11_1 - A_11_3);
-                                const double tmp256 = w20*(-A_01_3 - A_10_3);
-                                const double tmp257 = w21*(-A_01_4 - A_10_4);
-                                const double tmp258 = w6*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
-                                const double tmp259 = w7*(-A_22_3 - A_22_7);
-                                const double tmp260 = w15*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
-                                const double tmp261 = w24*(-A_11_4 - A_11_6);
-                                const double tmp262 = w19*(-A_22_0 - A_22_4);
-                                const double tmp263 = w18*(-A_12_4 - A_21_4);
-                                const double tmp264 = w4*(A_12_3 + A_21_3);
-                                const double tmp265 = w28*(-A_00_2 - A_00_3);
-                                const double tmp266 = w12*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
-                                const double tmp267 = w5*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
-                                const double tmp268 = w29*(-A_00_4 - A_00_5);
-                                const double tmp269 = w11*(A_02_2 + A_02_5 + A_20_0 + A_20_7);
-                                const double tmp270 = w1*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
-                                const double tmp271 = w15*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
-                                const double tmp272 = w16*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
-                                const double tmp273 = w5*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
-                                const double tmp274 = w8*(-A_01_1 - A_01_2 - A_01_5 - A_01_6 + A_10_0 + A_10_3 + A_10_4 + A_10_7);
-                                const double tmp275 = w17*(A_02_7 + A_20_2);
-                                const double tmp276 = w2*(-A_02_0 - A_20_5);
-                                const double tmp277 = w18*(-A_12_1 + A_21_5);
-                                const double tmp278 = w11*(A_02_3 + A_02_4 - A_20_0 - A_20_7);
-                                const double tmp279 = w10*(A_12_0 + A_12_7 - A_21_3 - A_21_4);
-                                const double tmp280 = w4*(A_12_6 - A_21_2);
-                                const double tmp281 = w17*(A_02_1 - A_20_5);
-                                const double tmp282 = w2*(-A_02_6 + A_20_2);
-                                const double tmp283 = w11*(A_02_0 + A_02_7 + A_20_2 + A_20_5);
-                                const double tmp284 = w12*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
-                                const double tmp285 = w6*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
-                                const double tmp286 = w17*(A_02_2 + A_20_7);
-                                const double tmp287 = w2*(-A_02_5 - A_20_0);
-                                const double tmp288 = w13*(-A_22_0 - A_22_3 - A_22_4 - A_22_7);
-                                const double tmp289 = w22*(-A_11_1 - A_11_3 - A_11_4 - A_11_6);
-                                const double tmp290 = w8*(-A_01_1 - A_01_6 - A_10_1 - A_10_6);
-                                const double tmp291 = w17*(A_02_2 + A_20_2);
-                                const double tmp292 = w2*(-A_02_5 - A_20_5);
-                                const double tmp293 = w11*(A_02_0 + A_02_7 + A_20_0 + A_20_7);
-                                const double tmp294 = w26*(-A_11_5 - A_11_7);
-                                const double tmp295 = w10*(A_12_3 + A_12_4 + A_21_3 + A_21_4);
-                                const double tmp296 = w20*(A_01_5 + A_10_5);
-                                const double tmp297 = w21*(A_01_2 + A_10_2);
-                                const double tmp298 = w7*(-A_22_1 - A_22_5);
-                                const double tmp299 = w24*(-A_11_0 - A_11_2);
-                                const double tmp300 = w19*(-A_22_2 - A_22_6);
-                                const double tmp301 = w18*(-A_12_2 - A_21_2);
-                                const double tmp302 = w4*(A_12_5 + A_21_5);
-                                const double tmp303 = w8*(A_01_3 + A_01_4 + A_10_3 + A_10_4);
-                                const double tmp304 = w27*(-A_00_2 - A_00_3 - A_00_4 - A_00_5);
-                                const double tmp305 = w17*(A_02_7 + A_20_7);
-                                const double tmp306 = w2*(-A_02_0 - A_20_0);
-                                const double tmp307 = w11*(A_02_2 + A_02_5 + A_20_2 + A_20_5);
-                                const double tmp308 = w26*(-A_11_0 - A_11_2);
-                                const double tmp309 = w10*(-A_12_1 - A_12_6 - A_21_1 - A_21_6);
-                                const double tmp310 = w20*(-A_01_0 - A_10_0);
-                                const double tmp311 = w21*(-A_01_7 - A_10_7);
-                                const double tmp312 = w6*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
-                                const double tmp313 = w24*(-A_11_5 - A_11_7);
-                                const double tmp314 = w18*(A_12_7 + A_21_7);
-                                const double tmp315 = w4*(-A_12_0 - A_21_0);
-                                const double tmp316 = w28*(-A_00_0 - A_00_1);
-                                const double tmp317 = w12*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
-                                const double tmp318 = w29*(-A_00_6 - A_00_7);
-                                const double tmp319 = w18*(-A_12_7 + A_21_5);
-                                const double tmp320 = w26*(A_11_0 + A_11_2);
-                                const double tmp321 = w21*(-A_01_5 + A_10_7);
-                                const double tmp322 = w20*(-A_01_2 + A_10_0);
-                                const double tmp323 = w4*(A_12_0 - A_21_2);
-                                const double tmp324 = w15*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
-                                const double tmp325 = w24*(A_11_5 + A_11_7);
-                                const double tmp326 = w5*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
-                                const double tmp327 = w18*(A_12_7 + A_21_1);
-                                const double tmp328 = w10*(-A_12_1 - A_12_6 - A_21_0 - A_21_7);
-                                const double tmp329 = w3*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
-                                const double tmp330 = w1*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
-                                const double tmp331 = w4*(-A_12_0 - A_21_6);
-                                const double tmp332 = w25*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
-                                const double tmp333 = w15*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
-                                const double tmp334 = w16*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
-                                const double tmp335 = w9*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
-                                const double tmp336 = w5*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
-                                const double tmp337 = w27*(-A_00_0 - A_00_1 - A_00_2 - A_00_3 - A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                const double tmp338 = w23*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
-                                const double tmp339 = w14*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
-                                const double tmp340 = w23*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
-                                const double tmp341 = w1*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
-                                const double tmp342 = w25*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
-                                const double tmp343 = w15*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
-                                const double tmp344 = w0*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
-                                const double tmp345 = w16*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
-                                const double tmp346 = w12*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
-                                const double tmp347 = w5*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
-                                const double tmp348 = w6*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
-                                const double tmp349 = w17*(A_02_5 + A_20_0);
-                                const double tmp350 = w2*(-A_02_2 - A_20_7);
-                                const double tmp351 = w8*(-A_01_2 - A_01_5 - A_10_2 - A_10_5);
-                                const double tmp352 = w17*(-A_02_1 - A_20_1);
-                                const double tmp353 = w2*(A_02_6 + A_20_6);
-                                const double tmp354 = w11*(-A_02_3 - A_02_4 - A_20_3 - A_20_4);
-                                const double tmp355 = w10*(-A_12_0 - A_12_7 - A_21_0 - A_21_7);
-                                const double tmp356 = w20*(A_01_6 + A_10_6);
-                                const double tmp357 = w21*(A_01_1 + A_10_1);
-                                const double tmp358 = w7*(-A_22_2 - A_22_6);
-                                const double tmp359 = w19*(-A_22_1 - A_22_5);
-                                const double tmp360 = w18*(A_12_1 + A_21_1);
-                                const double tmp361 = w4*(-A_12_6 - A_21_6);
-                                const double tmp362 = w28*(-A_00_6 - A_00_7);
-                                const double tmp363 = w29*(-A_00_0 - A_00_1);
-                                const double tmp364 = w2*(A_02_4 + A_20_1);
-                                const double tmp365 = w11*(-A_02_1 - A_02_6 - A_20_3 - A_20_4);
-                                const double tmp366 = w17*(-A_02_3 - A_20_6);
-                                const double tmp367 = w2*(A_02_5 - A_20_4);
-                                const double tmp368 = w6*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
-                                const double tmp369 = w11*(-A_02_0 - A_02_7 + A_20_1 + A_20_6);
-                                const double tmp370 = w20*(-A_01_5 + A_10_4);
-                                const double tmp371 = w3*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                const double tmp372 = w12*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
-                                const double tmp373 = w21*(-A_01_2 + A_10_3);
-                                const double tmp374 = w9*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                                const double tmp375 = w29*(A_00_2 + A_00_3);
-                                const double tmp376 = w8*(A_01_1 + A_01_6 - A_10_0 - A_10_7);
-                                const double tmp377 = w28*(A_00_4 + A_00_5);
-                                const double tmp378 = w17*(-A_02_2 + A_20_3);
-                                const double tmp379 = w17*(A_02_0 + A_20_0);
-                                const double tmp380 = w2*(-A_02_7 - A_20_7);
-                                const double tmp381 = w20*(-A_01_7 - A_10_7);
-                                const double tmp382 = w21*(-A_01_0 - A_10_0);
-                                const double tmp383 = w6*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
-                                const double tmp384 = w18*(A_12_0 + A_21_0);
-                                const double tmp385 = w4*(-A_12_7 - A_21_7);
-                                const double tmp386 = w12*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
-                                const double tmp387 = w17*(-A_02_6 - A_20_6);
-                                const double tmp388 = w2*(A_02_1 + A_20_1);
-                                const double tmp389 = w20*(A_01_1 + A_10_1);
-                                const double tmp390 = w21*(A_01_6 + A_10_6);
-                                const double tmp391 = w18*(A_12_6 + A_21_6);
-                                const double tmp392 = w4*(-A_12_1 - A_21_1);
-                                const double tmp393 = w2*(A_02_3 + A_20_6);
-                                const double tmp394 = w1*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
-                                const double tmp395 = w16*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
-                                const double tmp396 = w17*(-A_02_4 - A_20_1);
-                                const double tmp397 = w18*(-A_12_5 - A_21_3);
-                                const double tmp398 = w10*(A_12_3 + A_12_4 + A_21_2 + A_21_5);
-                                const double tmp399 = w1*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
-                                const double tmp400 = w4*(A_12_2 + A_21_4);
-                                const double tmp401 = w16*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
-                                const double tmp402 = w20*(-A_01_2 + A_10_3);
-                                const double tmp403 = w21*(-A_01_5 + A_10_4);
-                                const double tmp404 = w17*(-A_02_5 + A_20_4);
-                                const double tmp405 = w2*(A_02_2 - A_20_3);
-                                const double tmp406 = w18*(-A_12_0 + A_21_4);
-                                const double tmp407 = w4*(A_12_7 - A_21_3);
-                                const double tmp408 = w17*(-A_02_0 + A_20_4);
-                                const double tmp409 = w2*(A_02_7 - A_20_3);
-                                const double tmp410 = w17*(A_02_5 + A_20_5);
-                                const double tmp411 = w2*(-A_02_2 - A_20_2);
-                                const double tmp412 = w20*(A_01_2 + A_10_2);
-                                const double tmp413 = w21*(A_01_5 + A_10_5);
-                                const double tmp414 = w18*(-A_12_5 - A_21_5);
-                                const double tmp415 = w4*(A_12_2 + A_21_2);
-                                const double tmp416 = w12*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
-                                const double tmp417 = w6*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
-                                const double tmp418 = w17*(A_02_0 + A_20_5);
-                                const double tmp419 = w2*(-A_02_7 - A_20_2);
-                                const double tmp420 = w18*(-A_12_4 - A_21_2);
-                                const double tmp421 = w10*(A_12_2 + A_12_5 + A_21_3 + A_21_4);
-                                const double tmp422 = w3*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
-                                const double tmp423 = w1*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
-                                const double tmp424 = w25*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
-                                const double tmp425 = w4*(A_12_3 + A_21_5);
-                                const double tmp426 = w15*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
-                                const double tmp427 = w16*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
-                                const double tmp428 = w9*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
-                                const double tmp429 = w5*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
-                                const double tmp430 = w23*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
-                                const double tmp431 = w18*(A_12_5 - A_21_7);
-                                const double tmp432 = w10*(-A_12_3 - A_12_4 + A_21_1 + A_21_6);
-                                const double tmp433 = w21*(A_01_7 - A_10_5);
-                                const double tmp434 = w20*(A_01_0 - A_10_2);
-                                const double tmp435 = w4*(-A_12_2 + A_21_0);
-                                const double tmp436 = w8*(-A_01_3 - A_01_4 + A_10_1 + A_10_6);
-                                const double tmp437 = w2*(-A_02_4 + A_20_5);
-                                const double tmp438 = w20*(A_01_4 - A_10_5);
-                                const double tmp439 = w21*(A_01_3 - A_10_2);
-                                const double tmp440 = w16*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                                const double tmp441 = w1*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
-                                const double tmp442 = w17*(A_02_3 - A_20_2);
-                                const double tmp443 = w20*(-A_01_4 - A_10_7);
-                                const double tmp444 = w21*(-A_01_3 - A_10_0);
-                                const double tmp445 = w18*(A_12_6 + A_21_0);
-                                const double tmp446 = w10*(-A_12_0 - A_12_7 - A_21_1 - A_21_6);
-                                const double tmp447 = w1*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
-                                const double tmp448 = w4*(-A_12_1 - A_21_7);
-                                const double tmp449 = w16*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
-                                const double tmp450 = w2*(A_02_7 - A_20_6);
-                                const double tmp451 = w6*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
-                                const double tmp452 = w20*(A_01_7 - A_10_6);
-                                const double tmp453 = w21*(A_01_0 - A_10_1);
-                                const double tmp454 = w12*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
-                                const double tmp455 = w29*(A_00_0 + A_00_1);
-                                const double tmp456 = w28*(A_00_6 + A_00_7);
-                                const double tmp457 = w17*(-A_02_0 + A_20_1);
-                                const double tmp458 = w21*(-A_01_7 - A_10_4);
-                                const double tmp459 = w20*(-A_01_0 - A_10_3);
-                                const double tmp460 = w12*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
-                                const double tmp461 = w6*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
-                                const double tmp462 = w18*(A_12_1 + A_21_7);
-                                const double tmp463 = w4*(-A_12_6 - A_21_0);
-                                const double tmp464 = w15*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
-                                const double tmp465 = w5*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
-                                const double tmp466 = w2*(-A_02_6 + A_20_7);
-                                const double tmp467 = w20*(-A_01_6 + A_10_7);
-                                const double tmp468 = w21*(-A_01_1 + A_10_0);
-                                const double tmp469 = w17*(A_02_1 - A_20_0);
-                                const double tmp470 = w6*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
-                                const double tmp471 = w1*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
-                                const double tmp472 = w15*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
-                                const double tmp473 = w16*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
-                                const double tmp474 = w12*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
-                                const double tmp475 = w5*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
-                                const double tmp476 = w18*(-A_12_6 + A_21_4);
-                                const double tmp477 = w20*(A_01_3 - A_10_1);
-                                const double tmp478 = w10*(A_12_0 + A_12_7 - A_21_2 - A_21_5);
-                                const double tmp479 = w4*(A_12_1 - A_21_3);
-                                const double tmp480 = w21*(A_01_4 - A_10_6);
-                                const double tmp481 = w8*(-A_01_0 - A_01_7 + A_10_2 + A_10_5);
-                                const double tmp482 = w6*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
-                                const double tmp483 = w12*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
-                                const double tmp484 = w15*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
-                                const double tmp485 = w5*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
-                                const double tmp486 = w18*(-A_12_1 + A_21_3);
-                                const double tmp487 = w20*(A_01_4 - A_10_6);
-                                const double tmp488 = w4*(A_12_6 - A_21_4);
-                                const double tmp489 = w21*(A_01_3 - A_10_1);
-                                const double tmp490 = w20*(A_01_7 - A_10_5);
-                                const double tmp491 = w18*(A_12_2 - A_21_0);
-                                const double tmp492 = w4*(-A_12_5 + A_21_7);
-                                const double tmp493 = w21*(A_01_0 - A_10_2);
-                                const double tmp494 = w20*(A_01_1 + A_10_2);
-                                const double tmp495 = w21*(A_01_6 + A_10_5);
-                                const double tmp496 = w18*(-A_12_2 - A_21_4);
-                                const double tmp497 = w4*(A_12_5 + A_21_3);
-                                const double tmp498 = w15*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
-                                const double tmp499 = w5*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
-                                const double tmp500 = w18*(-A_12_6 + A_21_2);
-                                const double tmp501 = w4*(A_12_1 - A_21_5);
-                                const double tmp502 = w17*(A_02_6 - A_20_2);
-                                const double tmp503 = w2*(-A_02_1 + A_20_5);
-                                const double tmp504 = w18*(-A_12_3 - A_21_5);
-                                const double tmp505 = w4*(A_12_4 + A_21_2);
-                                const double tmp506 = w2*(A_02_6 + A_20_3);
-                                const double tmp507 = w17*(-A_02_1 - A_20_4);
-                                const double tmp508 = w18*(A_12_0 + A_21_6);
-                                const double tmp509 = w4*(-A_12_7 - A_21_1);
+                                const Scalar A_00_0 = A_p[INDEX3(0,0,0,3,3)];
+                                const Scalar A_01_0 = A_p[INDEX3(0,1,0,3,3)];
+                                const Scalar A_02_0 = A_p[INDEX3(0,2,0,3,3)];
+                                const Scalar A_10_0 = A_p[INDEX3(1,0,0,3,3)];
+                                const Scalar A_11_0 = A_p[INDEX3(1,1,0,3,3)];
+                                const Scalar A_12_0 = A_p[INDEX3(1,2,0,3,3)];
+                                const Scalar A_20_0 = A_p[INDEX3(2,0,0,3,3)];
+                                const Scalar A_21_0 = A_p[INDEX3(2,1,0,3,3)];
+                                const Scalar A_22_0 = A_p[INDEX3(2,2,0,3,3)];
+                                const Scalar A_00_1 = A_p[INDEX3(0,0,1,3,3)];
+                                const Scalar A_01_1 = A_p[INDEX3(0,1,1,3,3)];
+                                const Scalar A_02_1 = A_p[INDEX3(0,2,1,3,3)];
+                                const Scalar A_10_1 = A_p[INDEX3(1,0,1,3,3)];
+                                const Scalar A_11_1 = A_p[INDEX3(1,1,1,3,3)];
+                                const Scalar A_12_1 = A_p[INDEX3(1,2,1,3,3)];
+                                const Scalar A_20_1 = A_p[INDEX3(2,0,1,3,3)];
+                                const Scalar A_21_1 = A_p[INDEX3(2,1,1,3,3)];
+                                const Scalar A_22_1 = A_p[INDEX3(2,2,1,3,3)];
+                                const Scalar A_00_2 = A_p[INDEX3(0,0,2,3,3)];
+                                const Scalar A_01_2 = A_p[INDEX3(0,1,2,3,3)];
+                                const Scalar A_02_2 = A_p[INDEX3(0,2,2,3,3)];
+                                const Scalar A_10_2 = A_p[INDEX3(1,0,2,3,3)];
+                                const Scalar A_11_2 = A_p[INDEX3(1,1,2,3,3)];
+                                const Scalar A_12_2 = A_p[INDEX3(1,2,2,3,3)];
+                                const Scalar A_20_2 = A_p[INDEX3(2,0,2,3,3)];
+                                const Scalar A_21_2 = A_p[INDEX3(2,1,2,3,3)];
+                                const Scalar A_22_2 = A_p[INDEX3(2,2,2,3,3)];
+                                const Scalar A_00_3 = A_p[INDEX3(0,0,3,3,3)];
+                                const Scalar A_01_3 = A_p[INDEX3(0,1,3,3,3)];
+                                const Scalar A_02_3 = A_p[INDEX3(0,2,3,3,3)];
+                                const Scalar A_10_3 = A_p[INDEX3(1,0,3,3,3)];
+                                const Scalar A_11_3 = A_p[INDEX3(1,1,3,3,3)];
+                                const Scalar A_12_3 = A_p[INDEX3(1,2,3,3,3)];
+                                const Scalar A_20_3 = A_p[INDEX3(2,0,3,3,3)];
+                                const Scalar A_21_3 = A_p[INDEX3(2,1,3,3,3)];
+                                const Scalar A_22_3 = A_p[INDEX3(2,2,3,3,3)];
+                                const Scalar A_00_4 = A_p[INDEX3(0,0,4,3,3)];
+                                const Scalar A_01_4 = A_p[INDEX3(0,1,4,3,3)];
+                                const Scalar A_02_4 = A_p[INDEX3(0,2,4,3,3)];
+                                const Scalar A_10_4 = A_p[INDEX3(1,0,4,3,3)];
+                                const Scalar A_11_4 = A_p[INDEX3(1,1,4,3,3)];
+                                const Scalar A_12_4 = A_p[INDEX3(1,2,4,3,3)];
+                                const Scalar A_20_4 = A_p[INDEX3(2,0,4,3,3)];
+                                const Scalar A_21_4 = A_p[INDEX3(2,1,4,3,3)];
+                                const Scalar A_22_4 = A_p[INDEX3(2,2,4,3,3)];
+                                const Scalar A_00_5 = A_p[INDEX3(0,0,5,3,3)];
+                                const Scalar A_01_5 = A_p[INDEX3(0,1,5,3,3)];
+                                const Scalar A_02_5 = A_p[INDEX3(0,2,5,3,3)];
+                                const Scalar A_10_5 = A_p[INDEX3(1,0,5,3,3)];
+                                const Scalar A_11_5 = A_p[INDEX3(1,1,5,3,3)];
+                                const Scalar A_12_5 = A_p[INDEX3(1,2,5,3,3)];
+                                const Scalar A_20_5 = A_p[INDEX3(2,0,5,3,3)];
+                                const Scalar A_21_5 = A_p[INDEX3(2,1,5,3,3)];
+                                const Scalar A_22_5 = A_p[INDEX3(2,2,5,3,3)];
+                                const Scalar A_00_6 = A_p[INDEX3(0,0,6,3,3)];
+                                const Scalar A_01_6 = A_p[INDEX3(0,1,6,3,3)];
+                                const Scalar A_02_6 = A_p[INDEX3(0,2,6,3,3)];
+                                const Scalar A_10_6 = A_p[INDEX3(1,0,6,3,3)];
+                                const Scalar A_11_6 = A_p[INDEX3(1,1,6,3,3)];
+                                const Scalar A_12_6 = A_p[INDEX3(1,2,6,3,3)];
+                                const Scalar A_20_6 = A_p[INDEX3(2,0,6,3,3)];
+                                const Scalar A_21_6 = A_p[INDEX3(2,1,6,3,3)];
+                                const Scalar A_22_6 = A_p[INDEX3(2,2,6,3,3)];
+                                const Scalar A_00_7 = A_p[INDEX3(0,0,7,3,3)];
+                                const Scalar A_01_7 = A_p[INDEX3(0,1,7,3,3)];
+                                const Scalar A_02_7 = A_p[INDEX3(0,2,7,3,3)];
+                                const Scalar A_10_7 = A_p[INDEX3(1,0,7,3,3)];
+                                const Scalar A_11_7 = A_p[INDEX3(1,1,7,3,3)];
+                                const Scalar A_12_7 = A_p[INDEX3(1,2,7,3,3)];
+                                const Scalar A_20_7 = A_p[INDEX3(2,0,7,3,3)];
+                                const Scalar A_21_7 = A_p[INDEX3(2,1,7,3,3)];
+                                const Scalar A_22_7 = A_p[INDEX3(2,2,7,3,3)];
+                                const Scalar tmp0 = w18*(-A_12_7 + A_21_3);
+                                const Scalar tmp1 = w13*(A_22_1 + A_22_2 + A_22_5 + A_22_6);
+                                const Scalar tmp2 = w11*(-A_02_2 - A_02_5 + A_20_1 + A_20_6);
+                                const Scalar tmp3 = w14*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
+                                const Scalar tmp4 = w7*(A_22_0 + A_22_4);
+                                const Scalar tmp5 = w10*(A_12_1 + A_12_6 - A_21_2 - A_21_5);
+                                const Scalar tmp6 = w3*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
+                                const Scalar tmp7 = w1*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
+                                const Scalar tmp8 = w4*(A_12_0 - A_21_4);
+                                const Scalar tmp9 = w15*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
+                                const Scalar tmp10 = w0*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
+                                const Scalar tmp11 = w16*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
+                                const Scalar tmp12 = w9*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
+                                const Scalar tmp13 = w12*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
+                                const Scalar tmp14 = w5*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
+                                const Scalar tmp15 = w8*(A_01_1 + A_01_2 + A_01_5 + A_01_6 + A_10_1 + A_10_2 + A_10_5 + A_10_6);
+                                const Scalar tmp16 = w6*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
+                                const Scalar tmp17 = w19*(A_22_3 + A_22_7);
+                                const Scalar tmp18 = w17*(-A_02_7 + A_20_3);
+                                const Scalar tmp19 = w2*(A_02_0 - A_20_4);
+                                const Scalar tmp20 = w13*(-A_22_0 - A_22_1 - A_22_2 - A_22_3 - A_22_4 - A_22_5 - A_22_6 - A_22_7);
+                                const Scalar tmp21 = w11*(-A_02_1 - A_02_3 - A_02_4 - A_02_6 + A_20_0 + A_20_2 + A_20_5 + A_20_7);
+                                const Scalar tmp22 = w14*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                const Scalar tmp23 = w20*(A_01_2 + A_10_1);
+                                const Scalar tmp24 = w10*(A_12_2 + A_12_3 + A_12_4 + A_12_5 - A_21_0 - A_21_1 - A_21_6 - A_21_7);
+                                const Scalar tmp25 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                                const Scalar tmp26 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                                const Scalar tmp27 = w15*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
+                                const Scalar tmp28 = w0*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                                const Scalar tmp29 = w16*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
+                                const Scalar tmp30 = w9*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                const Scalar tmp31 = w21*(A_01_5 + A_10_6);
+                                const Scalar tmp32 = w12*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
+                                const Scalar tmp33 = w5*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
+                                const Scalar tmp34 = w8*(-A_01_1 - A_01_6 - A_10_2 - A_10_5);
+                                const Scalar tmp35 = w6*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
+                                const Scalar tmp36 = w20*(-A_01_6 + A_10_4);
+                                const Scalar tmp37 = w18*(A_12_3 - A_21_1);
+                                const Scalar tmp38 = w11*(-A_02_0 - A_02_2 - A_02_5 - A_02_7 - A_20_0 - A_20_2 - A_20_5 - A_20_7);
+                                const Scalar tmp39 = w14*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                                const Scalar tmp40 = w26*(A_11_4 + A_11_6);
+                                const Scalar tmp41 = w0*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                const Scalar tmp42 = w10*(-A_12_2 - A_12_5 + A_21_0 + A_21_7);
+                                const Scalar tmp43 = w22*(A_11_0 + A_11_2 + A_11_5 + A_11_7);
+                                const Scalar tmp44 = w1*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
+                                const Scalar tmp45 = w25*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
+                                const Scalar tmp46 = w4*(-A_12_4 + A_21_6);
+                                const Scalar tmp47 = w15*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
+                                const Scalar tmp48 = w21*(-A_01_1 + A_10_3);
+                                const Scalar tmp49 = w16*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                                const Scalar tmp50 = w5*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
+                                const Scalar tmp51 = w12*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
+                                const Scalar tmp52 = w24*(A_11_1 + A_11_3);
+                                const Scalar tmp53 = w8*(A_01_2 + A_01_5 - A_10_0 - A_10_7);
+                                const Scalar tmp54 = w6*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
+                                const Scalar tmp55 = w23*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
+                                const Scalar tmp56 = w18*(A_12_4 - A_21_6);
+                                const Scalar tmp57 = w14*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                const Scalar tmp58 = w26*(A_11_1 + A_11_3);
+                                const Scalar tmp59 = w20*(-A_01_1 + A_10_3);
+                                const Scalar tmp60 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                                const Scalar tmp61 = w25*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
+                                const Scalar tmp62 = w4*(-A_12_3 + A_21_1);
+                                const Scalar tmp63 = w15*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
+                                const Scalar tmp64 = w0*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                                const Scalar tmp65 = w16*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
+                                const Scalar tmp66 = w24*(A_11_4 + A_11_6);
+                                const Scalar tmp67 = w21*(-A_01_6 + A_10_4);
+                                const Scalar tmp68 = w12*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
+                                const Scalar tmp69 = w5*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
+                                const Scalar tmp70 = w6*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
+                                const Scalar tmp71 = w23*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
+                                const Scalar tmp72 = w20*(A_01_5 + A_10_6);
+                                const Scalar tmp73 = w14*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                                const Scalar tmp74 = w0*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                const Scalar tmp75 = w3*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                const Scalar tmp76 = w1*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
+                                const Scalar tmp77 = w15*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
+                                const Scalar tmp78 = w21*(A_01_2 + A_10_1);
+                                const Scalar tmp79 = w16*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                                const Scalar tmp80 = w9*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                                const Scalar tmp81 = w12*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
+                                const Scalar tmp82 = w5*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
+                                const Scalar tmp83 = w6*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
+                                const Scalar tmp84 = w6*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
+                                const Scalar tmp85 = w11*(A_02_1 + A_02_6 - A_20_0 - A_20_7);
+                                const Scalar tmp86 = w20*(A_01_3 - A_10_2);
+                                const Scalar tmp87 = w10*(A_12_0 + A_12_1 + A_12_6 + A_12_7 + A_21_0 + A_21_1 + A_21_6 + A_21_7);
+                                const Scalar tmp88 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                                const Scalar tmp89 = w23*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
+                                const Scalar tmp90 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                                const Scalar tmp91 = w25*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
+                                const Scalar tmp92 = w15*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
+                                const Scalar tmp93 = w21*(A_01_4 - A_10_5);
+                                const Scalar tmp94 = w16*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
+                                const Scalar tmp95 = w28*(A_00_2 + A_00_3);
+                                const Scalar tmp96 = w12*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
+                                const Scalar tmp97 = w29*(A_00_4 + A_00_5);
+                                const Scalar tmp98 = w5*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
+                                const Scalar tmp99 = w8*(-A_01_0 - A_01_7 + A_10_1 + A_10_6);
+                                const Scalar tmp100 = w9*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                const Scalar tmp101 = w27*(A_00_0 + A_00_1 + A_00_6 + A_00_7);
+                                const Scalar tmp102 = w17*(A_02_4 - A_20_5);
+                                const Scalar tmp103 = w2*(-A_02_3 + A_20_2);
+                                const Scalar tmp104 = w13*(A_22_0 + A_22_1 + A_22_2 + A_22_3 + A_22_4 + A_22_5 + A_22_6 + A_22_7);
+                                const Scalar tmp105 = w6*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
+                                const Scalar tmp106 = w22*(A_11_0 + A_11_1 + A_11_2 + A_11_3 + A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                const Scalar tmp107 = w1*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
+                                const Scalar tmp108 = w15*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
+                                const Scalar tmp109 = w16*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
+                                const Scalar tmp110 = w12*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
+                                const Scalar tmp111 = w5*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
+                                const Scalar tmp112 = w8*(-A_01_0 - A_01_3 - A_01_4 - A_01_7 - A_10_0 - A_10_3 - A_10_4 - A_10_7);
+                                const Scalar tmp113 = w27*(A_00_0 + A_00_1 + A_00_2 + A_00_3 + A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                const Scalar tmp114 = w11*(A_02_0 + A_02_2 + A_02_5 + A_02_7 - A_20_1 - A_20_3 - A_20_4 - A_20_6);
+                                const Scalar tmp115 = w21*(-A_01_4 - A_10_7);
+                                const Scalar tmp116 = w20*(-A_01_3 - A_10_0);
+                                const Scalar tmp117 = w15*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
+                                const Scalar tmp118 = w16*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
+                                const Scalar tmp119 = w5*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
+                                const Scalar tmp120 = w8*(A_01_0 + A_01_7 + A_10_3 + A_10_4);
+                                const Scalar tmp121 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                                const Scalar tmp122 = w18*(A_12_2 - A_21_6);
+                                const Scalar tmp123 = w13*(A_22_0 + A_22_3 + A_22_4 + A_22_7);
+                                const Scalar tmp124 = w11*(-A_02_0 - A_02_7 + A_20_3 + A_20_4);
+                                const Scalar tmp125 = w7*(A_22_1 + A_22_5);
+                                const Scalar tmp126 = w10*(-A_12_3 - A_12_4 + A_21_0 + A_21_7);
+                                const Scalar tmp127 = w3*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
+                                const Scalar tmp128 = w1*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
+                                const Scalar tmp129 = w4*(-A_12_5 + A_21_1);
+                                const Scalar tmp130 = w16*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
+                                const Scalar tmp131 = w9*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
+                                const Scalar tmp132 = w19*(A_22_2 + A_22_6);
+                                const Scalar tmp133 = w17*(-A_02_2 + A_20_6);
+                                const Scalar tmp134 = w2*(A_02_5 - A_20_1);
+                                const Scalar tmp135 = w11*(A_02_1 + A_02_3 + A_02_4 + A_02_6 + A_20_1 + A_20_3 + A_20_4 + A_20_6);
+                                const Scalar tmp136 = w1*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
+                                const Scalar tmp137 = w15*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
+                                const Scalar tmp138 = w16*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
+                                const Scalar tmp139 = w5*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
+                                const Scalar tmp140 = w18*(A_12_5 - A_21_1);
+                                const Scalar tmp141 = w14*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
+                                const Scalar tmp142 = w7*(A_22_2 + A_22_6);
+                                const Scalar tmp143 = w1*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
+                                const Scalar tmp144 = w4*(-A_12_2 + A_21_6);
+                                const Scalar tmp145 = w15*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
+                                const Scalar tmp146 = w0*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
+                                const Scalar tmp147 = w16*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
+                                const Scalar tmp148 = w5*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
+                                const Scalar tmp149 = w19*(A_22_1 + A_22_5);
+                                const Scalar tmp150 = w17*(-A_02_5 + A_20_1);
+                                const Scalar tmp151 = w2*(A_02_2 - A_20_6);
+                                const Scalar tmp152 = w18*(A_12_3 - A_21_7);
+                                const Scalar tmp153 = w11*(A_02_1 + A_02_6 - A_20_2 - A_20_5);
+                                const Scalar tmp154 = w10*(-A_12_2 - A_12_5 + A_21_1 + A_21_6);
+                                const Scalar tmp155 = w4*(-A_12_4 + A_21_0);
+                                const Scalar tmp156 = w15*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
+                                const Scalar tmp157 = w5*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
+                                const Scalar tmp158 = w17*(A_02_3 - A_20_7);
+                                const Scalar tmp159 = w2*(-A_02_4 + A_20_0);
+                                const Scalar tmp160 = w6*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
+                                const Scalar tmp161 = w10*(-A_12_2 - A_12_3 - A_12_4 - A_12_5 - A_21_2 - A_21_3 - A_21_4 - A_21_5);
+                                const Scalar tmp162 = w1*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
+                                const Scalar tmp163 = w16*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
+                                const Scalar tmp164 = w12*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
+                                const Scalar tmp165 = w20*(A_01_6 + A_10_5);
+                                const Scalar tmp166 = w10*(-A_12_0 - A_12_1 - A_12_6 - A_12_7 + A_21_2 + A_21_3 + A_21_4 + A_21_5);
+                                const Scalar tmp167 = w15*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
+                                const Scalar tmp168 = w21*(A_01_1 + A_10_2);
+                                const Scalar tmp169 = w12*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
+                                const Scalar tmp170 = w5*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
+                                const Scalar tmp171 = w8*(-A_01_2 - A_01_5 - A_10_1 - A_10_6);
+                                const Scalar tmp172 = w6*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
+                                const Scalar tmp173 = w2*(A_02_1 + A_20_4);
+                                const Scalar tmp174 = w11*(-A_02_3 - A_02_4 - A_20_1 - A_20_6);
+                                const Scalar tmp175 = w14*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
+                                const Scalar tmp176 = w22*(-A_11_0 - A_11_1 - A_11_2 - A_11_3 - A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                const Scalar tmp177 = w1*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
+                                const Scalar tmp178 = w25*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
+                                const Scalar tmp179 = w15*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
+                                const Scalar tmp180 = w0*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
+                                const Scalar tmp181 = w16*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
+                                const Scalar tmp182 = w12*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
+                                const Scalar tmp183 = w5*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
+                                const Scalar tmp184 = w8*(A_01_0 + A_01_3 + A_01_4 + A_01_7 - A_10_1 - A_10_2 - A_10_5 - A_10_6);
+                                const Scalar tmp185 = w6*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
+                                const Scalar tmp186 = w17*(-A_02_6 - A_20_3);
+                                const Scalar tmp187 = w23*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
+                                const Scalar tmp188 = w18*(A_12_4 - A_21_0);
+                                const Scalar tmp189 = w7*(A_22_3 + A_22_7);
+                                const Scalar tmp190 = w1*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
+                                const Scalar tmp191 = w4*(-A_12_3 + A_21_7);
+                                const Scalar tmp192 = w16*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
+                                const Scalar tmp193 = w19*(A_22_0 + A_22_4);
+                                const Scalar tmp194 = w17*(A_02_4 - A_20_0);
+                                const Scalar tmp195 = w2*(-A_02_3 + A_20_7);
+                                const Scalar tmp196 = w20*(-A_01_7 - A_10_4);
+                                const Scalar tmp197 = w21*(-A_01_0 - A_10_3);
+                                const Scalar tmp198 = w16*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                                const Scalar tmp199 = w8*(A_01_3 + A_01_4 + A_10_0 + A_10_7);
+                                const Scalar tmp200 = w1*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
+                                const Scalar tmp201 = w27*(A_00_2 + A_00_3 + A_00_4 + A_00_5);
+                                const Scalar tmp202 = w11*(-A_02_2 - A_02_5 + A_20_3 + A_20_4);
+                                const Scalar tmp203 = w20*(A_01_0 - A_10_1);
+                                const Scalar tmp204 = w23*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
+                                const Scalar tmp205 = w25*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
+                                const Scalar tmp206 = w21*(A_01_7 - A_10_6);
+                                const Scalar tmp207 = w12*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
+                                const Scalar tmp208 = w28*(A_00_0 + A_00_1);
+                                const Scalar tmp209 = w29*(A_00_6 + A_00_7);
+                                const Scalar tmp210 = w8*(-A_01_3 - A_01_4 + A_10_2 + A_10_5);
+                                const Scalar tmp211 = w6*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
+                                const Scalar tmp212 = w17*(-A_02_7 + A_20_6);
+                                const Scalar tmp213 = w2*(A_02_0 - A_20_1);
+                                const Scalar tmp214 = w13*(-A_22_1 - A_22_2 - A_22_5 - A_22_6);
+                                const Scalar tmp215 = w22*(-A_11_0 - A_11_2 - A_11_5 - A_11_7);
+                                const Scalar tmp216 = w8*(A_01_0 + A_01_7 + A_10_0 + A_10_7);
+                                const Scalar tmp217 = w27*(-A_00_0 - A_00_1 - A_00_6 - A_00_7);
+                                const Scalar tmp218 = w17*(-A_02_3 - A_20_3);
+                                const Scalar tmp219 = w2*(A_02_4 + A_20_4);
+                                const Scalar tmp220 = w11*(-A_02_1 - A_02_6 - A_20_1 - A_20_6);
+                                const Scalar tmp221 = w26*(-A_11_4 - A_11_6);
+                                const Scalar tmp222 = w10*(A_12_2 + A_12_5 + A_21_2 + A_21_5);
+                                const Scalar tmp223 = w20*(-A_01_4 - A_10_4);
+                                const Scalar tmp224 = w21*(-A_01_3 - A_10_3);
+                                const Scalar tmp225 = w6*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
+                                const Scalar tmp226 = w7*(-A_22_0 - A_22_4);
+                                const Scalar tmp227 = w24*(-A_11_1 - A_11_3);
+                                const Scalar tmp228 = w19*(-A_22_3 - A_22_7);
+                                const Scalar tmp229 = w18*(-A_12_3 - A_21_3);
+                                const Scalar tmp230 = w4*(A_12_4 + A_21_4);
+                                const Scalar tmp231 = w28*(-A_00_4 - A_00_5);
+                                const Scalar tmp232 = w12*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
+                                const Scalar tmp233 = w29*(-A_00_2 - A_00_3);
+                                const Scalar tmp234 = w20*(-A_01_5 + A_10_7);
+                                const Scalar tmp235 = w18*(-A_12_0 + A_21_2);
+                                const Scalar tmp236 = w26*(A_11_5 + A_11_7);
+                                const Scalar tmp237 = w10*(A_12_1 + A_12_6 - A_21_3 - A_21_4);
+                                const Scalar tmp238 = w22*(A_11_1 + A_11_3 + A_11_4 + A_11_6);
+                                const Scalar tmp239 = w4*(A_12_7 - A_21_5);
+                                const Scalar tmp240 = w15*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
+                                const Scalar tmp241 = w21*(-A_01_2 + A_10_0);
+                                const Scalar tmp242 = w5*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
+                                const Scalar tmp243 = w12*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
+                                const Scalar tmp244 = w24*(A_11_0 + A_11_2);
+                                const Scalar tmp245 = w8*(A_01_1 + A_01_6 - A_10_3 - A_10_4);
+                                const Scalar tmp246 = w6*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
+                                const Scalar tmp247 = w11*(A_02_3 + A_02_4 - A_20_2 - A_20_5);
+                                const Scalar tmp248 = w20*(-A_01_1 + A_10_0);
+                                const Scalar tmp249 = w21*(-A_01_6 + A_10_7);
+                                const Scalar tmp250 = w8*(A_01_2 + A_01_5 - A_10_3 - A_10_4);
+                                const Scalar tmp251 = w17*(A_02_6 - A_20_7);
+                                const Scalar tmp252 = w2*(-A_02_1 + A_20_0);
+                                const Scalar tmp253 = w17*(-A_02_4 - A_20_4);
+                                const Scalar tmp254 = w2*(A_02_3 + A_20_3);
+                                const Scalar tmp255 = w26*(-A_11_1 - A_11_3);
+                                const Scalar tmp256 = w20*(-A_01_3 - A_10_3);
+                                const Scalar tmp257 = w21*(-A_01_4 - A_10_4);
+                                const Scalar tmp258 = w6*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
+                                const Scalar tmp259 = w7*(-A_22_3 - A_22_7);
+                                const Scalar tmp260 = w15*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
+                                const Scalar tmp261 = w24*(-A_11_4 - A_11_6);
+                                const Scalar tmp262 = w19*(-A_22_0 - A_22_4);
+                                const Scalar tmp263 = w18*(-A_12_4 - A_21_4);
+                                const Scalar tmp264 = w4*(A_12_3 + A_21_3);
+                                const Scalar tmp265 = w28*(-A_00_2 - A_00_3);
+                                const Scalar tmp266 = w12*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
+                                const Scalar tmp267 = w5*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
+                                const Scalar tmp268 = w29*(-A_00_4 - A_00_5);
+                                const Scalar tmp269 = w11*(A_02_2 + A_02_5 + A_20_0 + A_20_7);
+                                const Scalar tmp270 = w1*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
+                                const Scalar tmp271 = w15*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
+                                const Scalar tmp272 = w16*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
+                                const Scalar tmp273 = w5*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
+                                const Scalar tmp274 = w8*(-A_01_1 - A_01_2 - A_01_5 - A_01_6 + A_10_0 + A_10_3 + A_10_4 + A_10_7);
+                                const Scalar tmp275 = w17*(A_02_7 + A_20_2);
+                                const Scalar tmp276 = w2*(-A_02_0 - A_20_5);
+                                const Scalar tmp277 = w18*(-A_12_1 + A_21_5);
+                                const Scalar tmp278 = w11*(A_02_3 + A_02_4 - A_20_0 - A_20_7);
+                                const Scalar tmp279 = w10*(A_12_0 + A_12_7 - A_21_3 - A_21_4);
+                                const Scalar tmp280 = w4*(A_12_6 - A_21_2);
+                                const Scalar tmp281 = w17*(A_02_1 - A_20_5);
+                                const Scalar tmp282 = w2*(-A_02_6 + A_20_2);
+                                const Scalar tmp283 = w11*(A_02_0 + A_02_7 + A_20_2 + A_20_5);
+                                const Scalar tmp284 = w12*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
+                                const Scalar tmp285 = w6*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
+                                const Scalar tmp286 = w17*(A_02_2 + A_20_7);
+                                const Scalar tmp287 = w2*(-A_02_5 - A_20_0);
+                                const Scalar tmp288 = w13*(-A_22_0 - A_22_3 - A_22_4 - A_22_7);
+                                const Scalar tmp289 = w22*(-A_11_1 - A_11_3 - A_11_4 - A_11_6);
+                                const Scalar tmp290 = w8*(-A_01_1 - A_01_6 - A_10_1 - A_10_6);
+                                const Scalar tmp291 = w17*(A_02_2 + A_20_2);
+                                const Scalar tmp292 = w2*(-A_02_5 - A_20_5);
+                                const Scalar tmp293 = w11*(A_02_0 + A_02_7 + A_20_0 + A_20_7);
+                                const Scalar tmp294 = w26*(-A_11_5 - A_11_7);
+                                const Scalar tmp295 = w10*(A_12_3 + A_12_4 + A_21_3 + A_21_4);
+                                const Scalar tmp296 = w20*(A_01_5 + A_10_5);
+                                const Scalar tmp297 = w21*(A_01_2 + A_10_2);
+                                const Scalar tmp298 = w7*(-A_22_1 - A_22_5);
+                                const Scalar tmp299 = w24*(-A_11_0 - A_11_2);
+                                const Scalar tmp300 = w19*(-A_22_2 - A_22_6);
+                                const Scalar tmp301 = w18*(-A_12_2 - A_21_2);
+                                const Scalar tmp302 = w4*(A_12_5 + A_21_5);
+                                const Scalar tmp303 = w8*(A_01_3 + A_01_4 + A_10_3 + A_10_4);
+                                const Scalar tmp304 = w27*(-A_00_2 - A_00_3 - A_00_4 - A_00_5);
+                                const Scalar tmp305 = w17*(A_02_7 + A_20_7);
+                                const Scalar tmp306 = w2*(-A_02_0 - A_20_0);
+                                const Scalar tmp307 = w11*(A_02_2 + A_02_5 + A_20_2 + A_20_5);
+                                const Scalar tmp308 = w26*(-A_11_0 - A_11_2);
+                                const Scalar tmp309 = w10*(-A_12_1 - A_12_6 - A_21_1 - A_21_6);
+                                const Scalar tmp310 = w20*(-A_01_0 - A_10_0);
+                                const Scalar tmp311 = w21*(-A_01_7 - A_10_7);
+                                const Scalar tmp312 = w6*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
+                                const Scalar tmp313 = w24*(-A_11_5 - A_11_7);
+                                const Scalar tmp314 = w18*(A_12_7 + A_21_7);
+                                const Scalar tmp315 = w4*(-A_12_0 - A_21_0);
+                                const Scalar tmp316 = w28*(-A_00_0 - A_00_1);
+                                const Scalar tmp317 = w12*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
+                                const Scalar tmp318 = w29*(-A_00_6 - A_00_7);
+                                const Scalar tmp319 = w18*(-A_12_7 + A_21_5);
+                                const Scalar tmp320 = w26*(A_11_0 + A_11_2);
+                                const Scalar tmp321 = w21*(-A_01_5 + A_10_7);
+                                const Scalar tmp322 = w20*(-A_01_2 + A_10_0);
+                                const Scalar tmp323 = w4*(A_12_0 - A_21_2);
+                                const Scalar tmp324 = w15*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
+                                const Scalar tmp325 = w24*(A_11_5 + A_11_7);
+                                const Scalar tmp326 = w5*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
+                                const Scalar tmp327 = w18*(A_12_7 + A_21_1);
+                                const Scalar tmp328 = w10*(-A_12_1 - A_12_6 - A_21_0 - A_21_7);
+                                const Scalar tmp329 = w3*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
+                                const Scalar tmp330 = w1*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
+                                const Scalar tmp331 = w4*(-A_12_0 - A_21_6);
+                                const Scalar tmp332 = w25*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
+                                const Scalar tmp333 = w15*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
+                                const Scalar tmp334 = w16*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
+                                const Scalar tmp335 = w9*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
+                                const Scalar tmp336 = w5*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
+                                const Scalar tmp337 = w27*(-A_00_0 - A_00_1 - A_00_2 - A_00_3 - A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                const Scalar tmp338 = w23*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
+                                const Scalar tmp339 = w14*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
+                                const Scalar tmp340 = w23*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
+                                const Scalar tmp341 = w1*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
+                                const Scalar tmp342 = w25*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
+                                const Scalar tmp343 = w15*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
+                                const Scalar tmp344 = w0*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
+                                const Scalar tmp345 = w16*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
+                                const Scalar tmp346 = w12*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
+                                const Scalar tmp347 = w5*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
+                                const Scalar tmp348 = w6*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
+                                const Scalar tmp349 = w17*(A_02_5 + A_20_0);
+                                const Scalar tmp350 = w2*(-A_02_2 - A_20_7);
+                                const Scalar tmp351 = w8*(-A_01_2 - A_01_5 - A_10_2 - A_10_5);
+                                const Scalar tmp352 = w17*(-A_02_1 - A_20_1);
+                                const Scalar tmp353 = w2*(A_02_6 + A_20_6);
+                                const Scalar tmp354 = w11*(-A_02_3 - A_02_4 - A_20_3 - A_20_4);
+                                const Scalar tmp355 = w10*(-A_12_0 - A_12_7 - A_21_0 - A_21_7);
+                                const Scalar tmp356 = w20*(A_01_6 + A_10_6);
+                                const Scalar tmp357 = w21*(A_01_1 + A_10_1);
+                                const Scalar tmp358 = w7*(-A_22_2 - A_22_6);
+                                const Scalar tmp359 = w19*(-A_22_1 - A_22_5);
+                                const Scalar tmp360 = w18*(A_12_1 + A_21_1);
+                                const Scalar tmp361 = w4*(-A_12_6 - A_21_6);
+                                const Scalar tmp362 = w28*(-A_00_6 - A_00_7);
+                                const Scalar tmp363 = w29*(-A_00_0 - A_00_1);
+                                const Scalar tmp364 = w2*(A_02_4 + A_20_1);
+                                const Scalar tmp365 = w11*(-A_02_1 - A_02_6 - A_20_3 - A_20_4);
+                                const Scalar tmp366 = w17*(-A_02_3 - A_20_6);
+                                const Scalar tmp367 = w2*(A_02_5 - A_20_4);
+                                const Scalar tmp368 = w6*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
+                                const Scalar tmp369 = w11*(-A_02_0 - A_02_7 + A_20_1 + A_20_6);
+                                const Scalar tmp370 = w20*(-A_01_5 + A_10_4);
+                                const Scalar tmp371 = w3*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                const Scalar tmp372 = w12*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
+                                const Scalar tmp373 = w21*(-A_01_2 + A_10_3);
+                                const Scalar tmp374 = w9*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                                const Scalar tmp375 = w29*(A_00_2 + A_00_3);
+                                const Scalar tmp376 = w8*(A_01_1 + A_01_6 - A_10_0 - A_10_7);
+                                const Scalar tmp377 = w28*(A_00_4 + A_00_5);
+                                const Scalar tmp378 = w17*(-A_02_2 + A_20_3);
+                                const Scalar tmp379 = w17*(A_02_0 + A_20_0);
+                                const Scalar tmp380 = w2*(-A_02_7 - A_20_7);
+                                const Scalar tmp381 = w20*(-A_01_7 - A_10_7);
+                                const Scalar tmp382 = w21*(-A_01_0 - A_10_0);
+                                const Scalar tmp383 = w6*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
+                                const Scalar tmp384 = w18*(A_12_0 + A_21_0);
+                                const Scalar tmp385 = w4*(-A_12_7 - A_21_7);
+                                const Scalar tmp386 = w12*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
+                                const Scalar tmp387 = w17*(-A_02_6 - A_20_6);
+                                const Scalar tmp388 = w2*(A_02_1 + A_20_1);
+                                const Scalar tmp389 = w20*(A_01_1 + A_10_1);
+                                const Scalar tmp390 = w21*(A_01_6 + A_10_6);
+                                const Scalar tmp391 = w18*(A_12_6 + A_21_6);
+                                const Scalar tmp392 = w4*(-A_12_1 - A_21_1);
+                                const Scalar tmp393 = w2*(A_02_3 + A_20_6);
+                                const Scalar tmp394 = w1*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
+                                const Scalar tmp395 = w16*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
+                                const Scalar tmp396 = w17*(-A_02_4 - A_20_1);
+                                const Scalar tmp397 = w18*(-A_12_5 - A_21_3);
+                                const Scalar tmp398 = w10*(A_12_3 + A_12_4 + A_21_2 + A_21_5);
+                                const Scalar tmp399 = w1*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
+                                const Scalar tmp400 = w4*(A_12_2 + A_21_4);
+                                const Scalar tmp401 = w16*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
+                                const Scalar tmp402 = w20*(-A_01_2 + A_10_3);
+                                const Scalar tmp403 = w21*(-A_01_5 + A_10_4);
+                                const Scalar tmp404 = w17*(-A_02_5 + A_20_4);
+                                const Scalar tmp405 = w2*(A_02_2 - A_20_3);
+                                const Scalar tmp406 = w18*(-A_12_0 + A_21_4);
+                                const Scalar tmp407 = w4*(A_12_7 - A_21_3);
+                                const Scalar tmp408 = w17*(-A_02_0 + A_20_4);
+                                const Scalar tmp409 = w2*(A_02_7 - A_20_3);
+                                const Scalar tmp410 = w17*(A_02_5 + A_20_5);
+                                const Scalar tmp411 = w2*(-A_02_2 - A_20_2);
+                                const Scalar tmp412 = w20*(A_01_2 + A_10_2);
+                                const Scalar tmp413 = w21*(A_01_5 + A_10_5);
+                                const Scalar tmp414 = w18*(-A_12_5 - A_21_5);
+                                const Scalar tmp415 = w4*(A_12_2 + A_21_2);
+                                const Scalar tmp416 = w12*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
+                                const Scalar tmp417 = w6*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
+                                const Scalar tmp418 = w17*(A_02_0 + A_20_5);
+                                const Scalar tmp419 = w2*(-A_02_7 - A_20_2);
+                                const Scalar tmp420 = w18*(-A_12_4 - A_21_2);
+                                const Scalar tmp421 = w10*(A_12_2 + A_12_5 + A_21_3 + A_21_4);
+                                const Scalar tmp422 = w3*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
+                                const Scalar tmp423 = w1*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
+                                const Scalar tmp424 = w25*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
+                                const Scalar tmp425 = w4*(A_12_3 + A_21_5);
+                                const Scalar tmp426 = w15*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
+                                const Scalar tmp427 = w16*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
+                                const Scalar tmp428 = w9*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
+                                const Scalar tmp429 = w5*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
+                                const Scalar tmp430 = w23*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
+                                const Scalar tmp431 = w18*(A_12_5 - A_21_7);
+                                const Scalar tmp432 = w10*(-A_12_3 - A_12_4 + A_21_1 + A_21_6);
+                                const Scalar tmp433 = w21*(A_01_7 - A_10_5);
+                                const Scalar tmp434 = w20*(A_01_0 - A_10_2);
+                                const Scalar tmp435 = w4*(-A_12_2 + A_21_0);
+                                const Scalar tmp436 = w8*(-A_01_3 - A_01_4 + A_10_1 + A_10_6);
+                                const Scalar tmp437 = w2*(-A_02_4 + A_20_5);
+                                const Scalar tmp438 = w20*(A_01_4 - A_10_5);
+                                const Scalar tmp439 = w21*(A_01_3 - A_10_2);
+                                const Scalar tmp440 = w16*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                                const Scalar tmp441 = w1*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
+                                const Scalar tmp442 = w17*(A_02_3 - A_20_2);
+                                const Scalar tmp443 = w20*(-A_01_4 - A_10_7);
+                                const Scalar tmp444 = w21*(-A_01_3 - A_10_0);
+                                const Scalar tmp445 = w18*(A_12_6 + A_21_0);
+                                const Scalar tmp446 = w10*(-A_12_0 - A_12_7 - A_21_1 - A_21_6);
+                                const Scalar tmp447 = w1*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
+                                const Scalar tmp448 = w4*(-A_12_1 - A_21_7);
+                                const Scalar tmp449 = w16*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
+                                const Scalar tmp450 = w2*(A_02_7 - A_20_6);
+                                const Scalar tmp451 = w6*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
+                                const Scalar tmp452 = w20*(A_01_7 - A_10_6);
+                                const Scalar tmp453 = w21*(A_01_0 - A_10_1);
+                                const Scalar tmp454 = w12*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
+                                const Scalar tmp455 = w29*(A_00_0 + A_00_1);
+                                const Scalar tmp456 = w28*(A_00_6 + A_00_7);
+                                const Scalar tmp457 = w17*(-A_02_0 + A_20_1);
+                                const Scalar tmp458 = w21*(-A_01_7 - A_10_4);
+                                const Scalar tmp459 = w20*(-A_01_0 - A_10_3);
+                                const Scalar tmp460 = w12*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
+                                const Scalar tmp461 = w6*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
+                                const Scalar tmp462 = w18*(A_12_1 + A_21_7);
+                                const Scalar tmp463 = w4*(-A_12_6 - A_21_0);
+                                const Scalar tmp464 = w15*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
+                                const Scalar tmp465 = w5*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
+                                const Scalar tmp466 = w2*(-A_02_6 + A_20_7);
+                                const Scalar tmp467 = w20*(-A_01_6 + A_10_7);
+                                const Scalar tmp468 = w21*(-A_01_1 + A_10_0);
+                                const Scalar tmp469 = w17*(A_02_1 - A_20_0);
+                                const Scalar tmp470 = w6*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
+                                const Scalar tmp471 = w1*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
+                                const Scalar tmp472 = w15*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
+                                const Scalar tmp473 = w16*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
+                                const Scalar tmp474 = w12*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
+                                const Scalar tmp475 = w5*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
+                                const Scalar tmp476 = w18*(-A_12_6 + A_21_4);
+                                const Scalar tmp477 = w20*(A_01_3 - A_10_1);
+                                const Scalar tmp478 = w10*(A_12_0 + A_12_7 - A_21_2 - A_21_5);
+                                const Scalar tmp479 = w4*(A_12_1 - A_21_3);
+                                const Scalar tmp480 = w21*(A_01_4 - A_10_6);
+                                const Scalar tmp481 = w8*(-A_01_0 - A_01_7 + A_10_2 + A_10_5);
+                                const Scalar tmp482 = w6*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
+                                const Scalar tmp483 = w12*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
+                                const Scalar tmp484 = w15*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
+                                const Scalar tmp485 = w5*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
+                                const Scalar tmp486 = w18*(-A_12_1 + A_21_3);
+                                const Scalar tmp487 = w20*(A_01_4 - A_10_6);
+                                const Scalar tmp488 = w4*(A_12_6 - A_21_4);
+                                const Scalar tmp489 = w21*(A_01_3 - A_10_1);
+                                const Scalar tmp490 = w20*(A_01_7 - A_10_5);
+                                const Scalar tmp491 = w18*(A_12_2 - A_21_0);
+                                const Scalar tmp492 = w4*(-A_12_5 + A_21_7);
+                                const Scalar tmp493 = w21*(A_01_0 - A_10_2);
+                                const Scalar tmp494 = w20*(A_01_1 + A_10_2);
+                                const Scalar tmp495 = w21*(A_01_6 + A_10_5);
+                                const Scalar tmp496 = w18*(-A_12_2 - A_21_4);
+                                const Scalar tmp497 = w4*(A_12_5 + A_21_3);
+                                const Scalar tmp498 = w15*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
+                                const Scalar tmp499 = w5*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
+                                const Scalar tmp500 = w18*(-A_12_6 + A_21_2);
+                                const Scalar tmp501 = w4*(A_12_1 - A_21_5);
+                                const Scalar tmp502 = w17*(A_02_6 - A_20_2);
+                                const Scalar tmp503 = w2*(-A_02_1 + A_20_5);
+                                const Scalar tmp504 = w18*(-A_12_3 - A_21_5);
+                                const Scalar tmp505 = w4*(A_12_4 + A_21_2);
+                                const Scalar tmp506 = w2*(A_02_6 + A_20_3);
+                                const Scalar tmp507 = w17*(-A_02_1 - A_20_4);
+                                const Scalar tmp508 = w18*(A_12_0 + A_21_6);
+                                const Scalar tmp509 = w4*(-A_12_7 - A_21_1);
                                 EM_S[INDEX2(0,0,8)]+=tmp198 + tmp200 + tmp214 + tmp259 + tmp262 + tmp289 + tmp294 + tmp299 + tmp303 + tmp304 + tmp307 + tmp309 + tmp343 + tmp347 + tmp362 + tmp363 + tmp379 + tmp380 + tmp381 + tmp382 + tmp383 + tmp384 + tmp385 + tmp386;
                                 EM_S[INDEX2(1,0,8)]+=tmp145 + tmp148 + tmp161 + tmp201 + tmp202 + tmp210 + tmp371 + tmp374 + tmp440 + tmp441 + tmp450 + tmp451 + tmp452 + tmp453 + tmp454 + tmp455 + tmp456 + tmp457 + tmp89 + tmp91;
                                 EM_S[INDEX2(2,0,8)]+=tmp135 + tmp234 + tmp235 + tmp236 + tmp237 + tmp238 + tmp239 + tmp240 + tmp241 + tmp242 + tmp243 + tmp244 + tmp245 + tmp246 + tmp39 + tmp41 + tmp44 + tmp49 + tmp61 + tmp71;
@@ -888,387 +903,387 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_S[INDEX2(6,7,8)]+=tmp100 + tmp14 + tmp161 + tmp201 + tmp202 + tmp203 + tmp204 + tmp205 + tmp206 + tmp207 + tmp208 + tmp209 + tmp210 + tmp211 + tmp212 + tmp213 + tmp88 + tmp9 + tmp90 + tmp94;
                                 EM_S[INDEX2(7,7,8)]+=tmp118 + tmp121 + tmp214 + tmp226 + tmp228 + tmp271 + tmp273 + tmp289 + tmp303 + tmp304 + tmp305 + tmp306 + tmp307 + tmp308 + tmp309 + tmp310 + tmp311 + tmp312 + tmp313 + tmp314 + tmp315 + tmp316 + tmp317 + tmp318;
                             } else { // constant data
-                                const double Aw00 = 8*A_p[INDEX2(0,0,3)]*w27;
-                                const double Aw01 = 12*A_p[INDEX2(0,1,3)]*w8;
-                                const double Aw02 = 12*A_p[INDEX2(0,2,3)]*w11;
-                                const double Aw10 = 12*A_p[INDEX2(1,0,3)]*w8;
-                                const double Aw11 = 8*A_p[INDEX2(1,1,3)]*w22;
-                                const double Aw12 = 12*A_p[INDEX2(1,2,3)]*w10;
-                                const double Aw20 = 12*A_p[INDEX2(2,0,3)]*w11;
-                                const double Aw21 = 12*A_p[INDEX2(2,1,3)]*w10;
-                                const double Aw22 = 8*A_p[INDEX2(2,2,3)]*w13;
-                                const double tmp0 = Aw01 + Aw10;
-                                const double tmp1 = Aw01 - Aw10;
-                                const double tmp2 = -Aw01 - Aw10;
-                                const double tmp3 = -Aw01 + Aw10;
-                                const double tmp4 = Aw02 + Aw20;
-                                const double tmp5 = Aw02 - Aw20;
-                                const double tmp6 = -Aw02 - Aw20;
-                                const double tmp7 = -Aw02 + Aw20;
-                                const double tmp8 = Aw12 + Aw21;
-                                const double tmp9 = Aw12 - Aw21;
-                                const double tmp10 = -Aw12 - Aw21;
-                                const double tmp11 = -Aw12 + Aw21;
-                                EM_S[INDEX2(0,0,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp4 + 2*tmp10;
-                                EM_S[INDEX2(1,0,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + 2*tmp7 + 2*tmp3 + tmp10;
-                                EM_S[INDEX2(2,0,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp9 + tmp4 + 2*tmp1;
-                                EM_S[INDEX2(3,0,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp7 + tmp9 + 2*tmp2;
-                                EM_S[INDEX2(4,0,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + tmp0 + 2*tmp11 + 2*tmp5;
-                                EM_S[INDEX2(5,0,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + tmp11 + 2*tmp6 + tmp3;
-                                EM_S[INDEX2(6,0,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp5 + tmp1 + 2*tmp8;
-                                EM_S[INDEX2(7,0,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp8 + tmp2 + tmp6;
-                                EM_S[INDEX2(0,1,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp10 + 2*tmp1 + 2*tmp5;
-                                EM_S[INDEX2(1,1,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp6 + 2*tmp10 + 2*tmp2;
-                                EM_S[INDEX2(2,1,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp5 + 2*tmp0 + tmp9;
-                                EM_S[INDEX2(3,1,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp9 + 2*tmp3 + tmp6;
-                                EM_S[INDEX2(4,1,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + tmp11 + tmp1 + 2*tmp4;
-                                EM_S[INDEX2(5,1,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp7 + tmp2 + 2*tmp11;
-                                EM_S[INDEX2(6,1,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp8 + tmp4 + tmp0;
-                                EM_S[INDEX2(7,1,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + 2*tmp8 + tmp3 + tmp7;
-                                EM_S[INDEX2(0,2,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp3 + tmp4 + 2*tmp11;
-                                EM_S[INDEX2(1,2,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + 2*tmp0 + tmp11 + tmp7;
-                                EM_S[INDEX2(2,2,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp8 + 2*tmp4 + 2*tmp2;
-                                EM_S[INDEX2(3,2,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + 2*tmp7 + tmp8 + 2*tmp1;
-                                EM_S[INDEX2(4,2,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp5 + tmp3 + 2*tmp10;
-                                EM_S[INDEX2(5,2,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp10 + tmp0 + tmp6;
-                                EM_S[INDEX2(6,2,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp9 + tmp2 + 2*tmp5;
-                                EM_S[INDEX2(7,2,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp6 + tmp1 + tmp9;
-                                EM_S[INDEX2(0,3,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp5 + tmp11 + 2*tmp2;
-                                EM_S[INDEX2(1,3,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + tmp6 + 2*tmp11 + 2*tmp1;
-                                EM_S[INDEX2(2,3,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp8 + 2*tmp3 + 2*tmp5;
-                                EM_S[INDEX2(3,3,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp6 + 2*tmp8;
-                                EM_S[INDEX2(4,3,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp2 + tmp4 + tmp10;
-                                EM_S[INDEX2(5,3,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp1 + 2*tmp10 + tmp7;
-                                EM_S[INDEX2(6,3,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp4 + tmp3 + tmp9;
-                                EM_S[INDEX2(7,3,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp7 + 2*tmp9 + tmp0;
-                                EM_S[INDEX2(0,4,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp7 + 2*tmp9 + tmp0;
-                                EM_S[INDEX2(1,4,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp4 + tmp3 + tmp9;
-                                EM_S[INDEX2(2,4,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp1 + 2*tmp10 + tmp7;
-                                EM_S[INDEX2(3,4,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp2 + tmp4 + tmp10;
-                                EM_S[INDEX2(4,4,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp6 + 2*tmp8;
-                                EM_S[INDEX2(5,4,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp8 + 2*tmp3 + 2*tmp5;
-                                EM_S[INDEX2(6,4,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + tmp6 + 2*tmp11 + 2*tmp1;
-                                EM_S[INDEX2(7,4,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp5 + tmp11 + 2*tmp2;
-                                EM_S[INDEX2(0,5,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp6 + tmp1 + tmp9;
-                                EM_S[INDEX2(1,5,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp9 + tmp2 + 2*tmp5;
-                                EM_S[INDEX2(2,5,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp10 + tmp0 + tmp6;
-                                EM_S[INDEX2(3,5,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp5 + tmp3 + 2*tmp10;
-                                EM_S[INDEX2(4,5,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + 2*tmp7 + tmp8 + 2*tmp1;
-                                EM_S[INDEX2(5,5,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp8 + 2*tmp4 + 2*tmp2;
-                                EM_S[INDEX2(6,5,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + 2*tmp0 + tmp11 + tmp7;
-                                EM_S[INDEX2(7,5,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp3 + tmp4 + 2*tmp11;
-                                EM_S[INDEX2(0,6,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + 2*tmp8 + tmp3 + tmp7;
-                                EM_S[INDEX2(1,6,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp8 + tmp4 + tmp0;
-                                EM_S[INDEX2(2,6,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp7 + tmp2 + 2*tmp11;
-                                EM_S[INDEX2(3,6,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + tmp11 + tmp1 + 2*tmp4;
-                                EM_S[INDEX2(4,6,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp9 + 2*tmp3 + tmp6;
-                                EM_S[INDEX2(5,6,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp5 + 2*tmp0 + tmp9;
-                                EM_S[INDEX2(6,6,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp6 + 2*tmp10 + 2*tmp2;
-                                EM_S[INDEX2(7,6,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp10 + 2*tmp1 + 2*tmp5;
-                                EM_S[INDEX2(0,7,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp8 + tmp2 + tmp6;
-                                EM_S[INDEX2(1,7,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp5 + tmp1 + 2*tmp8;
-                                EM_S[INDEX2(2,7,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + tmp11 + 2*tmp6 + tmp3;
-                                EM_S[INDEX2(3,7,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + tmp0 + 2*tmp11 + 2*tmp5;
-                                EM_S[INDEX2(4,7,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp7 + tmp9 + 2*tmp2;
-                                EM_S[INDEX2(5,7,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp9 + tmp4 + 2*tmp1;
-                                EM_S[INDEX2(6,7,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + 2*tmp7 + 2*tmp3 + tmp10;
-                                EM_S[INDEX2(7,7,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp4 + 2*tmp10;
+                                const Scalar Aw00 =  8.*A_p[INDEX2(0,0,3)]*w27;
+                                const Scalar Aw01 = 12.*A_p[INDEX2(0,1,3)]*w8;
+                                const Scalar Aw02 = 12.*A_p[INDEX2(0,2,3)]*w11;
+                                const Scalar Aw10 = 12.*A_p[INDEX2(1,0,3)]*w8;
+                                const Scalar Aw11 =  8.*A_p[INDEX2(1,1,3)]*w22;
+                                const Scalar Aw12 = 12.*A_p[INDEX2(1,2,3)]*w10;
+                                const Scalar Aw20 = 12.*A_p[INDEX2(2,0,3)]*w11;
+                                const Scalar Aw21 = 12.*A_p[INDEX2(2,1,3)]*w10;
+                                const Scalar Aw22 =  8.*A_p[INDEX2(2,2,3)]*w13;
+                                const Scalar tmp0 = Aw01 + Aw10;
+                                const Scalar tmp1 = Aw01 - Aw10;
+                                const Scalar tmp2 = -Aw01 - Aw10;
+                                const Scalar tmp3 = -Aw01 + Aw10;
+                                const Scalar tmp4 = Aw02 + Aw20;
+                                const Scalar tmp5 = Aw02 - Aw20;
+                                const Scalar tmp6 = -Aw02 - Aw20;
+                                const Scalar tmp7 = -Aw02 + Aw20;
+                                const Scalar tmp8 = Aw12 + Aw21;
+                                const Scalar tmp9 = Aw12 - Aw21;
+                                const Scalar tmp10 = -Aw12 - Aw21;
+                                const Scalar tmp11 = -Aw12 + Aw21;
+                                EM_S[INDEX2(0,0,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp4 + 2.*tmp10;
+                                EM_S[INDEX2(1,0,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + 2.*tmp7 + 2.*tmp3 + tmp10;
+                                EM_S[INDEX2(2,0,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp9 + tmp4 + 2.*tmp1;
+                                EM_S[INDEX2(3,0,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp7 + tmp9 + 2.*tmp2;
+                                EM_S[INDEX2(4,0,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + tmp0 + 2.*tmp11 + 2.*tmp5;
+                                EM_S[INDEX2(5,0,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + tmp11 + 2.*tmp6 + tmp3;
+                                EM_S[INDEX2(6,0,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp5 + tmp1 + 2.*tmp8;
+                                EM_S[INDEX2(7,0,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp8 + tmp2 + tmp6;
+                                EM_S[INDEX2(0,1,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp10 + 2.*tmp1 + 2.*tmp5;
+                                EM_S[INDEX2(1,1,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp6 + 2.*tmp10 + 2.*tmp2;
+                                EM_S[INDEX2(2,1,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp5 + 2.*tmp0 + tmp9;
+                                EM_S[INDEX2(3,1,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp9 + 2.*tmp3 + tmp6;
+                                EM_S[INDEX2(4,1,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + tmp11 + tmp1 + 2.*tmp4;
+                                EM_S[INDEX2(5,1,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp7 + tmp2 + 2.*tmp11;
+                                EM_S[INDEX2(6,1,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp8 + tmp4 + tmp0;
+                                EM_S[INDEX2(7,1,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + 2.*tmp8 + tmp3 + tmp7;
+                                EM_S[INDEX2(0,2,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp3 + tmp4 + 2.*tmp11;
+                                EM_S[INDEX2(1,2,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + 2.*tmp0 + tmp11 + tmp7;
+                                EM_S[INDEX2(2,2,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp8 + 2.*tmp4 + 2.*tmp2;
+                                EM_S[INDEX2(3,2,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + 2.*tmp7 + tmp8 + 2.*tmp1;
+                                EM_S[INDEX2(4,2,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp5 + tmp3 + 2.*tmp10;
+                                EM_S[INDEX2(5,2,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp10 + tmp0 + tmp6;
+                                EM_S[INDEX2(6,2,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp9 + tmp2 + 2.*tmp5;
+                                EM_S[INDEX2(7,2,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp6 + tmp1 + tmp9;
+                                EM_S[INDEX2(0,3,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp5 + tmp11 + 2.*tmp2;
+                                EM_S[INDEX2(1,3,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + tmp6 + 2.*tmp11 + 2.*tmp1;
+                                EM_S[INDEX2(2,3,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp8 + 2.*tmp3 + 2.*tmp5;
+                                EM_S[INDEX2(3,3,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp6 + 2.*tmp8;
+                                EM_S[INDEX2(4,3,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp2 + tmp4 + tmp10;
+                                EM_S[INDEX2(5,3,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp1 + 2.*tmp10 + tmp7;
+                                EM_S[INDEX2(6,3,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp4 + tmp3 + tmp9;
+                                EM_S[INDEX2(7,3,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp7 + 2.*tmp9 + tmp0;
+                                EM_S[INDEX2(0,4,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp7 + 2.*tmp9 + tmp0;
+                                EM_S[INDEX2(1,4,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp4 + tmp3 + tmp9;
+                                EM_S[INDEX2(2,4,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp1 + 2.*tmp10 + tmp7;
+                                EM_S[INDEX2(3,4,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp2 + tmp4 + tmp10;
+                                EM_S[INDEX2(4,4,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp6 + 2.*tmp8;
+                                EM_S[INDEX2(5,4,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp8 + 2.*tmp3 + 2.*tmp5;
+                                EM_S[INDEX2(6,4,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + tmp6 + 2.*tmp11 + 2.*tmp1;
+                                EM_S[INDEX2(7,4,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp5 + tmp11 + 2.*tmp2;
+                                EM_S[INDEX2(0,5,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp6 + tmp1 + tmp9;
+                                EM_S[INDEX2(1,5,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp9 + tmp2 + 2.*tmp5;
+                                EM_S[INDEX2(2,5,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp10 + tmp0 + tmp6;
+                                EM_S[INDEX2(3,5,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp5 + tmp3 + 2.*tmp10;
+                                EM_S[INDEX2(4,5,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + 2.*tmp7 + tmp8 + 2.*tmp1;
+                                EM_S[INDEX2(5,5,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp8 + 2.*tmp4 + 2.*tmp2;
+                                EM_S[INDEX2(6,5,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + 2.*tmp0 + tmp11 + tmp7;
+                                EM_S[INDEX2(7,5,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp3 + tmp4 + 2.*tmp11;
+                                EM_S[INDEX2(0,6,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + 2.*tmp8 + tmp3 + tmp7;
+                                EM_S[INDEX2(1,6,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp8 + tmp4 + tmp0;
+                                EM_S[INDEX2(2,6,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp7 + tmp2 + 2.*tmp11;
+                                EM_S[INDEX2(3,6,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + tmp11 + tmp1 + 2.*tmp4;
+                                EM_S[INDEX2(4,6,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp9 + 2.*tmp3 + tmp6;
+                                EM_S[INDEX2(5,6,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp5 + 2.*tmp0 + tmp9;
+                                EM_S[INDEX2(6,6,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp6 + 2.*tmp10 + 2.*tmp2;
+                                EM_S[INDEX2(7,6,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp10 + 2.*tmp1 + 2.*tmp5;
+                                EM_S[INDEX2(0,7,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp8 + tmp2 + tmp6;
+                                EM_S[INDEX2(1,7,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp5 + tmp1 + 2.*tmp8;
+                                EM_S[INDEX2(2,7,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + tmp11 + 2.*tmp6 + tmp3;
+                                EM_S[INDEX2(3,7,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + tmp0 + 2.*tmp11 + 2.*tmp5;
+                                EM_S[INDEX2(4,7,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp7 + tmp9 + 2.*tmp2;
+                                EM_S[INDEX2(5,7,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp9 + tmp4 + 2.*tmp1;
+                                EM_S[INDEX2(6,7,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + 2.*tmp7 + 2.*tmp3 + tmp10;
+                                EM_S[INDEX2(7,7,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp4 + 2.*tmp10;
                             }
                         }
                         ///////////////
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double* B_p=B.getSampleDataRO(e);
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
                             if (B.actsExpanded()) {
-                                const double B_0_0 = B_p[INDEX2(0,0,3)];
-                                const double B_1_0 = B_p[INDEX2(1,0,3)];
-                                const double B_2_0 = B_p[INDEX2(2,0,3)];
-                                const double B_0_1 = B_p[INDEX2(0,1,3)];
-                                const double B_1_1 = B_p[INDEX2(1,1,3)];
-                                const double B_2_1 = B_p[INDEX2(2,1,3)];
-                                const double B_0_2 = B_p[INDEX2(0,2,3)];
-                                const double B_1_2 = B_p[INDEX2(1,2,3)];
-                                const double B_2_2 = B_p[INDEX2(2,2,3)];
-                                const double B_0_3 = B_p[INDEX2(0,3,3)];
-                                const double B_1_3 = B_p[INDEX2(1,3,3)];
-                                const double B_2_3 = B_p[INDEX2(2,3,3)];
-                                const double B_0_4 = B_p[INDEX2(0,4,3)];
-                                const double B_1_4 = B_p[INDEX2(1,4,3)];
-                                const double B_2_4 = B_p[INDEX2(2,4,3)];
-                                const double B_0_5 = B_p[INDEX2(0,5,3)];
-                                const double B_1_5 = B_p[INDEX2(1,5,3)];
-                                const double B_2_5 = B_p[INDEX2(2,5,3)];
-                                const double B_0_6 = B_p[INDEX2(0,6,3)];
-                                const double B_1_6 = B_p[INDEX2(1,6,3)];
-                                const double B_2_6 = B_p[INDEX2(2,6,3)];
-                                const double B_0_7 = B_p[INDEX2(0,7,3)];
-                                const double B_1_7 = B_p[INDEX2(1,7,3)];
-                                const double B_2_7 = B_p[INDEX2(2,7,3)];
-                                const double tmp0 = w38*(B_0_3 + B_0_7);
-                                const double tmp1 = w31*(B_1_0 + B_1_4);
-                                const double tmp2 = w42*(B_2_5 + B_2_6);
-                                const double tmp3 = w35*(B_2_1 + B_2_2);
-                                const double tmp4 = w37*(B_1_2 + B_1_6);
-                                const double tmp5 = w39*(B_1_3 + B_1_7);
-                                const double tmp6 = w36*(B_0_2 + B_0_6);
-                                const double tmp7 = w33*(B_0_1 + B_0_5);
-                                const double tmp8 = w30*(B_0_0 + B_0_4);
-                                const double tmp9 = w34*(B_1_1 + B_1_5);
-                                const double tmp10 = w38*(-B_0_5 - B_0_7);
-                                const double tmp11 = w31*(-B_1_0 - B_1_1);
-                                const double tmp12 = w42*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
-                                const double tmp13 = w35*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
-                                const double tmp14 = w37*(-B_1_2 - B_1_3);
-                                const double tmp15 = w39*(-B_1_6 - B_1_7);
-                                const double tmp16 = w36*(-B_0_4 - B_0_6);
-                                const double tmp17 = w33*(-B_0_1 - B_0_3);
-                                const double tmp18 = w30*(-B_0_0 - B_0_2);
-                                const double tmp19 = w34*(-B_1_4 - B_1_5);
-                                const double tmp20 = w38*(B_0_1 + B_0_3);
-                                const double tmp21 = w42*(-B_2_0 - B_2_2);
-                                const double tmp22 = w35*(-B_2_5 - B_2_7);
-                                const double tmp23 = w37*(-B_1_0 - B_1_5);
-                                const double tmp24 = w32*(-B_2_4 - B_2_6);
-                                const double tmp25 = w36*(B_0_0 + B_0_2);
-                                const double tmp26 = w33*(B_0_5 + B_0_7);
-                                const double tmp27 = w30*(B_0_4 + B_0_6);
-                                const double tmp28 = w43*(-B_2_1 - B_2_3);
-                                const double tmp29 = w34*(-B_1_2 - B_1_7);
-                                const double tmp30 = w38*(-B_0_4 - B_0_6);
-                                const double tmp31 = w42*(B_2_5 + B_2_7);
-                                const double tmp32 = w35*(B_2_0 + B_2_2);
-                                const double tmp33 = w37*(B_1_2 + B_1_7);
-                                const double tmp34 = w32*(B_2_1 + B_2_3);
-                                const double tmp35 = w36*(-B_0_5 - B_0_7);
-                                const double tmp36 = w33*(-B_0_0 - B_0_2);
-                                const double tmp37 = w30*(-B_0_1 - B_0_3);
-                                const double tmp38 = w43*(B_2_4 + B_2_6);
-                                const double tmp39 = w34*(B_1_0 + B_1_5);
-                                const double tmp40 = w38*(B_0_0 + B_0_2);
-                                const double tmp41 = w31*(B_1_6 + B_1_7);
-                                const double tmp42 = w42*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
-                                const double tmp43 = w35*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
-                                const double tmp44 = w37*(B_1_4 + B_1_5);
-                                const double tmp45 = w39*(B_1_0 + B_1_1);
-                                const double tmp46 = w36*(B_0_1 + B_0_3);
-                                const double tmp47 = w33*(B_0_4 + B_0_6);
-                                const double tmp48 = w30*(B_0_5 + B_0_7);
-                                const double tmp49 = w34*(B_1_2 + B_1_3);
-                                const double tmp50 = w31*(-B_1_2 - B_1_3);
-                                const double tmp51 = w42*(B_2_6 + B_2_7);
-                                const double tmp52 = w35*(B_2_0 + B_2_1);
-                                const double tmp53 = w37*(-B_1_0 - B_1_1);
-                                const double tmp54 = w32*(B_2_2 + B_2_3);
-                                const double tmp55 = w39*(-B_1_4 - B_1_5);
-                                const double tmp56 = w36*(B_0_0 + B_0_6);
-                                const double tmp57 = w33*(B_0_1 + B_0_7);
-                                const double tmp58 = w43*(B_2_4 + B_2_5);
-                                const double tmp59 = w34*(-B_1_6 - B_1_7);
-                                const double tmp60 = w42*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
-                                const double tmp61 = w35*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
-                                const double tmp62 = w37*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
-                                const double tmp63 = w36*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
-                                const double tmp64 = w33*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
-                                const double tmp65 = w34*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
-                                const double tmp66 = w38*(B_0_4 + B_0_6);
-                                const double tmp67 = w36*(B_0_5 + B_0_7);
-                                const double tmp68 = w33*(B_0_0 + B_0_2);
-                                const double tmp69 = w30*(B_0_1 + B_0_3);
-                                const double tmp70 = w38*(-B_0_2 - B_0_6);
-                                const double tmp71 = w31*(B_1_1 + B_1_5);
-                                const double tmp72 = w42*(-B_2_0 - B_2_3);
-                                const double tmp73 = w35*(-B_2_4 - B_2_7);
-                                const double tmp74 = w37*(B_1_3 + B_1_7);
-                                const double tmp75 = w39*(B_1_2 + B_1_6);
-                                const double tmp76 = w36*(-B_0_3 - B_0_7);
-                                const double tmp77 = w33*(-B_0_0 - B_0_4);
-                                const double tmp78 = w30*(-B_0_1 - B_0_5);
-                                const double tmp79 = w34*(B_1_0 + B_1_4);
-                                const double tmp80 = w36*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
-                                const double tmp81 = w33*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
-                                const double tmp82 = w38*(B_0_1 + B_0_5);
-                                const double tmp83 = w31*(-B_1_2 - B_1_6);
-                                const double tmp84 = w42*(B_2_4 + B_2_7);
-                                const double tmp85 = w35*(B_2_0 + B_2_3);
-                                const double tmp86 = w37*(-B_1_0 - B_1_4);
-                                const double tmp87 = w39*(-B_1_1 - B_1_5);
-                                const double tmp88 = w36*(B_0_0 + B_0_4);
-                                const double tmp89 = w33*(B_0_3 + B_0_7);
-                                const double tmp90 = w30*(B_0_2 + B_0_6);
-                                const double tmp91 = w34*(-B_1_3 - B_1_7);
-                                const double tmp92 = w42*(-B_2_1 - B_2_2);
-                                const double tmp93 = w35*(-B_2_5 - B_2_6);
-                                const double tmp94 = w37*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
-                                const double tmp95 = w34*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
-                                const double tmp96 = w38*(-B_0_1 - B_0_3);
-                                const double tmp97 = w31*(-B_1_4 - B_1_5);
-                                const double tmp98 = w37*(-B_1_6 - B_1_7);
-                                const double tmp99 = w39*(-B_1_2 - B_1_3);
-                                const double tmp100 = w36*(-B_0_0 - B_0_2);
-                                const double tmp101 = w33*(-B_0_5 - B_0_7);
-                                const double tmp102 = w30*(-B_0_4 - B_0_6);
-                                const double tmp103 = w34*(-B_1_0 - B_1_1);
-                                const double tmp104 = w38*(B_0_2 + B_0_6);
-                                const double tmp105 = w42*(B_2_0 + B_2_1);
-                                const double tmp106 = w35*(B_2_6 + B_2_7);
-                                const double tmp107 = w37*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
-                                const double tmp108 = w32*(B_2_4 + B_2_5);
-                                const double tmp109 = w36*(B_0_3 + B_0_7);
-                                const double tmp110 = w33*(B_0_0 + B_0_4);
-                                const double tmp111 = w30*(B_0_1 + B_0_5);
-                                const double tmp112 = w43*(B_2_2 + B_2_3);
-                                const double tmp113 = w34*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
-                                const double tmp114 = w38*(-B_0_0 - B_0_4);
-                                const double tmp115 = w31*(-B_1_3 - B_1_7);
-                                const double tmp116 = w37*(-B_1_1 - B_1_5);
-                                const double tmp117 = w39*(-B_1_0 - B_1_4);
-                                const double tmp118 = w36*(-B_0_1 - B_0_5);
-                                const double tmp119 = w33*(-B_0_2 - B_0_6);
-                                const double tmp120 = w30*(-B_0_3 - B_0_7);
-                                const double tmp121 = w34*(-B_1_2 - B_1_6);
-                                const double tmp122 = w31*(B_1_0 + B_1_1);
-                                const double tmp123 = w42*(B_2_4 + B_2_5);
-                                const double tmp124 = w35*(B_2_2 + B_2_3);
-                                const double tmp125 = w37*(B_1_2 + B_1_3);
-                                const double tmp126 = w32*(B_2_0 + B_2_1);
-                                const double tmp127 = w39*(B_1_6 + B_1_7);
-                                const double tmp128 = w36*(-B_0_3 - B_0_5);
-                                const double tmp129 = w33*(-B_0_2 - B_0_4);
-                                const double tmp130 = w43*(B_2_6 + B_2_7);
-                                const double tmp131 = w34*(B_1_4 + B_1_5);
-                                const double tmp132 = w42*(-B_2_5 - B_2_6);
-                                const double tmp133 = w35*(-B_2_1 - B_2_2);
-                                const double tmp134 = w37*(B_1_0 + B_1_5);
-                                const double tmp135 = w36*(B_0_1 + B_0_7);
-                                const double tmp136 = w33*(B_0_0 + B_0_6);
-                                const double tmp137 = w34*(B_1_2 + B_1_7);
-                                const double tmp138 = w38*(-B_0_0 - B_0_2);
-                                const double tmp139 = w42*(-B_2_1 - B_2_3);
-                                const double tmp140 = w35*(-B_2_4 - B_2_6);
-                                const double tmp141 = w37*(B_1_3 + B_1_6);
-                                const double tmp142 = w32*(-B_2_5 - B_2_7);
-                                const double tmp143 = w36*(-B_0_1 - B_0_3);
-                                const double tmp144 = w33*(-B_0_4 - B_0_6);
-                                const double tmp145 = w30*(-B_0_5 - B_0_7);
-                                const double tmp146 = w43*(-B_2_0 - B_2_2);
-                                const double tmp147 = w34*(B_1_1 + B_1_4);
-                                const double tmp148 = w36*(B_0_2 + B_0_4);
-                                const double tmp149 = w33*(B_0_3 + B_0_5);
-                                const double tmp150 = w42*(B_2_1 + B_2_2);
-                                const double tmp151 = w35*(B_2_5 + B_2_6);
-                                const double tmp152 = w37*(-B_1_2 - B_1_7);
-                                const double tmp153 = w36*(-B_0_0 - B_0_6);
-                                const double tmp154 = w33*(-B_0_1 - B_0_7);
-                                const double tmp155 = w34*(-B_1_0 - B_1_5);
-                                const double tmp156 = w38*(-B_0_3 - B_0_7);
-                                const double tmp157 = w36*(-B_0_2 - B_0_6);
-                                const double tmp158 = w33*(-B_0_1 - B_0_5);
-                                const double tmp159 = w30*(-B_0_0 - B_0_4);
-                                const double tmp160 = w42*(-B_2_4 - B_2_5);
-                                const double tmp161 = w35*(-B_2_2 - B_2_3);
-                                const double tmp162 = w32*(-B_2_0 - B_2_1);
-                                const double tmp163 = w43*(-B_2_6 - B_2_7);
-                                const double tmp164 = w42*(-B_2_4 - B_2_7);
-                                const double tmp165 = w35*(-B_2_0 - B_2_3);
-                                const double tmp166 = w37*(B_1_1 + B_1_4);
-                                const double tmp167 = w34*(B_1_3 + B_1_6);
-                                const double tmp168 = w36*(B_0_3 + B_0_5);
-                                const double tmp169 = w33*(B_0_2 + B_0_4);
-                                const double tmp170 = w38*(B_0_5 + B_0_7);
-                                const double tmp171 = w42*(B_2_4 + B_2_6);
-                                const double tmp172 = w35*(B_2_1 + B_2_3);
-                                const double tmp173 = w37*(-B_1_1 - B_1_4);
-                                const double tmp174 = w32*(B_2_0 + B_2_2);
-                                const double tmp175 = w36*(B_0_4 + B_0_6);
-                                const double tmp176 = w33*(B_0_1 + B_0_3);
-                                const double tmp177 = w30*(B_0_0 + B_0_2);
-                                const double tmp178 = w43*(B_2_5 + B_2_7);
-                                const double tmp179 = w34*(-B_1_3 - B_1_6);
-                                const double tmp180 = w31*(-B_1_0 - B_1_4);
-                                const double tmp181 = w42*(B_2_0 + B_2_2);
-                                const double tmp182 = w35*(B_2_5 + B_2_7);
-                                const double tmp183 = w37*(-B_1_2 - B_1_6);
-                                const double tmp184 = w32*(B_2_4 + B_2_6);
-                                const double tmp185 = w39*(-B_1_3 - B_1_7);
-                                const double tmp186 = w36*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
-                                const double tmp187 = w33*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
-                                const double tmp188 = w43*(B_2_1 + B_2_3);
-                                const double tmp189 = w34*(-B_1_1 - B_1_5);
-                                const double tmp190 = w38*(-B_0_1 - B_0_5);
-                                const double tmp191 = w42*(B_2_2 + B_2_3);
-                                const double tmp192 = w35*(B_2_4 + B_2_5);
-                                const double tmp193 = w37*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
-                                const double tmp194 = w32*(B_2_6 + B_2_7);
-                                const double tmp195 = w36*(-B_0_0 - B_0_4);
-                                const double tmp196 = w33*(-B_0_3 - B_0_7);
-                                const double tmp197 = w30*(-B_0_2 - B_0_6);
-                                const double tmp198 = w43*(B_2_0 + B_2_1);
-                                const double tmp199 = w34*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
-                                const double tmp200 = w31*(B_1_4 + B_1_5);
-                                const double tmp201 = w42*(-B_2_0 - B_2_1);
-                                const double tmp202 = w35*(-B_2_6 - B_2_7);
-                                const double tmp203 = w37*(B_1_6 + B_1_7);
-                                const double tmp204 = w32*(-B_2_4 - B_2_5);
-                                const double tmp205 = w39*(B_1_2 + B_1_3);
-                                const double tmp206 = w43*(-B_2_2 - B_2_3);
-                                const double tmp207 = w34*(B_1_0 + B_1_1);
-                                const double tmp208 = w37*(-B_1_3 - B_1_6);
-                                const double tmp209 = w36*(-B_0_2 - B_0_4);
-                                const double tmp210 = w33*(-B_0_3 - B_0_5);
-                                const double tmp211 = w34*(-B_1_1 - B_1_4);
-                                const double tmp212 = w42*(B_2_0 + B_2_3);
-                                const double tmp213 = w35*(B_2_4 + B_2_7);
-                                const double tmp214 = w38*(B_0_0 + B_0_4);
-                                const double tmp215 = w36*(B_0_1 + B_0_5);
-                                const double tmp216 = w33*(B_0_2 + B_0_6);
-                                const double tmp217 = w30*(B_0_3 + B_0_7);
-                                const double tmp218 = w31*(B_1_2 + B_1_6);
-                                const double tmp219 = w37*(B_1_0 + B_1_4);
-                                const double tmp220 = w39*(B_1_1 + B_1_5);
-                                const double tmp221 = w34*(B_1_3 + B_1_7);
-                                const double tmp222 = w36*(-B_0_1 - B_0_7);
-                                const double tmp223 = w33*(-B_0_0 - B_0_6);
-                                const double tmp224 = w42*(-B_2_6 - B_2_7);
-                                const double tmp225 = w35*(-B_2_0 - B_2_1);
-                                const double tmp226 = w32*(-B_2_2 - B_2_3);
-                                const double tmp227 = w43*(-B_2_4 - B_2_5);
-                                const double tmp228 = w31*(B_1_3 + B_1_7);
-                                const double tmp229 = w42*(B_2_1 + B_2_3);
-                                const double tmp230 = w35*(B_2_4 + B_2_6);
-                                const double tmp231 = w37*(B_1_1 + B_1_5);
-                                const double tmp232 = w32*(B_2_5 + B_2_7);
-                                const double tmp233 = w39*(B_1_0 + B_1_4);
-                                const double tmp234 = w36*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
-                                const double tmp235 = w33*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
-                                const double tmp236 = w43*(B_2_0 + B_2_2);
-                                const double tmp237 = w34*(B_1_2 + B_1_6);
-                                const double tmp238 = w31*(-B_1_1 - B_1_5);
-                                const double tmp239 = w37*(-B_1_3 - B_1_7);
-                                const double tmp240 = w39*(-B_1_2 - B_1_6);
-                                const double tmp241 = w34*(-B_1_0 - B_1_4);
-                                const double tmp242 = w31*(-B_1_6 - B_1_7);
-                                const double tmp243 = w42*(-B_2_2 - B_2_3);
-                                const double tmp244 = w35*(-B_2_4 - B_2_5);
-                                const double tmp245 = w37*(-B_1_4 - B_1_5);
-                                const double tmp246 = w32*(-B_2_6 - B_2_7);
-                                const double tmp247 = w39*(-B_1_0 - B_1_1);
-                                const double tmp248 = w43*(-B_2_0 - B_2_1);
-                                const double tmp249 = w34*(-B_1_2 - B_1_3);
-                                const double tmp250 = w31*(B_1_2 + B_1_3);
-                                const double tmp251 = w37*(B_1_0 + B_1_1);
-                                const double tmp252 = w39*(B_1_4 + B_1_5);
-                                const double tmp253 = w34*(B_1_6 + B_1_7);
-                                const double tmp254 = w42*(-B_2_4 - B_2_6);
-                                const double tmp255 = w35*(-B_2_1 - B_2_3);
-                                const double tmp256 = w32*(-B_2_0 - B_2_2);
-                                const double tmp257 = w43*(-B_2_5 - B_2_7);
-                                const double tmp258 = w42*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
-                                const double tmp259 = w35*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
-                                const double tmp260 = w42*(-B_2_5 - B_2_7);
-                                const double tmp261 = w35*(-B_2_0 - B_2_2);
-                                const double tmp262 = w32*(-B_2_1 - B_2_3);
-                                const double tmp263 = w43*(-B_2_4 - B_2_6);
+                                const Scalar B_0_0 = B_p[INDEX2(0,0,3)];
+                                const Scalar B_1_0 = B_p[INDEX2(1,0,3)];
+                                const Scalar B_2_0 = B_p[INDEX2(2,0,3)];
+                                const Scalar B_0_1 = B_p[INDEX2(0,1,3)];
+                                const Scalar B_1_1 = B_p[INDEX2(1,1,3)];
+                                const Scalar B_2_1 = B_p[INDEX2(2,1,3)];
+                                const Scalar B_0_2 = B_p[INDEX2(0,2,3)];
+                                const Scalar B_1_2 = B_p[INDEX2(1,2,3)];
+                                const Scalar B_2_2 = B_p[INDEX2(2,2,3)];
+                                const Scalar B_0_3 = B_p[INDEX2(0,3,3)];
+                                const Scalar B_1_3 = B_p[INDEX2(1,3,3)];
+                                const Scalar B_2_3 = B_p[INDEX2(2,3,3)];
+                                const Scalar B_0_4 = B_p[INDEX2(0,4,3)];
+                                const Scalar B_1_4 = B_p[INDEX2(1,4,3)];
+                                const Scalar B_2_4 = B_p[INDEX2(2,4,3)];
+                                const Scalar B_0_5 = B_p[INDEX2(0,5,3)];
+                                const Scalar B_1_5 = B_p[INDEX2(1,5,3)];
+                                const Scalar B_2_5 = B_p[INDEX2(2,5,3)];
+                                const Scalar B_0_6 = B_p[INDEX2(0,6,3)];
+                                const Scalar B_1_6 = B_p[INDEX2(1,6,3)];
+                                const Scalar B_2_6 = B_p[INDEX2(2,6,3)];
+                                const Scalar B_0_7 = B_p[INDEX2(0,7,3)];
+                                const Scalar B_1_7 = B_p[INDEX2(1,7,3)];
+                                const Scalar B_2_7 = B_p[INDEX2(2,7,3)];
+                                const Scalar tmp0 = w38*(B_0_3 + B_0_7);
+                                const Scalar tmp1 = w31*(B_1_0 + B_1_4);
+                                const Scalar tmp2 = w42*(B_2_5 + B_2_6);
+                                const Scalar tmp3 = w35*(B_2_1 + B_2_2);
+                                const Scalar tmp4 = w37*(B_1_2 + B_1_6);
+                                const Scalar tmp5 = w39*(B_1_3 + B_1_7);
+                                const Scalar tmp6 = w36*(B_0_2 + B_0_6);
+                                const Scalar tmp7 = w33*(B_0_1 + B_0_5);
+                                const Scalar tmp8 = w30*(B_0_0 + B_0_4);
+                                const Scalar tmp9 = w34*(B_1_1 + B_1_5);
+                                const Scalar tmp10 = w38*(-B_0_5 - B_0_7);
+                                const Scalar tmp11 = w31*(-B_1_0 - B_1_1);
+                                const Scalar tmp12 = w42*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
+                                const Scalar tmp13 = w35*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
+                                const Scalar tmp14 = w37*(-B_1_2 - B_1_3);
+                                const Scalar tmp15 = w39*(-B_1_6 - B_1_7);
+                                const Scalar tmp16 = w36*(-B_0_4 - B_0_6);
+                                const Scalar tmp17 = w33*(-B_0_1 - B_0_3);
+                                const Scalar tmp18 = w30*(-B_0_0 - B_0_2);
+                                const Scalar tmp19 = w34*(-B_1_4 - B_1_5);
+                                const Scalar tmp20 = w38*(B_0_1 + B_0_3);
+                                const Scalar tmp21 = w42*(-B_2_0 - B_2_2);
+                                const Scalar tmp22 = w35*(-B_2_5 - B_2_7);
+                                const Scalar tmp23 = w37*(-B_1_0 - B_1_5);
+                                const Scalar tmp24 = w32*(-B_2_4 - B_2_6);
+                                const Scalar tmp25 = w36*(B_0_0 + B_0_2);
+                                const Scalar tmp26 = w33*(B_0_5 + B_0_7);
+                                const Scalar tmp27 = w30*(B_0_4 + B_0_6);
+                                const Scalar tmp28 = w43*(-B_2_1 - B_2_3);
+                                const Scalar tmp29 = w34*(-B_1_2 - B_1_7);
+                                const Scalar tmp30 = w38*(-B_0_4 - B_0_6);
+                                const Scalar tmp31 = w42*(B_2_5 + B_2_7);
+                                const Scalar tmp32 = w35*(B_2_0 + B_2_2);
+                                const Scalar tmp33 = w37*(B_1_2 + B_1_7);
+                                const Scalar tmp34 = w32*(B_2_1 + B_2_3);
+                                const Scalar tmp35 = w36*(-B_0_5 - B_0_7);
+                                const Scalar tmp36 = w33*(-B_0_0 - B_0_2);
+                                const Scalar tmp37 = w30*(-B_0_1 - B_0_3);
+                                const Scalar tmp38 = w43*(B_2_4 + B_2_6);
+                                const Scalar tmp39 = w34*(B_1_0 + B_1_5);
+                                const Scalar tmp40 = w38*(B_0_0 + B_0_2);
+                                const Scalar tmp41 = w31*(B_1_6 + B_1_7);
+                                const Scalar tmp42 = w42*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
+                                const Scalar tmp43 = w35*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
+                                const Scalar tmp44 = w37*(B_1_4 + B_1_5);
+                                const Scalar tmp45 = w39*(B_1_0 + B_1_1);
+                                const Scalar tmp46 = w36*(B_0_1 + B_0_3);
+                                const Scalar tmp47 = w33*(B_0_4 + B_0_6);
+                                const Scalar tmp48 = w30*(B_0_5 + B_0_7);
+                                const Scalar tmp49 = w34*(B_1_2 + B_1_3);
+                                const Scalar tmp50 = w31*(-B_1_2 - B_1_3);
+                                const Scalar tmp51 = w42*(B_2_6 + B_2_7);
+                                const Scalar tmp52 = w35*(B_2_0 + B_2_1);
+                                const Scalar tmp53 = w37*(-B_1_0 - B_1_1);
+                                const Scalar tmp54 = w32*(B_2_2 + B_2_3);
+                                const Scalar tmp55 = w39*(-B_1_4 - B_1_5);
+                                const Scalar tmp56 = w36*(B_0_0 + B_0_6);
+                                const Scalar tmp57 = w33*(B_0_1 + B_0_7);
+                                const Scalar tmp58 = w43*(B_2_4 + B_2_5);
+                                const Scalar tmp59 = w34*(-B_1_6 - B_1_7);
+                                const Scalar tmp60 = w42*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
+                                const Scalar tmp61 = w35*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
+                                const Scalar tmp62 = w37*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
+                                const Scalar tmp63 = w36*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
+                                const Scalar tmp64 = w33*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
+                                const Scalar tmp65 = w34*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
+                                const Scalar tmp66 = w38*(B_0_4 + B_0_6);
+                                const Scalar tmp67 = w36*(B_0_5 + B_0_7);
+                                const Scalar tmp68 = w33*(B_0_0 + B_0_2);
+                                const Scalar tmp69 = w30*(B_0_1 + B_0_3);
+                                const Scalar tmp70 = w38*(-B_0_2 - B_0_6);
+                                const Scalar tmp71 = w31*(B_1_1 + B_1_5);
+                                const Scalar tmp72 = w42*(-B_2_0 - B_2_3);
+                                const Scalar tmp73 = w35*(-B_2_4 - B_2_7);
+                                const Scalar tmp74 = w37*(B_1_3 + B_1_7);
+                                const Scalar tmp75 = w39*(B_1_2 + B_1_6);
+                                const Scalar tmp76 = w36*(-B_0_3 - B_0_7);
+                                const Scalar tmp77 = w33*(-B_0_0 - B_0_4);
+                                const Scalar tmp78 = w30*(-B_0_1 - B_0_5);
+                                const Scalar tmp79 = w34*(B_1_0 + B_1_4);
+                                const Scalar tmp80 = w36*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
+                                const Scalar tmp81 = w33*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
+                                const Scalar tmp82 = w38*(B_0_1 + B_0_5);
+                                const Scalar tmp83 = w31*(-B_1_2 - B_1_6);
+                                const Scalar tmp84 = w42*(B_2_4 + B_2_7);
+                                const Scalar tmp85 = w35*(B_2_0 + B_2_3);
+                                const Scalar tmp86 = w37*(-B_1_0 - B_1_4);
+                                const Scalar tmp87 = w39*(-B_1_1 - B_1_5);
+                                const Scalar tmp88 = w36*(B_0_0 + B_0_4);
+                                const Scalar tmp89 = w33*(B_0_3 + B_0_7);
+                                const Scalar tmp90 = w30*(B_0_2 + B_0_6);
+                                const Scalar tmp91 = w34*(-B_1_3 - B_1_7);
+                                const Scalar tmp92 = w42*(-B_2_1 - B_2_2);
+                                const Scalar tmp93 = w35*(-B_2_5 - B_2_6);
+                                const Scalar tmp94 = w37*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
+                                const Scalar tmp95 = w34*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
+                                const Scalar tmp96 = w38*(-B_0_1 - B_0_3);
+                                const Scalar tmp97 = w31*(-B_1_4 - B_1_5);
+                                const Scalar tmp98 = w37*(-B_1_6 - B_1_7);
+                                const Scalar tmp99 = w39*(-B_1_2 - B_1_3);
+                                const Scalar tmp100 = w36*(-B_0_0 - B_0_2);
+                                const Scalar tmp101 = w33*(-B_0_5 - B_0_7);
+                                const Scalar tmp102 = w30*(-B_0_4 - B_0_6);
+                                const Scalar tmp103 = w34*(-B_1_0 - B_1_1);
+                                const Scalar tmp104 = w38*(B_0_2 + B_0_6);
+                                const Scalar tmp105 = w42*(B_2_0 + B_2_1);
+                                const Scalar tmp106 = w35*(B_2_6 + B_2_7);
+                                const Scalar tmp107 = w37*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
+                                const Scalar tmp108 = w32*(B_2_4 + B_2_5);
+                                const Scalar tmp109 = w36*(B_0_3 + B_0_7);
+                                const Scalar tmp110 = w33*(B_0_0 + B_0_4);
+                                const Scalar tmp111 = w30*(B_0_1 + B_0_5);
+                                const Scalar tmp112 = w43*(B_2_2 + B_2_3);
+                                const Scalar tmp113 = w34*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
+                                const Scalar tmp114 = w38*(-B_0_0 - B_0_4);
+                                const Scalar tmp115 = w31*(-B_1_3 - B_1_7);
+                                const Scalar tmp116 = w37*(-B_1_1 - B_1_5);
+                                const Scalar tmp117 = w39*(-B_1_0 - B_1_4);
+                                const Scalar tmp118 = w36*(-B_0_1 - B_0_5);
+                                const Scalar tmp119 = w33*(-B_0_2 - B_0_6);
+                                const Scalar tmp120 = w30*(-B_0_3 - B_0_7);
+                                const Scalar tmp121 = w34*(-B_1_2 - B_1_6);
+                                const Scalar tmp122 = w31*(B_1_0 + B_1_1);
+                                const Scalar tmp123 = w42*(B_2_4 + B_2_5);
+                                const Scalar tmp124 = w35*(B_2_2 + B_2_3);
+                                const Scalar tmp125 = w37*(B_1_2 + B_1_3);
+                                const Scalar tmp126 = w32*(B_2_0 + B_2_1);
+                                const Scalar tmp127 = w39*(B_1_6 + B_1_7);
+                                const Scalar tmp128 = w36*(-B_0_3 - B_0_5);
+                                const Scalar tmp129 = w33*(-B_0_2 - B_0_4);
+                                const Scalar tmp130 = w43*(B_2_6 + B_2_7);
+                                const Scalar tmp131 = w34*(B_1_4 + B_1_5);
+                                const Scalar tmp132 = w42*(-B_2_5 - B_2_6);
+                                const Scalar tmp133 = w35*(-B_2_1 - B_2_2);
+                                const Scalar tmp134 = w37*(B_1_0 + B_1_5);
+                                const Scalar tmp135 = w36*(B_0_1 + B_0_7);
+                                const Scalar tmp136 = w33*(B_0_0 + B_0_6);
+                                const Scalar tmp137 = w34*(B_1_2 + B_1_7);
+                                const Scalar tmp138 = w38*(-B_0_0 - B_0_2);
+                                const Scalar tmp139 = w42*(-B_2_1 - B_2_3);
+                                const Scalar tmp140 = w35*(-B_2_4 - B_2_6);
+                                const Scalar tmp141 = w37*(B_1_3 + B_1_6);
+                                const Scalar tmp142 = w32*(-B_2_5 - B_2_7);
+                                const Scalar tmp143 = w36*(-B_0_1 - B_0_3);
+                                const Scalar tmp144 = w33*(-B_0_4 - B_0_6);
+                                const Scalar tmp145 = w30*(-B_0_5 - B_0_7);
+                                const Scalar tmp146 = w43*(-B_2_0 - B_2_2);
+                                const Scalar tmp147 = w34*(B_1_1 + B_1_4);
+                                const Scalar tmp148 = w36*(B_0_2 + B_0_4);
+                                const Scalar tmp149 = w33*(B_0_3 + B_0_5);
+                                const Scalar tmp150 = w42*(B_2_1 + B_2_2);
+                                const Scalar tmp151 = w35*(B_2_5 + B_2_6);
+                                const Scalar tmp152 = w37*(-B_1_2 - B_1_7);
+                                const Scalar tmp153 = w36*(-B_0_0 - B_0_6);
+                                const Scalar tmp154 = w33*(-B_0_1 - B_0_7);
+                                const Scalar tmp155 = w34*(-B_1_0 - B_1_5);
+                                const Scalar tmp156 = w38*(-B_0_3 - B_0_7);
+                                const Scalar tmp157 = w36*(-B_0_2 - B_0_6);
+                                const Scalar tmp158 = w33*(-B_0_1 - B_0_5);
+                                const Scalar tmp159 = w30*(-B_0_0 - B_0_4);
+                                const Scalar tmp160 = w42*(-B_2_4 - B_2_5);
+                                const Scalar tmp161 = w35*(-B_2_2 - B_2_3);
+                                const Scalar tmp162 = w32*(-B_2_0 - B_2_1);
+                                const Scalar tmp163 = w43*(-B_2_6 - B_2_7);
+                                const Scalar tmp164 = w42*(-B_2_4 - B_2_7);
+                                const Scalar tmp165 = w35*(-B_2_0 - B_2_3);
+                                const Scalar tmp166 = w37*(B_1_1 + B_1_4);
+                                const Scalar tmp167 = w34*(B_1_3 + B_1_6);
+                                const Scalar tmp168 = w36*(B_0_3 + B_0_5);
+                                const Scalar tmp169 = w33*(B_0_2 + B_0_4);
+                                const Scalar tmp170 = w38*(B_0_5 + B_0_7);
+                                const Scalar tmp171 = w42*(B_2_4 + B_2_6);
+                                const Scalar tmp172 = w35*(B_2_1 + B_2_3);
+                                const Scalar tmp173 = w37*(-B_1_1 - B_1_4);
+                                const Scalar tmp174 = w32*(B_2_0 + B_2_2);
+                                const Scalar tmp175 = w36*(B_0_4 + B_0_6);
+                                const Scalar tmp176 = w33*(B_0_1 + B_0_3);
+                                const Scalar tmp177 = w30*(B_0_0 + B_0_2);
+                                const Scalar tmp178 = w43*(B_2_5 + B_2_7);
+                                const Scalar tmp179 = w34*(-B_1_3 - B_1_6);
+                                const Scalar tmp180 = w31*(-B_1_0 - B_1_4);
+                                const Scalar tmp181 = w42*(B_2_0 + B_2_2);
+                                const Scalar tmp182 = w35*(B_2_5 + B_2_7);
+                                const Scalar tmp183 = w37*(-B_1_2 - B_1_6);
+                                const Scalar tmp184 = w32*(B_2_4 + B_2_6);
+                                const Scalar tmp185 = w39*(-B_1_3 - B_1_7);
+                                const Scalar tmp186 = w36*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
+                                const Scalar tmp187 = w33*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
+                                const Scalar tmp188 = w43*(B_2_1 + B_2_3);
+                                const Scalar tmp189 = w34*(-B_1_1 - B_1_5);
+                                const Scalar tmp190 = w38*(-B_0_1 - B_0_5);
+                                const Scalar tmp191 = w42*(B_2_2 + B_2_3);
+                                const Scalar tmp192 = w35*(B_2_4 + B_2_5);
+                                const Scalar tmp193 = w37*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
+                                const Scalar tmp194 = w32*(B_2_6 + B_2_7);
+                                const Scalar tmp195 = w36*(-B_0_0 - B_0_4);
+                                const Scalar tmp196 = w33*(-B_0_3 - B_0_7);
+                                const Scalar tmp197 = w30*(-B_0_2 - B_0_6);
+                                const Scalar tmp198 = w43*(B_2_0 + B_2_1);
+                                const Scalar tmp199 = w34*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
+                                const Scalar tmp200 = w31*(B_1_4 + B_1_5);
+                                const Scalar tmp201 = w42*(-B_2_0 - B_2_1);
+                                const Scalar tmp202 = w35*(-B_2_6 - B_2_7);
+                                const Scalar tmp203 = w37*(B_1_6 + B_1_7);
+                                const Scalar tmp204 = w32*(-B_2_4 - B_2_5);
+                                const Scalar tmp205 = w39*(B_1_2 + B_1_3);
+                                const Scalar tmp206 = w43*(-B_2_2 - B_2_3);
+                                const Scalar tmp207 = w34*(B_1_0 + B_1_1);
+                                const Scalar tmp208 = w37*(-B_1_3 - B_1_6);
+                                const Scalar tmp209 = w36*(-B_0_2 - B_0_4);
+                                const Scalar tmp210 = w33*(-B_0_3 - B_0_5);
+                                const Scalar tmp211 = w34*(-B_1_1 - B_1_4);
+                                const Scalar tmp212 = w42*(B_2_0 + B_2_3);
+                                const Scalar tmp213 = w35*(B_2_4 + B_2_7);
+                                const Scalar tmp214 = w38*(B_0_0 + B_0_4);
+                                const Scalar tmp215 = w36*(B_0_1 + B_0_5);
+                                const Scalar tmp216 = w33*(B_0_2 + B_0_6);
+                                const Scalar tmp217 = w30*(B_0_3 + B_0_7);
+                                const Scalar tmp218 = w31*(B_1_2 + B_1_6);
+                                const Scalar tmp219 = w37*(B_1_0 + B_1_4);
+                                const Scalar tmp220 = w39*(B_1_1 + B_1_5);
+                                const Scalar tmp221 = w34*(B_1_3 + B_1_7);
+                                const Scalar tmp222 = w36*(-B_0_1 - B_0_7);
+                                const Scalar tmp223 = w33*(-B_0_0 - B_0_6);
+                                const Scalar tmp224 = w42*(-B_2_6 - B_2_7);
+                                const Scalar tmp225 = w35*(-B_2_0 - B_2_1);
+                                const Scalar tmp226 = w32*(-B_2_2 - B_2_3);
+                                const Scalar tmp227 = w43*(-B_2_4 - B_2_5);
+                                const Scalar tmp228 = w31*(B_1_3 + B_1_7);
+                                const Scalar tmp229 = w42*(B_2_1 + B_2_3);
+                                const Scalar tmp230 = w35*(B_2_4 + B_2_6);
+                                const Scalar tmp231 = w37*(B_1_1 + B_1_5);
+                                const Scalar tmp232 = w32*(B_2_5 + B_2_7);
+                                const Scalar tmp233 = w39*(B_1_0 + B_1_4);
+                                const Scalar tmp234 = w36*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
+                                const Scalar tmp235 = w33*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
+                                const Scalar tmp236 = w43*(B_2_0 + B_2_2);
+                                const Scalar tmp237 = w34*(B_1_2 + B_1_6);
+                                const Scalar tmp238 = w31*(-B_1_1 - B_1_5);
+                                const Scalar tmp239 = w37*(-B_1_3 - B_1_7);
+                                const Scalar tmp240 = w39*(-B_1_2 - B_1_6);
+                                const Scalar tmp241 = w34*(-B_1_0 - B_1_4);
+                                const Scalar tmp242 = w31*(-B_1_6 - B_1_7);
+                                const Scalar tmp243 = w42*(-B_2_2 - B_2_3);
+                                const Scalar tmp244 = w35*(-B_2_4 - B_2_5);
+                                const Scalar tmp245 = w37*(-B_1_4 - B_1_5);
+                                const Scalar tmp246 = w32*(-B_2_6 - B_2_7);
+                                const Scalar tmp247 = w39*(-B_1_0 - B_1_1);
+                                const Scalar tmp248 = w43*(-B_2_0 - B_2_1);
+                                const Scalar tmp249 = w34*(-B_1_2 - B_1_3);
+                                const Scalar tmp250 = w31*(B_1_2 + B_1_3);
+                                const Scalar tmp251 = w37*(B_1_0 + B_1_1);
+                                const Scalar tmp252 = w39*(B_1_4 + B_1_5);
+                                const Scalar tmp253 = w34*(B_1_6 + B_1_7);
+                                const Scalar tmp254 = w42*(-B_2_4 - B_2_6);
+                                const Scalar tmp255 = w35*(-B_2_1 - B_2_3);
+                                const Scalar tmp256 = w32*(-B_2_0 - B_2_2);
+                                const Scalar tmp257 = w43*(-B_2_5 - B_2_7);
+                                const Scalar tmp258 = w42*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
+                                const Scalar tmp259 = w35*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
+                                const Scalar tmp260 = w42*(-B_2_5 - B_2_7);
+                                const Scalar tmp261 = w35*(-B_2_0 - B_2_2);
+                                const Scalar tmp262 = w32*(-B_2_1 - B_2_3);
+                                const Scalar tmp263 = w43*(-B_2_4 - B_2_6);
                                 EM_S[INDEX2(0,0,8)]+=-B_0_0*w47 - B_0_1*w38 - B_0_6*w30 - B_0_7*w46 + B_1_0*w44 - B_1_2*w39 - B_1_5*w31 + B_1_7*w45 - B_2_0*w40 - B_2_3*w32 - B_2_4*w43 - B_2_7*w41 + tmp132 + tmp133 + tmp208 + tmp209 + tmp210 + tmp211;
                                 EM_S[INDEX2(1,0,8)]+=B_0_0*w47 + B_0_1*w38 + B_0_6*w30 + B_0_7*w46 + tmp148 + tmp149 + tmp242 + tmp243 + tmp244 + tmp245 + tmp246 + tmp247 + tmp248 + tmp249;
                                 EM_S[INDEX2(2,0,8)]+=-B_1_0*w44 + B_1_2*w39 + B_1_5*w31 - B_1_7*w45 + tmp138 + tmp139 + tmp140 + tmp141 + tmp142 + tmp143 + tmp144 + tmp145 + tmp146 + tmp147;
@@ -1334,369 +1349,369 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_S[INDEX2(6,7,8)]+=-B_0_0*w46 - B_0_1*w30 - B_0_6*w38 - B_0_7*w47 + tmp122 + tmp123 + tmp124 + tmp125 + tmp126 + tmp127 + tmp128 + tmp129 + tmp130 + tmp131;
                                 EM_S[INDEX2(7,7,8)]+=B_0_0*w46 + B_0_1*w30 + B_0_6*w38 + B_0_7*w47 - B_1_0*w45 + B_1_2*w31 + B_1_5*w39 - B_1_7*w44 + B_2_0*w41 + B_2_3*w43 + B_2_4*w32 + B_2_7*w40 + tmp150 + tmp151 + tmp166 + tmp167 + tmp168 + tmp169;
                             } else { // constant data
-                                const double wB0 = B_p[0]*w53;
-                                const double wB1 = B_p[1]*w51;
-                                const double wB2 = B_p[2]*w50;
-                                EM_S[INDEX2(0,0,8)]+= 4*wB0 + 4*wB1 - 4*wB2;
-                                EM_S[INDEX2(1,0,8)]+=-4*wB0 + 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(2,0,8)]+= 2*wB0 - 4*wB1 - 2*wB2;
-                                EM_S[INDEX2(3,0,8)]+=-2*wB0 - 2*wB1 - wB2;
-                                EM_S[INDEX2(4,0,8)]+= 2*wB0 + 2*wB1 + 4*wB2;
-                                EM_S[INDEX2(5,0,8)]+=-2*wB0 +   wB1 + 2*wB2;
-                                EM_S[INDEX2(6,0,8)]+=   wB0 - 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(7,0,8)]+=  -wB0 -   wB1 + wB2;
-                                EM_S[INDEX2(0,1,8)]+= 4*wB0 + 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(1,1,8)]+=-4*wB0 + 4*wB1 - 4*wB2;
-                                EM_S[INDEX2(2,1,8)]+= 2*wB0 - 2*wB1 - wB2;
-                                EM_S[INDEX2(3,1,8)]+=-2*wB0 - 4*wB1 - 2*wB2;
-                                EM_S[INDEX2(4,1,8)]+= 2*wB0 +   wB1 + 2*wB2;
-                                EM_S[INDEX2(5,1,8)]+=-2*wB0 + 2*wB1 + 4*wB2;
-                                EM_S[INDEX2(6,1,8)]+=   wB0 -   wB1 + wB2;
-                                EM_S[INDEX2(7,1,8)]+=  -wB0 - 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(0,2,8)]+= 2*wB0 + 4*wB1 - 2*wB2;
-                                EM_S[INDEX2(1,2,8)]+=-2*wB0 + 2*wB1 - wB2;
-                                EM_S[INDEX2(2,2,8)]+= 4*wB0 - 4*wB1 - 4*wB2;
-                                EM_S[INDEX2(3,2,8)]+=-4*wB0 - 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(4,2,8)]+=   wB0 + 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(5,2,8)]+=  -wB0 +   wB1 + wB2;
-                                EM_S[INDEX2(6,2,8)]+= 2*wB0 - 2*wB1 + 4*wB2;
-                                EM_S[INDEX2(7,2,8)]+=-2*wB0 -   wB1 + 2*wB2;
-                                EM_S[INDEX2(0,3,8)]+= 2*wB0 + 2*wB1 - wB2;
-                                EM_S[INDEX2(1,3,8)]+=-2*wB0 + 4*wB1 - 2*wB2;
-                                EM_S[INDEX2(2,3,8)]+= 4*wB0 - 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(3,3,8)]+=-4*wB0 - 4*wB1 - 4*wB2;
-                                EM_S[INDEX2(4,3,8)]+=   wB0 +   wB1 + wB2;
-                                EM_S[INDEX2(5,3,8)]+=  -wB0 + 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(6,3,8)]+= 2*wB0 -   wB1 + 2*wB2;
-                                EM_S[INDEX2(7,3,8)]+=-2*wB0 - 2*wB1 + 4*wB2;
-                                EM_S[INDEX2(0,4,8)]+= 2*wB0 + 2*wB1 - 4*wB2;
-                                EM_S[INDEX2(1,4,8)]+=-2*wB0 +   wB1 - 2*wB2;
-                                EM_S[INDEX2(2,4,8)]+=   wB0 - 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(3,4,8)]+=  -wB0 -   wB1 - wB2;
-                                EM_S[INDEX2(4,4,8)]+= 4*wB0 + 4*wB1 + 4*wB2;
-                                EM_S[INDEX2(5,4,8)]+=-4*wB0 + 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(6,4,8)]+= 2*wB0 - 4*wB1 + 2*wB2;
-                                EM_S[INDEX2(7,4,8)]+=-2*wB0 - 2*wB1 + wB2;
-                                EM_S[INDEX2(0,5,8)]+= 2*wB0 +   wB1 - 2*wB2;
-                                EM_S[INDEX2(1,5,8)]+=-2*wB0 + 2*wB1 - 4*wB2;
-                                EM_S[INDEX2(2,5,8)]+=   wB0 -   wB1 - wB2;
-                                EM_S[INDEX2(3,5,8)]+=  -wB0 - 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(4,5,8)]+= 4*wB0 + 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(5,5,8)]+=-4*wB0 + 4*wB1 + 4*wB2;
-                                EM_S[INDEX2(6,5,8)]+= 2*wB0 - 2*wB1 + wB2;
-                                EM_S[INDEX2(7,5,8)]+=-2*wB0 - 4*wB1 + 2*wB2;
-                                EM_S[INDEX2(0,6,8)]+=   wB0 + 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(1,6,8)]+=  -wB0 +   wB1 - wB2;
-                                EM_S[INDEX2(2,6,8)]+= 2*wB0 - 2*wB1 - 4*wB2;
-                                EM_S[INDEX2(3,6,8)]+=-2*wB0 -   wB1 - 2*wB2;
-                                EM_S[INDEX2(4,6,8)]+= 2*wB0 + 4*wB1 + 2*wB2;
-                                EM_S[INDEX2(5,6,8)]+=-2*wB0 + 2*wB1 + wB2;
-                                EM_S[INDEX2(6,6,8)]+= 4*wB0 - 4*wB1 + 4*wB2;
-                                EM_S[INDEX2(7,6,8)]+=-4*wB0 - 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(0,7,8)]+=   wB0 +   wB1 - wB2;
-                                EM_S[INDEX2(1,7,8)]+=  -wB0 + 2*wB1 - 2*wB2;
-                                EM_S[INDEX2(2,7,8)]+= 2*wB0 -   wB1 - 2*wB2;
-                                EM_S[INDEX2(3,7,8)]+=-2*wB0 - 2*wB1 - 4*wB2;
-                                EM_S[INDEX2(4,7,8)]+= 2*wB0 + 2*wB1 + wB2;
-                                EM_S[INDEX2(5,7,8)]+=-2*wB0 + 4*wB1 + 2*wB2;
-                                EM_S[INDEX2(6,7,8)]+= 4*wB0 - 2*wB1 + 2*wB2;
-                                EM_S[INDEX2(7,7,8)]+=-4*wB0 - 4*wB1 + 4*wB2;
+                                const Scalar wB0 = B_p[0]*w53;
+                                const Scalar wB1 = B_p[1]*w51;
+                                const Scalar wB2 = B_p[2]*w50;
+                                EM_S[INDEX2(0,0,8)]+= 4.*wB0 + 4.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(1,0,8)]+=-4.*wB0 + 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(2,0,8)]+= 2.*wB0 - 4.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(3,0,8)]+=-2.*wB0 - 2.*wB1 - wB2;
+                                EM_S[INDEX2(4,0,8)]+= 2.*wB0 + 2.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(5,0,8)]+=-2.*wB0 +    wB1 + 2.*wB2;
+                                EM_S[INDEX2(6,0,8)]+=    wB0 - 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(7,0,8)]+=   -wB0 -    wB1 + wB2;
+                                EM_S[INDEX2(0,1,8)]+= 4.*wB0 + 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(1,1,8)]+=-4.*wB0 + 4.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(2,1,8)]+= 2.*wB0 - 2.*wB1 - wB2;
+                                EM_S[INDEX2(3,1,8)]+=-2.*wB0 - 4.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(4,1,8)]+= 2.*wB0 +    wB1 + 2.*wB2;
+                                EM_S[INDEX2(5,1,8)]+=-2.*wB0 + 2.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(6,1,8)]+=    wB0 -    wB1 + wB2;
+                                EM_S[INDEX2(7,1,8)]+=   -wB0 - 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(0,2,8)]+= 2.*wB0 + 4.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(1,2,8)]+=-2.*wB0 + 2.*wB1 - wB2;
+                                EM_S[INDEX2(2,2,8)]+= 4.*wB0 - 4.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(3,2,8)]+=-4.*wB0 - 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(4,2,8)]+=    wB0 + 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(5,2,8)]+=   -wB0 +    wB1 + wB2;
+                                EM_S[INDEX2(6,2,8)]+= 2.*wB0 - 2.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(7,2,8)]+=-2.*wB0 -    wB1 + 2.*wB2;
+                                EM_S[INDEX2(0,3,8)]+= 2.*wB0 + 2.*wB1 - wB2;
+                                EM_S[INDEX2(1,3,8)]+=-2.*wB0 + 4.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(2,3,8)]+= 4.*wB0 - 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(3,3,8)]+=-4.*wB0 - 4.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(4,3,8)]+=    wB0 +    wB1 + wB2;
+                                EM_S[INDEX2(5,3,8)]+=   -wB0 + 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(6,3,8)]+= 2.*wB0 -    wB1 + 2.*wB2;
+                                EM_S[INDEX2(7,3,8)]+=-2.*wB0 - 2.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(0,4,8)]+= 2.*wB0 + 2.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(1,4,8)]+=-2.*wB0 +    wB1 - 2.*wB2;
+                                EM_S[INDEX2(2,4,8)]+=    wB0 - 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(3,4,8)]+=   -wB0 -    wB1 - wB2;
+                                EM_S[INDEX2(4,4,8)]+= 4.*wB0 + 4.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(5,4,8)]+=-4.*wB0 + 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(6,4,8)]+= 2.*wB0 - 4.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(7,4,8)]+=-2.*wB0 - 2.*wB1 + wB2;
+                                EM_S[INDEX2(0,5,8)]+= 2.*wB0 +    wB1 - 2.*wB2;
+                                EM_S[INDEX2(1,5,8)]+=-2.*wB0 + 2.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(2,5,8)]+=    wB0 -    wB1 - wB2;
+                                EM_S[INDEX2(3,5,8)]+=   -wB0 - 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(4,5,8)]+= 4.*wB0 + 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(5,5,8)]+=-4.*wB0 + 4.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(6,5,8)]+= 2.*wB0 - 2.*wB1 + wB2;
+                                EM_S[INDEX2(7,5,8)]+=-2.*wB0 - 4.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(0,6,8)]+=    wB0 + 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(1,6,8)]+=   -wB0 +    wB1 - wB2;
+                                EM_S[INDEX2(2,6,8)]+= 2.*wB0 - 2.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(3,6,8)]+=-2.*wB0 -    wB1 - 2.*wB2;
+                                EM_S[INDEX2(4,6,8)]+= 2.*wB0 + 4.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(5,6,8)]+=-2.*wB0 + 2.*wB1 + wB2;
+                                EM_S[INDEX2(6,6,8)]+= 4.*wB0 - 4.*wB1 + 4.*wB2;
+                                EM_S[INDEX2(7,6,8)]+=-4.*wB0 - 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(0,7,8)]+=    wB0 +    wB1 - wB2;
+                                EM_S[INDEX2(1,7,8)]+=   -wB0 + 2.*wB1 - 2.*wB2;
+                                EM_S[INDEX2(2,7,8)]+= 2.*wB0 -    wB1 - 2.*wB2;
+                                EM_S[INDEX2(3,7,8)]+=-2.*wB0 - 2.*wB1 - 4.*wB2;
+                                EM_S[INDEX2(4,7,8)]+= 2.*wB0 + 2.*wB1 + wB2;
+                                EM_S[INDEX2(5,7,8)]+=-2.*wB0 + 4.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(6,7,8)]+= 4.*wB0 - 2.*wB1 + 2.*wB2;
+                                EM_S[INDEX2(7,7,8)]+=-4.*wB0 - 4.*wB1 + 4.*wB2;
                             }
                         }
                         ///////////////
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double* C_p=C.getSampleDataRO(e);
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
                             if (C.actsExpanded()) {
-                                const double C_0_0 = C_p[INDEX2(0,0,3)];
-                                const double C_1_0 = C_p[INDEX2(1,0,3)];
-                                const double C_2_0 = C_p[INDEX2(2,0,3)];
-                                const double C_0_1 = C_p[INDEX2(0,1,3)];
-                                const double C_1_1 = C_p[INDEX2(1,1,3)];
-                                const double C_2_1 = C_p[INDEX2(2,1,3)];
-                                const double C_0_2 = C_p[INDEX2(0,2,3)];
-                                const double C_1_2 = C_p[INDEX2(1,2,3)];
-                                const double C_2_2 = C_p[INDEX2(2,2,3)];
-                                const double C_0_3 = C_p[INDEX2(0,3,3)];
-                                const double C_1_3 = C_p[INDEX2(1,3,3)];
-                                const double C_2_3 = C_p[INDEX2(2,3,3)];
-                                const double C_0_4 = C_p[INDEX2(0,4,3)];
-                                const double C_1_4 = C_p[INDEX2(1,4,3)];
-                                const double C_2_4 = C_p[INDEX2(2,4,3)];
-                                const double C_0_5 = C_p[INDEX2(0,5,3)];
-                                const double C_1_5 = C_p[INDEX2(1,5,3)];
-                                const double C_2_5 = C_p[INDEX2(2,5,3)];
-                                const double C_0_6 = C_p[INDEX2(0,6,3)];
-                                const double C_1_6 = C_p[INDEX2(1,6,3)];
-                                const double C_2_6 = C_p[INDEX2(2,6,3)];
-                                const double C_0_7 = C_p[INDEX2(0,7,3)];
-                                const double C_1_7 = C_p[INDEX2(1,7,3)];
-                                const double C_2_7 = C_p[INDEX2(2,7,3)];
-                                const double tmp0 = w38*(C_0_3 + C_0_7);
-                                const double tmp1 = w31*(C_1_0 + C_1_4);
-                                const double tmp2 = w42*(-C_2_1 - C_2_2);
-                                const double tmp3 = w35*(-C_2_5 - C_2_6);
-                                const double tmp4 = w37*(C_1_2 + C_1_6);
-                                const double tmp5 = w39*(C_1_3 + C_1_7);
-                                const double tmp6 = w36*(C_0_2 + C_0_6);
-                                const double tmp7 = w33*(C_0_1 + C_0_5);
-                                const double tmp8 = w30*(C_0_0 + C_0_4);
-                                const double tmp9 = w34*(C_1_1 + C_1_5);
-                                const double tmp10 = w38*(C_0_4 + C_0_6);
-                                const double tmp11 = w31*(C_1_2 + C_1_3);
-                                const double tmp12 = w42*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
-                                const double tmp13 = w35*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
-                                const double tmp14 = w37*(C_1_0 + C_1_1);
-                                const double tmp15 = w39*(C_1_4 + C_1_5);
-                                const double tmp16 = w36*(C_0_5 + C_0_7);
-                                const double tmp17 = w33*(C_0_0 + C_0_2);
-                                const double tmp18 = w30*(C_0_1 + C_0_3);
-                                const double tmp19 = w34*(C_1_6 + C_1_7);
-                                const double tmp20 = w38*(C_0_1 + C_0_3);
-                                const double tmp21 = w42*(-C_2_0 - C_2_2);
-                                const double tmp22 = w35*(-C_2_5 - C_2_7);
-                                const double tmp23 = w37*(C_1_2 + C_1_7);
-                                const double tmp24 = w32*(-C_2_4 - C_2_6);
-                                const double tmp25 = w36*(C_0_0 + C_0_2);
-                                const double tmp26 = w33*(C_0_5 + C_0_7);
-                                const double tmp27 = w30*(C_0_4 + C_0_6);
-                                const double tmp28 = w43*(-C_2_1 - C_2_3);
-                                const double tmp29 = w34*(C_1_0 + C_1_5);
-                                const double tmp30 = w38*(-C_0_4 - C_0_6);
-                                const double tmp31 = w42*(C_2_5 + C_2_7);
-                                const double tmp32 = w35*(C_2_0 + C_2_2);
-                                const double tmp33 = w37*(-C_1_0 - C_1_5);
-                                const double tmp34 = w32*(C_2_1 + C_2_3);
-                                const double tmp35 = w36*(-C_0_5 - C_0_7);
-                                const double tmp36 = w33*(-C_0_0 - C_0_2);
-                                const double tmp37 = w30*(-C_0_1 - C_0_3);
-                                const double tmp38 = w43*(C_2_4 + C_2_6);
-                                const double tmp39 = w34*(-C_1_2 - C_1_7);
-                                const double tmp40 = w38*(-C_0_1 - C_0_3);
-                                const double tmp41 = w31*(-C_1_4 - C_1_5);
-                                const double tmp42 = w42*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
-                                const double tmp43 = w35*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
-                                const double tmp44 = w37*(-C_1_6 - C_1_7);
-                                const double tmp45 = w39*(-C_1_2 - C_1_3);
-                                const double tmp46 = w36*(-C_0_0 - C_0_2);
-                                const double tmp47 = w33*(-C_0_5 - C_0_7);
-                                const double tmp48 = w30*(-C_0_4 - C_0_6);
-                                const double tmp49 = w34*(-C_1_0 - C_1_1);
-                                const double tmp50 = w31*(-C_1_2 - C_1_3);
-                                const double tmp51 = w42*(C_2_6 + C_2_7);
-                                const double tmp52 = w35*(C_2_0 + C_2_1);
-                                const double tmp53 = w37*(-C_1_0 - C_1_1);
-                                const double tmp54 = w32*(C_2_2 + C_2_3);
-                                const double tmp55 = w39*(-C_1_4 - C_1_5);
-                                const double tmp56 = w36*(-C_0_1 - C_0_7);
-                                const double tmp57 = w33*(-C_0_0 - C_0_6);
-                                const double tmp58 = w43*(C_2_4 + C_2_5);
-                                const double tmp59 = w34*(-C_1_6 - C_1_7);
-                                const double tmp60 = w42*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
-                                const double tmp61 = w35*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
-                                const double tmp62 = w37*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
-                                const double tmp63 = w36*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
-                                const double tmp64 = w33*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
-                                const double tmp65 = w34*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
-                                const double tmp66 = w38*(-C_0_5 - C_0_7);
-                                const double tmp67 = w36*(-C_0_4 - C_0_6);
-                                const double tmp68 = w33*(-C_0_1 - C_0_3);
-                                const double tmp69 = w30*(-C_0_0 - C_0_2);
-                                const double tmp70 = w38*(-C_0_2 - C_0_6);
-                                const double tmp71 = w31*(C_1_1 + C_1_5);
-                                const double tmp72 = w42*(C_2_4 + C_2_7);
-                                const double tmp73 = w35*(C_2_0 + C_2_3);
-                                const double tmp74 = w37*(C_1_3 + C_1_7);
-                                const double tmp75 = w39*(C_1_2 + C_1_6);
-                                const double tmp76 = w36*(-C_0_3 - C_0_7);
-                                const double tmp77 = w33*(-C_0_0 - C_0_4);
-                                const double tmp78 = w30*(-C_0_1 - C_0_5);
-                                const double tmp79 = w34*(C_1_0 + C_1_4);
-                                const double tmp80 = w36*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
-                                const double tmp81 = w33*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
-                                const double tmp82 = w38*(C_0_1 + C_0_5);
-                                const double tmp83 = w31*(-C_1_2 - C_1_6);
-                                const double tmp84 = w42*(-C_2_0 - C_2_3);
-                                const double tmp85 = w35*(-C_2_4 - C_2_7);
-                                const double tmp86 = w37*(-C_1_0 - C_1_4);
-                                const double tmp87 = w39*(-C_1_1 - C_1_5);
-                                const double tmp88 = w36*(C_0_0 + C_0_4);
-                                const double tmp89 = w33*(C_0_3 + C_0_7);
-                                const double tmp90 = w30*(C_0_2 + C_0_6);
-                                const double tmp91 = w34*(-C_1_3 - C_1_7);
-                                const double tmp92 = w42*(C_2_5 + C_2_6);
-                                const double tmp93 = w35*(C_2_1 + C_2_2);
-                                const double tmp94 = w37*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
-                                const double tmp95 = w34*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
-                                const double tmp96 = w38*(C_0_0 + C_0_2);
-                                const double tmp97 = w31*(C_1_6 + C_1_7);
-                                const double tmp98 = w37*(C_1_4 + C_1_5);
-                                const double tmp99 = w39*(C_1_0 + C_1_1);
-                                const double tmp100 = w36*(C_0_1 + C_0_3);
-                                const double tmp101 = w33*(C_0_4 + C_0_6);
-                                const double tmp102 = w30*(C_0_5 + C_0_7);
-                                const double tmp103 = w34*(C_1_2 + C_1_3);
-                                const double tmp104 = w38*(-C_0_3 - C_0_7);
-                                const double tmp105 = w42*(-C_2_4 - C_2_5);
-                                const double tmp106 = w35*(-C_2_2 - C_2_3);
-                                const double tmp107 = w37*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
-                                const double tmp108 = w32*(-C_2_0 - C_2_1);
-                                const double tmp109 = w36*(-C_0_2 - C_0_6);
-                                const double tmp110 = w33*(-C_0_1 - C_0_5);
-                                const double tmp111 = w30*(-C_0_0 - C_0_4);
-                                const double tmp112 = w43*(-C_2_6 - C_2_7);
-                                const double tmp113 = w34*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
-                                const double tmp114 = w38*(-C_0_0 - C_0_4);
-                                const double tmp115 = w31*(-C_1_3 - C_1_7);
-                                const double tmp116 = w37*(-C_1_1 - C_1_5);
-                                const double tmp117 = w39*(-C_1_0 - C_1_4);
-                                const double tmp118 = w36*(-C_0_1 - C_0_5);
-                                const double tmp119 = w33*(-C_0_2 - C_0_6);
-                                const double tmp120 = w30*(-C_0_3 - C_0_7);
-                                const double tmp121 = w34*(-C_1_2 - C_1_6);
-                                const double tmp122 = w31*(C_1_0 + C_1_1);
-                                const double tmp123 = w42*(C_2_4 + C_2_5);
-                                const double tmp124 = w35*(C_2_2 + C_2_3);
-                                const double tmp125 = w37*(C_1_2 + C_1_3);
-                                const double tmp126 = w32*(C_2_0 + C_2_1);
-                                const double tmp127 = w39*(C_1_6 + C_1_7);
-                                const double tmp128 = w36*(C_0_2 + C_0_4);
-                                const double tmp129 = w33*(C_0_3 + C_0_5);
-                                const double tmp130 = w43*(C_2_6 + C_2_7);
-                                const double tmp131 = w34*(C_1_4 + C_1_5);
-                                const double tmp132 = w42*(-C_2_5 - C_2_6);
-                                const double tmp133 = w35*(-C_2_1 - C_2_2);
-                                const double tmp134 = w37*(C_1_0 + C_1_5);
-                                const double tmp135 = w36*(C_0_1 + C_0_7);
-                                const double tmp136 = w33*(C_0_0 + C_0_6);
-                                const double tmp137 = w34*(C_1_2 + C_1_7);
-                                const double tmp138 = w38*(-C_0_0 - C_0_2);
-                                const double tmp139 = w42*(-C_2_1 - C_2_3);
-                                const double tmp140 = w35*(-C_2_4 - C_2_6);
-                                const double tmp141 = w37*(-C_1_1 - C_1_4);
-                                const double tmp142 = w32*(-C_2_5 - C_2_7);
-                                const double tmp143 = w36*(-C_0_1 - C_0_3);
-                                const double tmp144 = w33*(-C_0_4 - C_0_6);
-                                const double tmp145 = w30*(-C_0_5 - C_0_7);
-                                const double tmp146 = w43*(-C_2_0 - C_2_2);
-                                const double tmp147 = w34*(-C_1_3 - C_1_6);
-                                const double tmp148 = w36*(-C_0_3 - C_0_5);
-                                const double tmp149 = w33*(-C_0_2 - C_0_4);
-                                const double tmp150 = w42*(C_2_1 + C_2_2);
-                                const double tmp151 = w35*(C_2_5 + C_2_6);
-                                const double tmp152 = w37*(-C_1_2 - C_1_7);
-                                const double tmp153 = w36*(-C_0_0 - C_0_6);
-                                const double tmp154 = w33*(-C_0_1 - C_0_7);
-                                const double tmp155 = w34*(-C_1_0 - C_1_5);
-                                const double tmp156 = w38*(C_0_2 + C_0_6);
-                                const double tmp157 = w36*(C_0_3 + C_0_7);
-                                const double tmp158 = w33*(C_0_0 + C_0_4);
-                                const double tmp159 = w30*(C_0_1 + C_0_5);
-                                const double tmp160 = w42*(C_2_0 + C_2_1);
-                                const double tmp161 = w35*(C_2_6 + C_2_7);
-                                const double tmp162 = w32*(C_2_4 + C_2_5);
-                                const double tmp163 = w43*(C_2_2 + C_2_3);
-                                const double tmp164 = w42*(-C_2_4 - C_2_7);
-                                const double tmp165 = w35*(-C_2_0 - C_2_3);
-                                const double tmp166 = w37*(C_1_1 + C_1_4);
-                                const double tmp167 = w34*(C_1_3 + C_1_6);
-                                const double tmp168 = w36*(C_0_3 + C_0_5);
-                                const double tmp169 = w33*(C_0_2 + C_0_4);
-                                const double tmp170 = w38*(C_0_5 + C_0_7);
-                                const double tmp171 = w42*(C_2_4 + C_2_6);
-                                const double tmp172 = w35*(C_2_1 + C_2_3);
-                                const double tmp173 = w37*(C_1_3 + C_1_6);
-                                const double tmp174 = w32*(C_2_0 + C_2_2);
-                                const double tmp175 = w36*(C_0_4 + C_0_6);
-                                const double tmp176 = w33*(C_0_1 + C_0_3);
-                                const double tmp177 = w30*(C_0_0 + C_0_2);
-                                const double tmp178 = w43*(C_2_5 + C_2_7);
-                                const double tmp179 = w34*(C_1_1 + C_1_4);
-                                const double tmp180 = w31*(C_1_2 + C_1_6);
-                                const double tmp181 = w42*(-C_2_4 - C_2_6);
-                                const double tmp182 = w35*(-C_2_1 - C_2_3);
-                                const double tmp183 = w37*(C_1_0 + C_1_4);
-                                const double tmp184 = w32*(-C_2_0 - C_2_2);
-                                const double tmp185 = w39*(C_1_1 + C_1_5);
-                                const double tmp186 = w36*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
-                                const double tmp187 = w33*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
-                                const double tmp188 = w43*(-C_2_5 - C_2_7);
-                                const double tmp189 = w34*(C_1_3 + C_1_7);
-                                const double tmp190 = w38*(C_0_0 + C_0_4);
-                                const double tmp191 = w42*(-C_2_6 - C_2_7);
-                                const double tmp192 = w35*(-C_2_0 - C_2_1);
-                                const double tmp193 = w37*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
-                                const double tmp194 = w32*(-C_2_2 - C_2_3);
-                                const double tmp195 = w36*(C_0_1 + C_0_5);
-                                const double tmp196 = w33*(C_0_2 + C_0_6);
-                                const double tmp197 = w30*(C_0_3 + C_0_7);
-                                const double tmp198 = w43*(-C_2_4 - C_2_5);
-                                const double tmp199 = w34*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
-                                const double tmp200 = w31*(C_1_4 + C_1_5);
-                                const double tmp201 = w42*(-C_2_0 - C_2_1);
-                                const double tmp202 = w35*(-C_2_6 - C_2_7);
-                                const double tmp203 = w37*(C_1_6 + C_1_7);
-                                const double tmp204 = w32*(-C_2_4 - C_2_5);
-                                const double tmp205 = w39*(C_1_2 + C_1_3);
-                                const double tmp206 = w43*(-C_2_2 - C_2_3);
-                                const double tmp207 = w34*(C_1_0 + C_1_1);
-                                const double tmp208 = w37*(-C_1_3 - C_1_6);
-                                const double tmp209 = w36*(-C_0_2 - C_0_4);
-                                const double tmp210 = w33*(-C_0_3 - C_0_5);
-                                const double tmp211 = w34*(-C_1_1 - C_1_4);
-                                const double tmp212 = w42*(C_2_0 + C_2_3);
-                                const double tmp213 = w35*(C_2_4 + C_2_7);
-                                const double tmp214 = w38*(-C_0_1 - C_0_5);
-                                const double tmp215 = w36*(-C_0_0 - C_0_4);
-                                const double tmp216 = w33*(-C_0_3 - C_0_7);
-                                const double tmp217 = w30*(-C_0_2 - C_0_6);
-                                const double tmp218 = w31*(-C_1_0 - C_1_4);
-                                const double tmp219 = w37*(-C_1_2 - C_1_6);
-                                const double tmp220 = w39*(-C_1_3 - C_1_7);
-                                const double tmp221 = w34*(-C_1_1 - C_1_5);
-                                const double tmp222 = w36*(C_0_0 + C_0_6);
-                                const double tmp223 = w33*(C_0_1 + C_0_7);
-                                const double tmp224 = w42*(C_2_2 + C_2_3);
-                                const double tmp225 = w35*(C_2_4 + C_2_5);
-                                const double tmp226 = w32*(C_2_6 + C_2_7);
-                                const double tmp227 = w43*(C_2_0 + C_2_1);
-                                const double tmp228 = w31*(-C_1_1 - C_1_5);
-                                const double tmp229 = w42*(-C_2_5 - C_2_7);
-                                const double tmp230 = w35*(-C_2_0 - C_2_2);
-                                const double tmp231 = w37*(-C_1_3 - C_1_7);
-                                const double tmp232 = w32*(-C_2_1 - C_2_3);
-                                const double tmp233 = w39*(-C_1_2 - C_1_6);
-                                const double tmp234 = w36*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
-                                const double tmp235 = w33*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
-                                const double tmp236 = w43*(-C_2_4 - C_2_6);
-                                const double tmp237 = w34*(-C_1_0 - C_1_4);
-                                const double tmp238 = w31*(C_1_3 + C_1_7);
-                                const double tmp239 = w37*(C_1_1 + C_1_5);
-                                const double tmp240 = w39*(C_1_0 + C_1_4);
-                                const double tmp241 = w34*(C_1_2 + C_1_6);
-                                const double tmp242 = w31*(-C_1_6 - C_1_7);
-                                const double tmp243 = w42*(-C_2_2 - C_2_3);
-                                const double tmp244 = w35*(-C_2_4 - C_2_5);
-                                const double tmp245 = w37*(-C_1_4 - C_1_5);
-                                const double tmp246 = w32*(-C_2_6 - C_2_7);
-                                const double tmp247 = w39*(-C_1_0 - C_1_1);
-                                const double tmp248 = w43*(-C_2_0 - C_2_1);
-                                const double tmp249 = w34*(-C_1_2 - C_1_3);
-                                const double tmp250 = w31*(-C_1_0 - C_1_1);
-                                const double tmp251 = w37*(-C_1_2 - C_1_3);
-                                const double tmp252 = w39*(-C_1_6 - C_1_7);
-                                const double tmp253 = w34*(-C_1_4 - C_1_5);
-                                const double tmp254 = w42*(C_2_0 + C_2_2);
-                                const double tmp255 = w35*(C_2_5 + C_2_7);
-                                const double tmp256 = w32*(C_2_4 + C_2_6);
-                                const double tmp257 = w43*(C_2_1 + C_2_3);
-                                const double tmp258 = w42*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
-                                const double tmp259 = w35*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
-                                const double tmp260 = w42*(C_2_1 + C_2_3);
-                                const double tmp261 = w35*(C_2_4 + C_2_6);
-                                const double tmp262 = w32*(C_2_5 + C_2_7);
-                                const double tmp263 = w43*(C_2_0 + C_2_2);
+                                const Scalar C_0_0 = C_p[INDEX2(0,0,3)];
+                                const Scalar C_1_0 = C_p[INDEX2(1,0,3)];
+                                const Scalar C_2_0 = C_p[INDEX2(2,0,3)];
+                                const Scalar C_0_1 = C_p[INDEX2(0,1,3)];
+                                const Scalar C_1_1 = C_p[INDEX2(1,1,3)];
+                                const Scalar C_2_1 = C_p[INDEX2(2,1,3)];
+                                const Scalar C_0_2 = C_p[INDEX2(0,2,3)];
+                                const Scalar C_1_2 = C_p[INDEX2(1,2,3)];
+                                const Scalar C_2_2 = C_p[INDEX2(2,2,3)];
+                                const Scalar C_0_3 = C_p[INDEX2(0,3,3)];
+                                const Scalar C_1_3 = C_p[INDEX2(1,3,3)];
+                                const Scalar C_2_3 = C_p[INDEX2(2,3,3)];
+                                const Scalar C_0_4 = C_p[INDEX2(0,4,3)];
+                                const Scalar C_1_4 = C_p[INDEX2(1,4,3)];
+                                const Scalar C_2_4 = C_p[INDEX2(2,4,3)];
+                                const Scalar C_0_5 = C_p[INDEX2(0,5,3)];
+                                const Scalar C_1_5 = C_p[INDEX2(1,5,3)];
+                                const Scalar C_2_5 = C_p[INDEX2(2,5,3)];
+                                const Scalar C_0_6 = C_p[INDEX2(0,6,3)];
+                                const Scalar C_1_6 = C_p[INDEX2(1,6,3)];
+                                const Scalar C_2_6 = C_p[INDEX2(2,6,3)];
+                                const Scalar C_0_7 = C_p[INDEX2(0,7,3)];
+                                const Scalar C_1_7 = C_p[INDEX2(1,7,3)];
+                                const Scalar C_2_7 = C_p[INDEX2(2,7,3)];
+                                const Scalar tmp0 = w38*(C_0_3 + C_0_7);
+                                const Scalar tmp1 = w31*(C_1_0 + C_1_4);
+                                const Scalar tmp2 = w42*(-C_2_1 - C_2_2);
+                                const Scalar tmp3 = w35*(-C_2_5 - C_2_6);
+                                const Scalar tmp4 = w37*(C_1_2 + C_1_6);
+                                const Scalar tmp5 = w39*(C_1_3 + C_1_7);
+                                const Scalar tmp6 = w36*(C_0_2 + C_0_6);
+                                const Scalar tmp7 = w33*(C_0_1 + C_0_5);
+                                const Scalar tmp8 = w30*(C_0_0 + C_0_4);
+                                const Scalar tmp9 = w34*(C_1_1 + C_1_5);
+                                const Scalar tmp10 = w38*(C_0_4 + C_0_6);
+                                const Scalar tmp11 = w31*(C_1_2 + C_1_3);
+                                const Scalar tmp12 = w42*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
+                                const Scalar tmp13 = w35*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
+                                const Scalar tmp14 = w37*(C_1_0 + C_1_1);
+                                const Scalar tmp15 = w39*(C_1_4 + C_1_5);
+                                const Scalar tmp16 = w36*(C_0_5 + C_0_7);
+                                const Scalar tmp17 = w33*(C_0_0 + C_0_2);
+                                const Scalar tmp18 = w30*(C_0_1 + C_0_3);
+                                const Scalar tmp19 = w34*(C_1_6 + C_1_7);
+                                const Scalar tmp20 = w38*(C_0_1 + C_0_3);
+                                const Scalar tmp21 = w42*(-C_2_0 - C_2_2);
+                                const Scalar tmp22 = w35*(-C_2_5 - C_2_7);
+                                const Scalar tmp23 = w37*(C_1_2 + C_1_7);
+                                const Scalar tmp24 = w32*(-C_2_4 - C_2_6);
+                                const Scalar tmp25 = w36*(C_0_0 + C_0_2);
+                                const Scalar tmp26 = w33*(C_0_5 + C_0_7);
+                                const Scalar tmp27 = w30*(C_0_4 + C_0_6);
+                                const Scalar tmp28 = w43*(-C_2_1 - C_2_3);
+                                const Scalar tmp29 = w34*(C_1_0 + C_1_5);
+                                const Scalar tmp30 = w38*(-C_0_4 - C_0_6);
+                                const Scalar tmp31 = w42*(C_2_5 + C_2_7);
+                                const Scalar tmp32 = w35*(C_2_0 + C_2_2);
+                                const Scalar tmp33 = w37*(-C_1_0 - C_1_5);
+                                const Scalar tmp34 = w32*(C_2_1 + C_2_3);
+                                const Scalar tmp35 = w36*(-C_0_5 - C_0_7);
+                                const Scalar tmp36 = w33*(-C_0_0 - C_0_2);
+                                const Scalar tmp37 = w30*(-C_0_1 - C_0_3);
+                                const Scalar tmp38 = w43*(C_2_4 + C_2_6);
+                                const Scalar tmp39 = w34*(-C_1_2 - C_1_7);
+                                const Scalar tmp40 = w38*(-C_0_1 - C_0_3);
+                                const Scalar tmp41 = w31*(-C_1_4 - C_1_5);
+                                const Scalar tmp42 = w42*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
+                                const Scalar tmp43 = w35*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
+                                const Scalar tmp44 = w37*(-C_1_6 - C_1_7);
+                                const Scalar tmp45 = w39*(-C_1_2 - C_1_3);
+                                const Scalar tmp46 = w36*(-C_0_0 - C_0_2);
+                                const Scalar tmp47 = w33*(-C_0_5 - C_0_7);
+                                const Scalar tmp48 = w30*(-C_0_4 - C_0_6);
+                                const Scalar tmp49 = w34*(-C_1_0 - C_1_1);
+                                const Scalar tmp50 = w31*(-C_1_2 - C_1_3);
+                                const Scalar tmp51 = w42*(C_2_6 + C_2_7);
+                                const Scalar tmp52 = w35*(C_2_0 + C_2_1);
+                                const Scalar tmp53 = w37*(-C_1_0 - C_1_1);
+                                const Scalar tmp54 = w32*(C_2_2 + C_2_3);
+                                const Scalar tmp55 = w39*(-C_1_4 - C_1_5);
+                                const Scalar tmp56 = w36*(-C_0_1 - C_0_7);
+                                const Scalar tmp57 = w33*(-C_0_0 - C_0_6);
+                                const Scalar tmp58 = w43*(C_2_4 + C_2_5);
+                                const Scalar tmp59 = w34*(-C_1_6 - C_1_7);
+                                const Scalar tmp60 = w42*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
+                                const Scalar tmp61 = w35*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
+                                const Scalar tmp62 = w37*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
+                                const Scalar tmp63 = w36*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
+                                const Scalar tmp64 = w33*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
+                                const Scalar tmp65 = w34*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
+                                const Scalar tmp66 = w38*(-C_0_5 - C_0_7);
+                                const Scalar tmp67 = w36*(-C_0_4 - C_0_6);
+                                const Scalar tmp68 = w33*(-C_0_1 - C_0_3);
+                                const Scalar tmp69 = w30*(-C_0_0 - C_0_2);
+                                const Scalar tmp70 = w38*(-C_0_2 - C_0_6);
+                                const Scalar tmp71 = w31*(C_1_1 + C_1_5);
+                                const Scalar tmp72 = w42*(C_2_4 + C_2_7);
+                                const Scalar tmp73 = w35*(C_2_0 + C_2_3);
+                                const Scalar tmp74 = w37*(C_1_3 + C_1_7);
+                                const Scalar tmp75 = w39*(C_1_2 + C_1_6);
+                                const Scalar tmp76 = w36*(-C_0_3 - C_0_7);
+                                const Scalar tmp77 = w33*(-C_0_0 - C_0_4);
+                                const Scalar tmp78 = w30*(-C_0_1 - C_0_5);
+                                const Scalar tmp79 = w34*(C_1_0 + C_1_4);
+                                const Scalar tmp80 = w36*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
+                                const Scalar tmp81 = w33*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
+                                const Scalar tmp82 = w38*(C_0_1 + C_0_5);
+                                const Scalar tmp83 = w31*(-C_1_2 - C_1_6);
+                                const Scalar tmp84 = w42*(-C_2_0 - C_2_3);
+                                const Scalar tmp85 = w35*(-C_2_4 - C_2_7);
+                                const Scalar tmp86 = w37*(-C_1_0 - C_1_4);
+                                const Scalar tmp87 = w39*(-C_1_1 - C_1_5);
+                                const Scalar tmp88 = w36*(C_0_0 + C_0_4);
+                                const Scalar tmp89 = w33*(C_0_3 + C_0_7);
+                                const Scalar tmp90 = w30*(C_0_2 + C_0_6);
+                                const Scalar tmp91 = w34*(-C_1_3 - C_1_7);
+                                const Scalar tmp92 = w42*(C_2_5 + C_2_6);
+                                const Scalar tmp93 = w35*(C_2_1 + C_2_2);
+                                const Scalar tmp94 = w37*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
+                                const Scalar tmp95 = w34*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
+                                const Scalar tmp96 = w38*(C_0_0 + C_0_2);
+                                const Scalar tmp97 = w31*(C_1_6 + C_1_7);
+                                const Scalar tmp98 = w37*(C_1_4 + C_1_5);
+                                const Scalar tmp99 = w39*(C_1_0 + C_1_1);
+                                const Scalar tmp100 = w36*(C_0_1 + C_0_3);
+                                const Scalar tmp101 = w33*(C_0_4 + C_0_6);
+                                const Scalar tmp102 = w30*(C_0_5 + C_0_7);
+                                const Scalar tmp103 = w34*(C_1_2 + C_1_3);
+                                const Scalar tmp104 = w38*(-C_0_3 - C_0_7);
+                                const Scalar tmp105 = w42*(-C_2_4 - C_2_5);
+                                const Scalar tmp106 = w35*(-C_2_2 - C_2_3);
+                                const Scalar tmp107 = w37*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
+                                const Scalar tmp108 = w32*(-C_2_0 - C_2_1);
+                                const Scalar tmp109 = w36*(-C_0_2 - C_0_6);
+                                const Scalar tmp110 = w33*(-C_0_1 - C_0_5);
+                                const Scalar tmp111 = w30*(-C_0_0 - C_0_4);
+                                const Scalar tmp112 = w43*(-C_2_6 - C_2_7);
+                                const Scalar tmp113 = w34*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
+                                const Scalar tmp114 = w38*(-C_0_0 - C_0_4);
+                                const Scalar tmp115 = w31*(-C_1_3 - C_1_7);
+                                const Scalar tmp116 = w37*(-C_1_1 - C_1_5);
+                                const Scalar tmp117 = w39*(-C_1_0 - C_1_4);
+                                const Scalar tmp118 = w36*(-C_0_1 - C_0_5);
+                                const Scalar tmp119 = w33*(-C_0_2 - C_0_6);
+                                const Scalar tmp120 = w30*(-C_0_3 - C_0_7);
+                                const Scalar tmp121 = w34*(-C_1_2 - C_1_6);
+                                const Scalar tmp122 = w31*(C_1_0 + C_1_1);
+                                const Scalar tmp123 = w42*(C_2_4 + C_2_5);
+                                const Scalar tmp124 = w35*(C_2_2 + C_2_3);
+                                const Scalar tmp125 = w37*(C_1_2 + C_1_3);
+                                const Scalar tmp126 = w32*(C_2_0 + C_2_1);
+                                const Scalar tmp127 = w39*(C_1_6 + C_1_7);
+                                const Scalar tmp128 = w36*(C_0_2 + C_0_4);
+                                const Scalar tmp129 = w33*(C_0_3 + C_0_5);
+                                const Scalar tmp130 = w43*(C_2_6 + C_2_7);
+                                const Scalar tmp131 = w34*(C_1_4 + C_1_5);
+                                const Scalar tmp132 = w42*(-C_2_5 - C_2_6);
+                                const Scalar tmp133 = w35*(-C_2_1 - C_2_2);
+                                const Scalar tmp134 = w37*(C_1_0 + C_1_5);
+                                const Scalar tmp135 = w36*(C_0_1 + C_0_7);
+                                const Scalar tmp136 = w33*(C_0_0 + C_0_6);
+                                const Scalar tmp137 = w34*(C_1_2 + C_1_7);
+                                const Scalar tmp138 = w38*(-C_0_0 - C_0_2);
+                                const Scalar tmp139 = w42*(-C_2_1 - C_2_3);
+                                const Scalar tmp140 = w35*(-C_2_4 - C_2_6);
+                                const Scalar tmp141 = w37*(-C_1_1 - C_1_4);
+                                const Scalar tmp142 = w32*(-C_2_5 - C_2_7);
+                                const Scalar tmp143 = w36*(-C_0_1 - C_0_3);
+                                const Scalar tmp144 = w33*(-C_0_4 - C_0_6);
+                                const Scalar tmp145 = w30*(-C_0_5 - C_0_7);
+                                const Scalar tmp146 = w43*(-C_2_0 - C_2_2);
+                                const Scalar tmp147 = w34*(-C_1_3 - C_1_6);
+                                const Scalar tmp148 = w36*(-C_0_3 - C_0_5);
+                                const Scalar tmp149 = w33*(-C_0_2 - C_0_4);
+                                const Scalar tmp150 = w42*(C_2_1 + C_2_2);
+                                const Scalar tmp151 = w35*(C_2_5 + C_2_6);
+                                const Scalar tmp152 = w37*(-C_1_2 - C_1_7);
+                                const Scalar tmp153 = w36*(-C_0_0 - C_0_6);
+                                const Scalar tmp154 = w33*(-C_0_1 - C_0_7);
+                                const Scalar tmp155 = w34*(-C_1_0 - C_1_5);
+                                const Scalar tmp156 = w38*(C_0_2 + C_0_6);
+                                const Scalar tmp157 = w36*(C_0_3 + C_0_7);
+                                const Scalar tmp158 = w33*(C_0_0 + C_0_4);
+                                const Scalar tmp159 = w30*(C_0_1 + C_0_5);
+                                const Scalar tmp160 = w42*(C_2_0 + C_2_1);
+                                const Scalar tmp161 = w35*(C_2_6 + C_2_7);
+                                const Scalar tmp162 = w32*(C_2_4 + C_2_5);
+                                const Scalar tmp163 = w43*(C_2_2 + C_2_3);
+                                const Scalar tmp164 = w42*(-C_2_4 - C_2_7);
+                                const Scalar tmp165 = w35*(-C_2_0 - C_2_3);
+                                const Scalar tmp166 = w37*(C_1_1 + C_1_4);
+                                const Scalar tmp167 = w34*(C_1_3 + C_1_6);
+                                const Scalar tmp168 = w36*(C_0_3 + C_0_5);
+                                const Scalar tmp169 = w33*(C_0_2 + C_0_4);
+                                const Scalar tmp170 = w38*(C_0_5 + C_0_7);
+                                const Scalar tmp171 = w42*(C_2_4 + C_2_6);
+                                const Scalar tmp172 = w35*(C_2_1 + C_2_3);
+                                const Scalar tmp173 = w37*(C_1_3 + C_1_6);
+                                const Scalar tmp174 = w32*(C_2_0 + C_2_2);
+                                const Scalar tmp175 = w36*(C_0_4 + C_0_6);
+                                const Scalar tmp176 = w33*(C_0_1 + C_0_3);
+                                const Scalar tmp177 = w30*(C_0_0 + C_0_2);
+                                const Scalar tmp178 = w43*(C_2_5 + C_2_7);
+                                const Scalar tmp179 = w34*(C_1_1 + C_1_4);
+                                const Scalar tmp180 = w31*(C_1_2 + C_1_6);
+                                const Scalar tmp181 = w42*(-C_2_4 - C_2_6);
+                                const Scalar tmp182 = w35*(-C_2_1 - C_2_3);
+                                const Scalar tmp183 = w37*(C_1_0 + C_1_4);
+                                const Scalar tmp184 = w32*(-C_2_0 - C_2_2);
+                                const Scalar tmp185 = w39*(C_1_1 + C_1_5);
+                                const Scalar tmp186 = w36*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
+                                const Scalar tmp187 = w33*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
+                                const Scalar tmp188 = w43*(-C_2_5 - C_2_7);
+                                const Scalar tmp189 = w34*(C_1_3 + C_1_7);
+                                const Scalar tmp190 = w38*(C_0_0 + C_0_4);
+                                const Scalar tmp191 = w42*(-C_2_6 - C_2_7);
+                                const Scalar tmp192 = w35*(-C_2_0 - C_2_1);
+                                const Scalar tmp193 = w37*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
+                                const Scalar tmp194 = w32*(-C_2_2 - C_2_3);
+                                const Scalar tmp195 = w36*(C_0_1 + C_0_5);
+                                const Scalar tmp196 = w33*(C_0_2 + C_0_6);
+                                const Scalar tmp197 = w30*(C_0_3 + C_0_7);
+                                const Scalar tmp198 = w43*(-C_2_4 - C_2_5);
+                                const Scalar tmp199 = w34*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
+                                const Scalar tmp200 = w31*(C_1_4 + C_1_5);
+                                const Scalar tmp201 = w42*(-C_2_0 - C_2_1);
+                                const Scalar tmp202 = w35*(-C_2_6 - C_2_7);
+                                const Scalar tmp203 = w37*(C_1_6 + C_1_7);
+                                const Scalar tmp204 = w32*(-C_2_4 - C_2_5);
+                                const Scalar tmp205 = w39*(C_1_2 + C_1_3);
+                                const Scalar tmp206 = w43*(-C_2_2 - C_2_3);
+                                const Scalar tmp207 = w34*(C_1_0 + C_1_1);
+                                const Scalar tmp208 = w37*(-C_1_3 - C_1_6);
+                                const Scalar tmp209 = w36*(-C_0_2 - C_0_4);
+                                const Scalar tmp210 = w33*(-C_0_3 - C_0_5);
+                                const Scalar tmp211 = w34*(-C_1_1 - C_1_4);
+                                const Scalar tmp212 = w42*(C_2_0 + C_2_3);
+                                const Scalar tmp213 = w35*(C_2_4 + C_2_7);
+                                const Scalar tmp214 = w38*(-C_0_1 - C_0_5);
+                                const Scalar tmp215 = w36*(-C_0_0 - C_0_4);
+                                const Scalar tmp216 = w33*(-C_0_3 - C_0_7);
+                                const Scalar tmp217 = w30*(-C_0_2 - C_0_6);
+                                const Scalar tmp218 = w31*(-C_1_0 - C_1_4);
+                                const Scalar tmp219 = w37*(-C_1_2 - C_1_6);
+                                const Scalar tmp220 = w39*(-C_1_3 - C_1_7);
+                                const Scalar tmp221 = w34*(-C_1_1 - C_1_5);
+                                const Scalar tmp222 = w36*(C_0_0 + C_0_6);
+                                const Scalar tmp223 = w33*(C_0_1 + C_0_7);
+                                const Scalar tmp224 = w42*(C_2_2 + C_2_3);
+                                const Scalar tmp225 = w35*(C_2_4 + C_2_5);
+                                const Scalar tmp226 = w32*(C_2_6 + C_2_7);
+                                const Scalar tmp227 = w43*(C_2_0 + C_2_1);
+                                const Scalar tmp228 = w31*(-C_1_1 - C_1_5);
+                                const Scalar tmp229 = w42*(-C_2_5 - C_2_7);
+                                const Scalar tmp230 = w35*(-C_2_0 - C_2_2);
+                                const Scalar tmp231 = w37*(-C_1_3 - C_1_7);
+                                const Scalar tmp232 = w32*(-C_2_1 - C_2_3);
+                                const Scalar tmp233 = w39*(-C_1_2 - C_1_6);
+                                const Scalar tmp234 = w36*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
+                                const Scalar tmp235 = w33*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
+                                const Scalar tmp236 = w43*(-C_2_4 - C_2_6);
+                                const Scalar tmp237 = w34*(-C_1_0 - C_1_4);
+                                const Scalar tmp238 = w31*(C_1_3 + C_1_7);
+                                const Scalar tmp239 = w37*(C_1_1 + C_1_5);
+                                const Scalar tmp240 = w39*(C_1_0 + C_1_4);
+                                const Scalar tmp241 = w34*(C_1_2 + C_1_6);
+                                const Scalar tmp242 = w31*(-C_1_6 - C_1_7);
+                                const Scalar tmp243 = w42*(-C_2_2 - C_2_3);
+                                const Scalar tmp244 = w35*(-C_2_4 - C_2_5);
+                                const Scalar tmp245 = w37*(-C_1_4 - C_1_5);
+                                const Scalar tmp246 = w32*(-C_2_6 - C_2_7);
+                                const Scalar tmp247 = w39*(-C_1_0 - C_1_1);
+                                const Scalar tmp248 = w43*(-C_2_0 - C_2_1);
+                                const Scalar tmp249 = w34*(-C_1_2 - C_1_3);
+                                const Scalar tmp250 = w31*(-C_1_0 - C_1_1);
+                                const Scalar tmp251 = w37*(-C_1_2 - C_1_3);
+                                const Scalar tmp252 = w39*(-C_1_6 - C_1_7);
+                                const Scalar tmp253 = w34*(-C_1_4 - C_1_5);
+                                const Scalar tmp254 = w42*(C_2_0 + C_2_2);
+                                const Scalar tmp255 = w35*(C_2_5 + C_2_7);
+                                const Scalar tmp256 = w32*(C_2_4 + C_2_6);
+                                const Scalar tmp257 = w43*(C_2_1 + C_2_3);
+                                const Scalar tmp258 = w42*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
+                                const Scalar tmp259 = w35*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
+                                const Scalar tmp260 = w42*(C_2_1 + C_2_3);
+                                const Scalar tmp261 = w35*(C_2_4 + C_2_6);
+                                const Scalar tmp262 = w32*(C_2_5 + C_2_7);
+                                const Scalar tmp263 = w43*(C_2_0 + C_2_2);
                                 EM_S[INDEX2(0,0,8)]+=-C_0_0*w47 - C_0_1*w38 - C_0_6*w30 - C_0_7*w46 + C_1_0*w44 - C_1_2*w39 - C_1_5*w31 + C_1_7*w45 - C_2_0*w40 - C_2_3*w32 - C_2_4*w43 - C_2_7*w41 + tmp132 + tmp133 + tmp208 + tmp209 + tmp210 + tmp211;
                                 EM_S[INDEX2(1,0,8)]+=-C_0_0*w38 - C_0_1*w47 - C_0_6*w46 - C_0_7*w30 + tmp148 + tmp149 + tmp242 + tmp243 + tmp244 + tmp245 + tmp246 + tmp247 + tmp248 + tmp249;
                                 EM_S[INDEX2(2,0,8)]+=-C_1_0*w39 + C_1_2*w44 + C_1_5*w45 - C_1_7*w31 + tmp138 + tmp139 + tmp140 + tmp141 + tmp142 + tmp143 + tmp144 + tmp145 + tmp146 + tmp147;
@@ -1762,148 +1777,148 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_S[INDEX2(6,7,8)]+=C_0_0*w30 + C_0_1*w46 + C_0_6*w47 + C_0_7*w38 + tmp122 + tmp123 + tmp124 + tmp125 + tmp126 + tmp127 + tmp128 + tmp129 + tmp130 + tmp131;
                                 EM_S[INDEX2(7,7,8)]+=C_0_0*w46 + C_0_1*w30 + C_0_6*w38 + C_0_7*w47 - C_1_0*w45 + C_1_2*w31 + C_1_5*w39 - C_1_7*w44 + C_2_0*w41 + C_2_3*w43 + C_2_4*w32 + C_2_7*w40 + tmp150 + tmp151 + tmp166 + tmp167 + tmp168 + tmp169;
                             } else { // constant data
-                                const double wC0 = C_p[0]*w53;
-                                const double wC1 = C_p[1]*w51;
-                                const double wC2 = C_p[2]*w50;
-                                EM_S[INDEX2(0,0,8)]+= 4*wC0 + 4*wC1 - 4*wC2;
-                                EM_S[INDEX2(1,0,8)]+= 4*wC0 + 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(2,0,8)]+= 2*wC0 + 4*wC1 - 2*wC2;
-                                EM_S[INDEX2(3,0,8)]+= 2*wC0 + 2*wC1 -   wC2;
-                                EM_S[INDEX2(4,0,8)]+= 2*wC0 + 2*wC1 - 4*wC2;
-                                EM_S[INDEX2(5,0,8)]+= 2*wC0 +   wC1 - 2*wC2;
-                                EM_S[INDEX2(6,0,8)]+=   wC0 + 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(7,0,8)]+=   wC0 +   wC1 -   wC2;
-                                EM_S[INDEX2(0,1,8)]+=-4*wC0 + 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(1,1,8)]+=-4*wC0 + 4*wC1 - 4*wC2;
-                                EM_S[INDEX2(2,1,8)]+=-2*wC0 + 2*wC1 -   wC2;
-                                EM_S[INDEX2(3,1,8)]+=-2*wC0 + 4*wC1 - 2*wC2;
-                                EM_S[INDEX2(4,1,8)]+=-2*wC0 +   wC1 - 2*wC2;
-                                EM_S[INDEX2(5,1,8)]+=-2*wC0 + 2*wC1 - 4*wC2;
-                                EM_S[INDEX2(6,1,8)]+=  -wC0 +   wC1 -   wC2;
-                                EM_S[INDEX2(7,1,8)]+=  -wC0 + 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(0,2,8)]+= 2*wC0 - 4*wC1 - 2*wC2;
-                                EM_S[INDEX2(1,2,8)]+= 2*wC0 - 2*wC1 -   wC2;
-                                EM_S[INDEX2(2,2,8)]+= 4*wC0 - 4*wC1 - 4*wC2;
-                                EM_S[INDEX2(3,2,8)]+= 4*wC0 - 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(4,2,8)]+=   wC0 - 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(5,2,8)]+=   wC0 -   wC1 -   wC2;
-                                EM_S[INDEX2(6,2,8)]+= 2*wC0 - 2*wC1 - 4*wC2;
-                                EM_S[INDEX2(7,2,8)]+= 2*wC0 -   wC1 - 2*wC2;
-                                EM_S[INDEX2(0,3,8)]+=-2*wC0 - 2*wC1 -   wC2;
-                                EM_S[INDEX2(1,3,8)]+=-2*wC0 - 4*wC1 - 2*wC2;
-                                EM_S[INDEX2(2,3,8)]+=-4*wC0 - 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(3,3,8)]+=-4*wC0 - 4*wC1 - 4*wC2;
-                                EM_S[INDEX2(4,3,8)]+=  -wC0 -   wC1 -   wC2;
-                                EM_S[INDEX2(5,3,8)]+=  -wC0 - 2*wC1 - 2*wC2;
-                                EM_S[INDEX2(6,3,8)]+=-2*wC0 -   wC1 - 2*wC2;
-                                EM_S[INDEX2(7,3,8)]+=-2*wC0 - 2*wC1 - 4*wC2;
-                                EM_S[INDEX2(0,4,8)]+= 2*wC0 + 2*wC1 + 4*wC2;
-                                EM_S[INDEX2(1,4,8)]+= 2*wC0 +   wC1 + 2*wC2;
-                                EM_S[INDEX2(2,4,8)]+=   wC0 + 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(3,4,8)]+=   wC0 +   wC1 +   wC2;
-                                EM_S[INDEX2(4,4,8)]+= 4*wC0 + 4*wC1 + 4*wC2;
-                                EM_S[INDEX2(5,4,8)]+= 4*wC0 + 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(6,4,8)]+= 2*wC0 + 4*wC1 + 2*wC2;
-                                EM_S[INDEX2(7,4,8)]+= 2*wC0 + 2*wC1 +   wC2;
-                                EM_S[INDEX2(0,5,8)]+=-2*wC0 +   wC1 + 2*wC2;
-                                EM_S[INDEX2(1,5,8)]+=-2*wC0 + 2*wC1 + 4*wC2;
-                                EM_S[INDEX2(2,5,8)]+=  -wC0 +   wC1 +   wC2;
-                                EM_S[INDEX2(3,5,8)]+=  -wC0 + 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(4,5,8)]+=-4*wC0 + 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(5,5,8)]+=-4*wC0 + 4*wC1 + 4*wC2;
-                                EM_S[INDEX2(6,5,8)]+=-2*wC0 + 2*wC1 +   wC2;
-                                EM_S[INDEX2(7,5,8)]+=-2*wC0 + 4*wC1 + 2*wC2;
-                                EM_S[INDEX2(0,6,8)]+=   wC0 - 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(1,6,8)]+=   wC0 -   wC1 +   wC2;
-                                EM_S[INDEX2(2,6,8)]+= 2*wC0 - 2*wC1 + 4*wC2;
-                                EM_S[INDEX2(3,6,8)]+= 2*wC0 -   wC1 + 2*wC2;
-                                EM_S[INDEX2(4,6,8)]+= 2*wC0 - 4*wC1 + 2*wC2;
-                                EM_S[INDEX2(5,6,8)]+= 2*wC0 - 2*wC1 +   wC2;
-                                EM_S[INDEX2(6,6,8)]+= 4*wC0 - 4*wC1 + 4*wC2;
-                                EM_S[INDEX2(7,6,8)]+= 4*wC0 - 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(0,7,8)]+=  -wC0 -   wC1 +   wC2;
-                                EM_S[INDEX2(1,7,8)]+=  -wC0 - 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(2,7,8)]+=-2*wC0 -   wC1 + 2*wC2;
-                                EM_S[INDEX2(3,7,8)]+=-2*wC0 - 2*wC1 + 4*wC2;
-                                EM_S[INDEX2(4,7,8)]+=-2*wC0 - 2*wC1 +   wC2;
-                                EM_S[INDEX2(5,7,8)]+=-2*wC0 - 4*wC1 + 2*wC2;
-                                EM_S[INDEX2(6,7,8)]+=-4*wC0 - 2*wC1 + 2*wC2;
-                                EM_S[INDEX2(7,7,8)]+=-4*wC0 - 4*wC1 + 4*wC2;
+                                const Scalar wC0 = C_p[0]*w53;
+                                const Scalar wC1 = C_p[1]*w51;
+                                const Scalar wC2 = C_p[2]*w50;
+                                EM_S[INDEX2(0,0,8)]+= 4.*wC0 + 4.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(1,0,8)]+= 4.*wC0 + 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(2,0,8)]+= 2.*wC0 + 4.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(3,0,8)]+= 2.*wC0 + 2.*wC1 -    wC2;
+                                EM_S[INDEX2(4,0,8)]+= 2.*wC0 + 2.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(5,0,8)]+= 2.*wC0 +    wC1 - 2.*wC2;
+                                EM_S[INDEX2(6,0,8)]+=    wC0 + 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(7,0,8)]+=    wC0 +    wC1 -    wC2;
+                                EM_S[INDEX2(0,1,8)]+=-4.*wC0 + 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(1,1,8)]+=-4.*wC0 + 4.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(2,1,8)]+=-2.*wC0 + 2.*wC1 -    wC2;
+                                EM_S[INDEX2(3,1,8)]+=-2.*wC0 + 4.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(4,1,8)]+=-2.*wC0 +    wC1 - 2.*wC2;
+                                EM_S[INDEX2(5,1,8)]+=-2.*wC0 + 2.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(6,1,8)]+=   -wC0 +    wC1 -    wC2;
+                                EM_S[INDEX2(7,1,8)]+=   -wC0 + 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(0,2,8)]+= 2.*wC0 - 4.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(1,2,8)]+= 2.*wC0 - 2.*wC1 -    wC2;
+                                EM_S[INDEX2(2,2,8)]+= 4.*wC0 - 4.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(3,2,8)]+= 4.*wC0 - 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(4,2,8)]+=    wC0 - 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(5,2,8)]+=    wC0 -    wC1 -    wC2;
+                                EM_S[INDEX2(6,2,8)]+= 2.*wC0 - 2.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(7,2,8)]+= 2.*wC0 -    wC1 - 2.*wC2;
+                                EM_S[INDEX2(0,3,8)]+=-2.*wC0 - 2.*wC1 -    wC2;
+                                EM_S[INDEX2(1,3,8)]+=-2.*wC0 - 4.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(2,3,8)]+=-4.*wC0 - 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(3,3,8)]+=-4.*wC0 - 4.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(4,3,8)]+=   -wC0 -    wC1 -    wC2;
+                                EM_S[INDEX2(5,3,8)]+=   -wC0 - 2.*wC1 - 2.*wC2;
+                                EM_S[INDEX2(6,3,8)]+=-2.*wC0 -    wC1 - 2.*wC2;
+                                EM_S[INDEX2(7,3,8)]+=-2.*wC0 - 2.*wC1 - 4.*wC2;
+                                EM_S[INDEX2(0,4,8)]+= 2.*wC0 + 2.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(1,4,8)]+= 2.*wC0 +    wC1 + 2.*wC2;
+                                EM_S[INDEX2(2,4,8)]+=    wC0 + 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(3,4,8)]+=    wC0 +    wC1 +    wC2;
+                                EM_S[INDEX2(4,4,8)]+= 4.*wC0 + 4.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(5,4,8)]+= 4.*wC0 + 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(6,4,8)]+= 2.*wC0 + 4.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(7,4,8)]+= 2.*wC0 + 2.*wC1 +    wC2;
+                                EM_S[INDEX2(0,5,8)]+=-2.*wC0 +    wC1 + 2.*wC2;
+                                EM_S[INDEX2(1,5,8)]+=-2.*wC0 + 2.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(2,5,8)]+=   -wC0 +    wC1 +    wC2;
+                                EM_S[INDEX2(3,5,8)]+=   -wC0 + 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(4,5,8)]+=-4.*wC0 + 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(5,5,8)]+=-4.*wC0 + 4.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(6,5,8)]+=-2.*wC0 + 2.*wC1 +    wC2;
+                                EM_S[INDEX2(7,5,8)]+=-2.*wC0 + 4.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(0,6,8)]+=    wC0 - 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(1,6,8)]+=    wC0 -    wC1 +    wC2;
+                                EM_S[INDEX2(2,6,8)]+= 2.*wC0 - 2.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(3,6,8)]+= 2.*wC0 -    wC1 + 2.*wC2;
+                                EM_S[INDEX2(4,6,8)]+= 2.*wC0 - 4.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(5,6,8)]+= 2.*wC0 - 2.*wC1 +    wC2;
+                                EM_S[INDEX2(6,6,8)]+= 4.*wC0 - 4.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(7,6,8)]+= 4.*wC0 - 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(0,7,8)]+=   -wC0 -    wC1 +    wC2;
+                                EM_S[INDEX2(1,7,8)]+=   -wC0 - 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(2,7,8)]+=-2.*wC0 -    wC1 + 2.*wC2;
+                                EM_S[INDEX2(3,7,8)]+=-2.*wC0 - 2.*wC1 + 4.*wC2;
+                                EM_S[INDEX2(4,7,8)]+=-2.*wC0 - 2.*wC1 +    wC2;
+                                EM_S[INDEX2(5,7,8)]+=-2.*wC0 - 4.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(6,7,8)]+=-4.*wC0 - 2.*wC1 + 2.*wC2;
+                                EM_S[INDEX2(7,7,8)]+=-4.*wC0 - 4.*wC1 + 4.*wC2;
                             }
                         }
                         ///////////////
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double* D_p=D.getSampleDataRO(e);
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
                             if (D.actsExpanded()) {
-                                const double D_0 = D_p[0];
-                                const double D_1 = D_p[1];
-                                const double D_2 = D_p[2];
-                                const double D_3 = D_p[3];
-                                const double D_4 = D_p[4];
-                                const double D_5 = D_p[5];
-                                const double D_6 = D_p[6];
-                                const double D_7 = D_p[7];
-                                const double tmp0 = w54*(D_0 + D_4);
-                                const double tmp1 = w55*(D_1 + D_2 + D_5 + D_6);
-                                const double tmp2 = w56*(D_3 + D_7);
-                                const double tmp3 = w57*(D_0 + D_1 + D_2 + D_3);
-                                const double tmp4 = w58*(D_4 + D_5 + D_6 + D_7);
-                                const double tmp5 = w54*(D_4 + D_6);
-                                const double tmp6 = w55*(D_0 + D_2 + D_5 + D_7);
-                                const double tmp7 = w56*(D_1 + D_3);
-                                const double tmp8 = w54*(D_1 + D_3);
-                                const double tmp9 = w56*(D_4 + D_6);
-                                const double tmp10 = w57*(D_4 + D_5 + D_6 + D_7);
-                                const double tmp11 = w58*(D_0 + D_1 + D_2 + D_3);
-                                const double tmp12 = w54*(D_2 + D_3);
-                                const double tmp13 = w55*(D_0 + D_1 + D_6 + D_7);
-                                const double tmp14 = w56*(D_4 + D_5);
-                                const double tmp15 = w55*(D_0 + D_1 + D_2 + D_3 + D_4 + D_5 + D_6 + D_7);
-                                const double tmp16 = w54*(D_1 + D_5);
-                                const double tmp17 = w55*(D_0 + D_3 + D_4 + D_7);
-                                const double tmp18 = w56*(D_2 + D_6);
-                                const double tmp19 = w54*(D_2 + D_6);
-                                const double tmp20 = w56*(D_1 + D_5);
-                                const double tmp21 = w57*(D_0 + D_1 + D_4 + D_5);
-                                const double tmp22 = w58*(D_2 + D_3 + D_6 + D_7);
-                                const double tmp23 = w54*(D_3 + D_7);
-                                const double tmp24 = w56*(D_0 + D_4);
-                                const double tmp25 = w54*(D_0 + D_1);
-                                const double tmp26 = w55*(D_2 + D_3 + D_4 + D_5);
-                                const double tmp27 = w56*(D_6 + D_7);
-                                const double tmp28 = w57*(D_0 + D_5 + D_6);
-                                const double tmp29 = w58*(D_1 + D_2 + D_7);
-                                const double tmp30 = w54*(D_5 + D_7);
-                                const double tmp31 = w55*(D_1 + D_3 + D_4 + D_6);
-                                const double tmp32 = w56*(D_0 + D_2);
-                                const double tmp33 = w57*(D_1 + D_2 + D_7);
-                                const double tmp34 = w58*(D_0 + D_5 + D_6);
-                                const double tmp35 = w57*(D_1 + D_4 + D_7);
-                                const double tmp36 = w58*(D_0 + D_3 + D_6);
-                                const double tmp37 = w57*(D_1 + D_2 + D_4);
-                                const double tmp38 = w58*(D_3 + D_5 + D_6);
-                                const double tmp39 = w54*(D_0 + D_2);
-                                const double tmp40 = w56*(D_5 + D_7);
-                                const double tmp41 = w57*(D_0 + D_2 + D_4 + D_6);
-                                const double tmp42 = w58*(D_1 + D_3 + D_5 + D_7);
-                                const double tmp43 = w57*(D_2 + D_3 + D_6 + D_7);
-                                const double tmp44 = w58*(D_0 + D_1 + D_4 + D_5);
-                                const double tmp45 = w57*(D_2 + D_4 + D_7);
-                                const double tmp46 = w58*(D_0 + D_3 + D_5);
-                                const double tmp47 = w54*(D_4 + D_5);
-                                const double tmp48 = w56*(D_2 + D_3);
-                                const double tmp49 = w57*(D_3 + D_5 + D_6);
-                                const double tmp50 = w58*(D_1 + D_2 + D_4);
-                                const double tmp51 = w57*(D_0 + D_3 + D_5);
-                                const double tmp52 = w58*(D_2 + D_4 + D_7);
-                                const double tmp53 = w57*(D_0 + D_3 + D_6);
-                                const double tmp54 = w58*(D_1 + D_4 + D_7);
-                                const double tmp55 = w57*(D_1 + D_3 + D_5 + D_7);
-                                const double tmp56 = w58*(D_0 + D_2 + D_4 + D_6);
-                                const double tmp57 = w54*(D_6 + D_7);
-                                const double tmp58 = w56*(D_0 + D_1);
+                                const Scalar D_0 = D_p[0];
+                                const Scalar D_1 = D_p[1];
+                                const Scalar D_2 = D_p[2];
+                                const Scalar D_3 = D_p[3];
+                                const Scalar D_4 = D_p[4];
+                                const Scalar D_5 = D_p[5];
+                                const Scalar D_6 = D_p[6];
+                                const Scalar D_7 = D_p[7];
+                                const Scalar tmp0 = w54*(D_0 + D_4);
+                                const Scalar tmp1 = w55*(D_1 + D_2 + D_5 + D_6);
+                                const Scalar tmp2 = w56*(D_3 + D_7);
+                                const Scalar tmp3 = w57*(D_0 + D_1 + D_2 + D_3);
+                                const Scalar tmp4 = w58*(D_4 + D_5 + D_6 + D_7);
+                                const Scalar tmp5 = w54*(D_4 + D_6);
+                                const Scalar tmp6 = w55*(D_0 + D_2 + D_5 + D_7);
+                                const Scalar tmp7 = w56*(D_1 + D_3);
+                                const Scalar tmp8 = w54*(D_1 + D_3);
+                                const Scalar tmp9 = w56*(D_4 + D_6);
+                                const Scalar tmp10 = w57*(D_4 + D_5 + D_6 + D_7);
+                                const Scalar tmp11 = w58*(D_0 + D_1 + D_2 + D_3);
+                                const Scalar tmp12 = w54*(D_2 + D_3);
+                                const Scalar tmp13 = w55*(D_0 + D_1 + D_6 + D_7);
+                                const Scalar tmp14 = w56*(D_4 + D_5);
+                                const Scalar tmp15 = w55*(D_0 + D_1 + D_2 + D_3 + D_4 + D_5 + D_6 + D_7);
+                                const Scalar tmp16 = w54*(D_1 + D_5);
+                                const Scalar tmp17 = w55*(D_0 + D_3 + D_4 + D_7);
+                                const Scalar tmp18 = w56*(D_2 + D_6);
+                                const Scalar tmp19 = w54*(D_2 + D_6);
+                                const Scalar tmp20 = w56*(D_1 + D_5);
+                                const Scalar tmp21 = w57*(D_0 + D_1 + D_4 + D_5);
+                                const Scalar tmp22 = w58*(D_2 + D_3 + D_6 + D_7);
+                                const Scalar tmp23 = w54*(D_3 + D_7);
+                                const Scalar tmp24 = w56*(D_0 + D_4);
+                                const Scalar tmp25 = w54*(D_0 + D_1);
+                                const Scalar tmp26 = w55*(D_2 + D_3 + D_4 + D_5);
+                                const Scalar tmp27 = w56*(D_6 + D_7);
+                                const Scalar tmp28 = w57*(D_0 + D_5 + D_6);
+                                const Scalar tmp29 = w58*(D_1 + D_2 + D_7);
+                                const Scalar tmp30 = w54*(D_5 + D_7);
+                                const Scalar tmp31 = w55*(D_1 + D_3 + D_4 + D_6);
+                                const Scalar tmp32 = w56*(D_0 + D_2);
+                                const Scalar tmp33 = w57*(D_1 + D_2 + D_7);
+                                const Scalar tmp34 = w58*(D_0 + D_5 + D_6);
+                                const Scalar tmp35 = w57*(D_1 + D_4 + D_7);
+                                const Scalar tmp36 = w58*(D_0 + D_3 + D_6);
+                                const Scalar tmp37 = w57*(D_1 + D_2 + D_4);
+                                const Scalar tmp38 = w58*(D_3 + D_5 + D_6);
+                                const Scalar tmp39 = w54*(D_0 + D_2);
+                                const Scalar tmp40 = w56*(D_5 + D_7);
+                                const Scalar tmp41 = w57*(D_0 + D_2 + D_4 + D_6);
+                                const Scalar tmp42 = w58*(D_1 + D_3 + D_5 + D_7);
+                                const Scalar tmp43 = w57*(D_2 + D_3 + D_6 + D_7);
+                                const Scalar tmp44 = w58*(D_0 + D_1 + D_4 + D_5);
+                                const Scalar tmp45 = w57*(D_2 + D_4 + D_7);
+                                const Scalar tmp46 = w58*(D_0 + D_3 + D_5);
+                                const Scalar tmp47 = w54*(D_4 + D_5);
+                                const Scalar tmp48 = w56*(D_2 + D_3);
+                                const Scalar tmp49 = w57*(D_3 + D_5 + D_6);
+                                const Scalar tmp50 = w58*(D_1 + D_2 + D_4);
+                                const Scalar tmp51 = w57*(D_0 + D_3 + D_5);
+                                const Scalar tmp52 = w58*(D_2 + D_4 + D_7);
+                                const Scalar tmp53 = w57*(D_0 + D_3 + D_6);
+                                const Scalar tmp54 = w58*(D_1 + D_4 + D_7);
+                                const Scalar tmp55 = w57*(D_1 + D_3 + D_5 + D_7);
+                                const Scalar tmp56 = w58*(D_0 + D_2 + D_4 + D_6);
+                                const Scalar tmp57 = w54*(D_6 + D_7);
+                                const Scalar tmp58 = w56*(D_0 + D_1);
                                 EM_S[INDEX2(0,0,8)]+=D_0*w59 + D_7*w60 + tmp49 + tmp50;
                                 EM_S[INDEX2(1,0,8)]+=tmp26 + tmp57 + tmp58;
                                 EM_S[INDEX2(2,0,8)]+=tmp30 + tmp31 + tmp32;
@@ -1969,163 +1984,163 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_S[INDEX2(6,7,8)]+=tmp25 + tmp26 + tmp27;
                                 EM_S[INDEX2(7,7,8)]+=D_0*w60 + D_7*w59 + tmp37 + tmp38;
                             } else { // constant data
-                                const double wD0 = 8*D_p[0]*w55;
-                                EM_S[INDEX2(0,0,8)]+=8*wD0;
-                                EM_S[INDEX2(1,0,8)]+=4*wD0;
-                                EM_S[INDEX2(2,0,8)]+=4*wD0;
-                                EM_S[INDEX2(3,0,8)]+=2*wD0;
-                                EM_S[INDEX2(4,0,8)]+=4*wD0;
-                                EM_S[INDEX2(5,0,8)]+=2*wD0;
-                                EM_S[INDEX2(6,0,8)]+=2*wD0;
-                                EM_S[INDEX2(7,0,8)]+=wD0;
-                                EM_S[INDEX2(0,1,8)]+=4*wD0;
-                                EM_S[INDEX2(1,1,8)]+=8*wD0;
-                                EM_S[INDEX2(2,1,8)]+=2*wD0;
-                                EM_S[INDEX2(3,1,8)]+=4*wD0;
-                                EM_S[INDEX2(4,1,8)]+=2*wD0;
-                                EM_S[INDEX2(5,1,8)]+=4*wD0;
-                                EM_S[INDEX2(6,1,8)]+=wD0;
-                                EM_S[INDEX2(7,1,8)]+=2*wD0;
-                                EM_S[INDEX2(0,2,8)]+=4*wD0;
-                                EM_S[INDEX2(1,2,8)]+=2*wD0;
-                                EM_S[INDEX2(2,2,8)]+=8*wD0;
-                                EM_S[INDEX2(3,2,8)]+=4*wD0;
-                                EM_S[INDEX2(4,2,8)]+=2*wD0;
-                                EM_S[INDEX2(5,2,8)]+=wD0;
-                                EM_S[INDEX2(6,2,8)]+=4*wD0;
-                                EM_S[INDEX2(7,2,8)]+=2*wD0;
-                                EM_S[INDEX2(0,3,8)]+=2*wD0;
-                                EM_S[INDEX2(1,3,8)]+=4*wD0;
-                                EM_S[INDEX2(2,3,8)]+=4*wD0;
-                                EM_S[INDEX2(3,3,8)]+=8*wD0;
-                                EM_S[INDEX2(4,3,8)]+=wD0;
-                                EM_S[INDEX2(5,3,8)]+=2*wD0;
-                                EM_S[INDEX2(6,3,8)]+=2*wD0;
-                                EM_S[INDEX2(7,3,8)]+=4*wD0;
-                                EM_S[INDEX2(0,4,8)]+=4*wD0;
-                                EM_S[INDEX2(1,4,8)]+=2*wD0;
-                                EM_S[INDEX2(2,4,8)]+=2*wD0;
-                                EM_S[INDEX2(3,4,8)]+=wD0;
-                                EM_S[INDEX2(4,4,8)]+=8*wD0;
-                                EM_S[INDEX2(5,4,8)]+=4*wD0;
-                                EM_S[INDEX2(6,4,8)]+=4*wD0;
-                                EM_S[INDEX2(7,4,8)]+=2*wD0;
-                                EM_S[INDEX2(0,5,8)]+=2*wD0;
-                                EM_S[INDEX2(1,5,8)]+=4*wD0;
-                                EM_S[INDEX2(2,5,8)]+=wD0;
-                                EM_S[INDEX2(3,5,8)]+=2*wD0;
-                                EM_S[INDEX2(4,5,8)]+=4*wD0;
-                                EM_S[INDEX2(5,5,8)]+=8*wD0;
-                                EM_S[INDEX2(6,5,8)]+=2*wD0;
-                                EM_S[INDEX2(7,5,8)]+=4*wD0;
-                                EM_S[INDEX2(0,6,8)]+=2*wD0;
-                                EM_S[INDEX2(1,6,8)]+=wD0;
-                                EM_S[INDEX2(2,6,8)]+=4*wD0;
-                                EM_S[INDEX2(3,6,8)]+=2*wD0;
-                                EM_S[INDEX2(4,6,8)]+=4*wD0;
-                                EM_S[INDEX2(5,6,8)]+=2*wD0;
-                                EM_S[INDEX2(6,6,8)]+=8*wD0;
-                                EM_S[INDEX2(7,6,8)]+=4*wD0;
-                                EM_S[INDEX2(0,7,8)]+=wD0;
-                                EM_S[INDEX2(1,7,8)]+=2*wD0;
-                                EM_S[INDEX2(2,7,8)]+=2*wD0;
-                                EM_S[INDEX2(3,7,8)]+=4*wD0;
-                                EM_S[INDEX2(4,7,8)]+=2*wD0;
-                                EM_S[INDEX2(5,7,8)]+=4*wD0;
-                                EM_S[INDEX2(6,7,8)]+=4*wD0;
-                                EM_S[INDEX2(7,7,8)]+=8*wD0;
+                                const Scalar wD0 = 8.*D_p[0]*w55;
+                                EM_S[INDEX2(0,0,8)]+=8.*wD0;
+                                EM_S[INDEX2(1,0,8)]+=4.*wD0;
+                                EM_S[INDEX2(2,0,8)]+=4.*wD0;
+                                EM_S[INDEX2(3,0,8)]+=2.*wD0;
+                                EM_S[INDEX2(4,0,8)]+=4.*wD0;
+                                EM_S[INDEX2(5,0,8)]+=2.*wD0;
+                                EM_S[INDEX2(6,0,8)]+=2.*wD0;
+                                EM_S[INDEX2(7,0,8)]+=   wD0;
+                                EM_S[INDEX2(0,1,8)]+=4.*wD0;
+                                EM_S[INDEX2(1,1,8)]+=8.*wD0;
+                                EM_S[INDEX2(2,1,8)]+=2.*wD0;
+                                EM_S[INDEX2(3,1,8)]+=4.*wD0;
+                                EM_S[INDEX2(4,1,8)]+=2.*wD0;
+                                EM_S[INDEX2(5,1,8)]+=4.*wD0;
+                                EM_S[INDEX2(6,1,8)]+=   wD0;
+                                EM_S[INDEX2(7,1,8)]+=2.*wD0;
+                                EM_S[INDEX2(0,2,8)]+=4.*wD0;
+                                EM_S[INDEX2(1,2,8)]+=2.*wD0;
+                                EM_S[INDEX2(2,2,8)]+=8.*wD0;
+                                EM_S[INDEX2(3,2,8)]+=4.*wD0;
+                                EM_S[INDEX2(4,2,8)]+=2.*wD0;
+                                EM_S[INDEX2(5,2,8)]+=   wD0;
+                                EM_S[INDEX2(6,2,8)]+=4.*wD0;
+                                EM_S[INDEX2(7,2,8)]+=2.*wD0;
+                                EM_S[INDEX2(0,3,8)]+=2.*wD0;
+                                EM_S[INDEX2(1,3,8)]+=4.*wD0;
+                                EM_S[INDEX2(2,3,8)]+=4.*wD0;
+                                EM_S[INDEX2(3,3,8)]+=8.*wD0;
+                                EM_S[INDEX2(4,3,8)]+=   wD0;
+                                EM_S[INDEX2(5,3,8)]+=2.*wD0;
+                                EM_S[INDEX2(6,3,8)]+=2.*wD0;
+                                EM_S[INDEX2(7,3,8)]+=4.*wD0;
+                                EM_S[INDEX2(0,4,8)]+=4.*wD0;
+                                EM_S[INDEX2(1,4,8)]+=2.*wD0;
+                                EM_S[INDEX2(2,4,8)]+=2.*wD0;
+                                EM_S[INDEX2(3,4,8)]+=   wD0;
+                                EM_S[INDEX2(4,4,8)]+=8.*wD0;
+                                EM_S[INDEX2(5,4,8)]+=4.*wD0;
+                                EM_S[INDEX2(6,4,8)]+=4.*wD0;
+                                EM_S[INDEX2(7,4,8)]+=2.*wD0;
+                                EM_S[INDEX2(0,5,8)]+=2.*wD0;
+                                EM_S[INDEX2(1,5,8)]+=4.*wD0;
+                                EM_S[INDEX2(2,5,8)]+=   wD0;
+                                EM_S[INDEX2(3,5,8)]+=2.*wD0;
+                                EM_S[INDEX2(4,5,8)]+=4.*wD0;
+                                EM_S[INDEX2(5,5,8)]+=8.*wD0;
+                                EM_S[INDEX2(6,5,8)]+=2.*wD0;
+                                EM_S[INDEX2(7,5,8)]+=4.*wD0;
+                                EM_S[INDEX2(0,6,8)]+=2.*wD0;
+                                EM_S[INDEX2(1,6,8)]+=   wD0;
+                                EM_S[INDEX2(2,6,8)]+=4.*wD0;
+                                EM_S[INDEX2(3,6,8)]+=2.*wD0;
+                                EM_S[INDEX2(4,6,8)]+=4.*wD0;
+                                EM_S[INDEX2(5,6,8)]+=2.*wD0;
+                                EM_S[INDEX2(6,6,8)]+=8.*wD0;
+                                EM_S[INDEX2(7,6,8)]+=4.*wD0;
+                                EM_S[INDEX2(0,7,8)]+=   wD0;
+                                EM_S[INDEX2(1,7,8)]+=2.*wD0;
+                                EM_S[INDEX2(2,7,8)]+=2.*wD0;
+                                EM_S[INDEX2(3,7,8)]+=4.*wD0;
+                                EM_S[INDEX2(4,7,8)]+=2.*wD0;
+                                EM_S[INDEX2(5,7,8)]+=4.*wD0;
+                                EM_S[INDEX2(6,7,8)]+=4.*wD0;
+                                EM_S[INDEX2(7,7,8)]+=8.*wD0;
                             }
                         }
                         ///////////////
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double* X_p=X.getSampleDataRO(e);
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
                             if (X.actsExpanded()) {
-                                const double X_0_0 = X_p[INDEX2(0,0,3)];
-                                const double X_1_0 = X_p[INDEX2(1,0,3)];
-                                const double X_2_0 = X_p[INDEX2(2,0,3)];
-                                const double X_0_1 = X_p[INDEX2(0,1,3)];
-                                const double X_1_1 = X_p[INDEX2(1,1,3)];
-                                const double X_2_1 = X_p[INDEX2(2,1,3)];
-                                const double X_0_2 = X_p[INDEX2(0,2,3)];
-                                const double X_1_2 = X_p[INDEX2(1,2,3)];
-                                const double X_2_2 = X_p[INDEX2(2,2,3)];
-                                const double X_0_3 = X_p[INDEX2(0,3,3)];
-                                const double X_1_3 = X_p[INDEX2(1,3,3)];
-                                const double X_2_3 = X_p[INDEX2(2,3,3)];
-                                const double X_0_4 = X_p[INDEX2(0,4,3)];
-                                const double X_1_4 = X_p[INDEX2(1,4,3)];
-                                const double X_2_4 = X_p[INDEX2(2,4,3)];
-                                const double X_0_5 = X_p[INDEX2(0,5,3)];
-                                const double X_1_5 = X_p[INDEX2(1,5,3)];
-                                const double X_2_5 = X_p[INDEX2(2,5,3)];
-                                const double X_0_6 = X_p[INDEX2(0,6,3)];
-                                const double X_1_6 = X_p[INDEX2(1,6,3)];
-                                const double X_2_6 = X_p[INDEX2(2,6,3)];
-                                const double X_0_7 = X_p[INDEX2(0,7,3)];
-                                const double X_1_7 = X_p[INDEX2(1,7,3)];
-                                const double X_2_7 = X_p[INDEX2(2,7,3)];
-                                const double tmp0 = w66*(X_0_2 + X_0_3 + X_0_4 + X_0_5);
-                                const double tmp1 = w64*(X_1_1 + X_1_3 + X_1_4 + X_1_6);
-                                const double tmp2 = w61*(X_0_0 + X_0_1);
-                                const double tmp3 = w68*(X_1_5 + X_1_7);
-                                const double tmp4 = w65*(X_2_1 + X_2_2 + X_2_5 + X_2_6);
-                                const double tmp5 = w63*(X_2_0 + X_2_4);
-                                const double tmp6 = w67*(X_2_3 + X_2_7);
-                                const double tmp7 = w69*(X_0_6 + X_0_7);
-                                const double tmp8 = w62*(X_1_0 + X_1_2);
-                                const double tmp9 = w66*(-X_0_2 - X_0_3 - X_0_4 - X_0_5);
-                                const double tmp10 = w64*(X_1_0 + X_1_2 + X_1_5 + X_1_7);
-                                const double tmp11 = w61*(-X_0_0 - X_0_1);
-                                const double tmp12 = w68*(X_1_4 + X_1_6);
-                                const double tmp13 = w65*(X_2_0 + X_2_3 + X_2_4 + X_2_7);
-                                const double tmp14 = w63*(X_2_1 + X_2_5);
-                                const double tmp15 = w67*(X_2_2 + X_2_6);
-                                const double tmp16 = w69*(-X_0_6 - X_0_7);
-                                const double tmp17 = w62*(X_1_1 + X_1_3);
-                                const double tmp18 = w66*(X_0_0 + X_0_1 + X_0_6 + X_0_7);
-                                const double tmp19 = w64*(-X_1_1 - X_1_3 - X_1_4 - X_1_6);
-                                const double tmp20 = w61*(X_0_2 + X_0_3);
-                                const double tmp21 = w68*(-X_1_5 - X_1_7);
-                                const double tmp22 = w63*(X_2_2 + X_2_6);
-                                const double tmp23 = w67*(X_2_1 + X_2_5);
-                                const double tmp24 = w69*(X_0_4 + X_0_5);
-                                const double tmp25 = w62*(-X_1_0 - X_1_2);
-                                const double tmp26 = w66*(-X_0_0 - X_0_1 - X_0_6 - X_0_7);
-                                const double tmp27 = w64*(-X_1_0 - X_1_2 - X_1_5 - X_1_7);
-                                const double tmp28 = w61*(-X_0_2 - X_0_3);
-                                const double tmp29 = w68*(-X_1_4 - X_1_6);
-                                const double tmp30 = w63*(X_2_3 + X_2_7);
-                                const double tmp31 = w67*(X_2_0 + X_2_4);
-                                const double tmp32 = w69*(-X_0_4 - X_0_5);
-                                const double tmp33 = w62*(-X_1_1 - X_1_3);
-                                const double tmp34 = w61*(X_0_4 + X_0_5);
-                                const double tmp35 = w68*(X_1_1 + X_1_3);
-                                const double tmp36 = w65*(-X_2_1 - X_2_2 - X_2_5 - X_2_6);
-                                const double tmp37 = w63*(-X_2_0 - X_2_4);
-                                const double tmp38 = w67*(-X_2_3 - X_2_7);
-                                const double tmp39 = w69*(X_0_2 + X_0_3);
-                                const double tmp40 = w62*(X_1_4 + X_1_6);
-                                const double tmp41 = w61*(-X_0_4 - X_0_5);
-                                const double tmp42 = w68*(X_1_0 + X_1_2);
-                                const double tmp43 = w65*(-X_2_0 - X_2_3 - X_2_4 - X_2_7);
-                                const double tmp44 = w63*(-X_2_1 - X_2_5);
-                                const double tmp45 = w67*(-X_2_2 - X_2_6);
-                                const double tmp46 = w69*(-X_0_2 - X_0_3);
-                                const double tmp47 = w62*(X_1_5 + X_1_7);
-                                const double tmp48 = w61*(X_0_6 + X_0_7);
-                                const double tmp49 = w68*(-X_1_1 - X_1_3);
-                                const double tmp50 = w63*(-X_2_2 - X_2_6);
-                                const double tmp51 = w67*(-X_2_1 - X_2_5);
-                                const double tmp52 = w69*(X_0_0 + X_0_1);
-                                const double tmp53 = w62*(-X_1_4 - X_1_6);
-                                const double tmp54 = w61*(-X_0_6 - X_0_7);
-                                const double tmp55 = w68*(-X_1_0 - X_1_2);
-                                const double tmp56 = w63*(-X_2_3 - X_2_7);
-                                const double tmp57 = w67*(-X_2_0 - X_2_4);
-                                const double tmp58 = w69*(-X_0_0 - X_0_1);
-                                const double tmp59 = w62*(-X_1_5 - X_1_7);
+                                const Scalar X_0_0 = X_p[INDEX2(0,0,3)];
+                                const Scalar X_1_0 = X_p[INDEX2(1,0,3)];
+                                const Scalar X_2_0 = X_p[INDEX2(2,0,3)];
+                                const Scalar X_0_1 = X_p[INDEX2(0,1,3)];
+                                const Scalar X_1_1 = X_p[INDEX2(1,1,3)];
+                                const Scalar X_2_1 = X_p[INDEX2(2,1,3)];
+                                const Scalar X_0_2 = X_p[INDEX2(0,2,3)];
+                                const Scalar X_1_2 = X_p[INDEX2(1,2,3)];
+                                const Scalar X_2_2 = X_p[INDEX2(2,2,3)];
+                                const Scalar X_0_3 = X_p[INDEX2(0,3,3)];
+                                const Scalar X_1_3 = X_p[INDEX2(1,3,3)];
+                                const Scalar X_2_3 = X_p[INDEX2(2,3,3)];
+                                const Scalar X_0_4 = X_p[INDEX2(0,4,3)];
+                                const Scalar X_1_4 = X_p[INDEX2(1,4,3)];
+                                const Scalar X_2_4 = X_p[INDEX2(2,4,3)];
+                                const Scalar X_0_5 = X_p[INDEX2(0,5,3)];
+                                const Scalar X_1_5 = X_p[INDEX2(1,5,3)];
+                                const Scalar X_2_5 = X_p[INDEX2(2,5,3)];
+                                const Scalar X_0_6 = X_p[INDEX2(0,6,3)];
+                                const Scalar X_1_6 = X_p[INDEX2(1,6,3)];
+                                const Scalar X_2_6 = X_p[INDEX2(2,6,3)];
+                                const Scalar X_0_7 = X_p[INDEX2(0,7,3)];
+                                const Scalar X_1_7 = X_p[INDEX2(1,7,3)];
+                                const Scalar X_2_7 = X_p[INDEX2(2,7,3)];
+                                const Scalar tmp0 = w66*(X_0_2 + X_0_3 + X_0_4 + X_0_5);
+                                const Scalar tmp1 = w64*(X_1_1 + X_1_3 + X_1_4 + X_1_6);
+                                const Scalar tmp2 = w61*(X_0_0 + X_0_1);
+                                const Scalar tmp3 = w68*(X_1_5 + X_1_7);
+                                const Scalar tmp4 = w65*(X_2_1 + X_2_2 + X_2_5 + X_2_6);
+                                const Scalar tmp5 = w63*(X_2_0 + X_2_4);
+                                const Scalar tmp6 = w67*(X_2_3 + X_2_7);
+                                const Scalar tmp7 = w69*(X_0_6 + X_0_7);
+                                const Scalar tmp8 = w62*(X_1_0 + X_1_2);
+                                const Scalar tmp9 = w66*(-X_0_2 - X_0_3 - X_0_4 - X_0_5);
+                                const Scalar tmp10 = w64*(X_1_0 + X_1_2 + X_1_5 + X_1_7);
+                                const Scalar tmp11 = w61*(-X_0_0 - X_0_1);
+                                const Scalar tmp12 = w68*(X_1_4 + X_1_6);
+                                const Scalar tmp13 = w65*(X_2_0 + X_2_3 + X_2_4 + X_2_7);
+                                const Scalar tmp14 = w63*(X_2_1 + X_2_5);
+                                const Scalar tmp15 = w67*(X_2_2 + X_2_6);
+                                const Scalar tmp16 = w69*(-X_0_6 - X_0_7);
+                                const Scalar tmp17 = w62*(X_1_1 + X_1_3);
+                                const Scalar tmp18 = w66*(X_0_0 + X_0_1 + X_0_6 + X_0_7);
+                                const Scalar tmp19 = w64*(-X_1_1 - X_1_3 - X_1_4 - X_1_6);
+                                const Scalar tmp20 = w61*(X_0_2 + X_0_3);
+                                const Scalar tmp21 = w68*(-X_1_5 - X_1_7);
+                                const Scalar tmp22 = w63*(X_2_2 + X_2_6);
+                                const Scalar tmp23 = w67*(X_2_1 + X_2_5);
+                                const Scalar tmp24 = w69*(X_0_4 + X_0_5);
+                                const Scalar tmp25 = w62*(-X_1_0 - X_1_2);
+                                const Scalar tmp26 = w66*(-X_0_0 - X_0_1 - X_0_6 - X_0_7);
+                                const Scalar tmp27 = w64*(-X_1_0 - X_1_2 - X_1_5 - X_1_7);
+                                const Scalar tmp28 = w61*(-X_0_2 - X_0_3);
+                                const Scalar tmp29 = w68*(-X_1_4 - X_1_6);
+                                const Scalar tmp30 = w63*(X_2_3 + X_2_7);
+                                const Scalar tmp31 = w67*(X_2_0 + X_2_4);
+                                const Scalar tmp32 = w69*(-X_0_4 - X_0_5);
+                                const Scalar tmp33 = w62*(-X_1_1 - X_1_3);
+                                const Scalar tmp34 = w61*(X_0_4 + X_0_5);
+                                const Scalar tmp35 = w68*(X_1_1 + X_1_3);
+                                const Scalar tmp36 = w65*(-X_2_1 - X_2_2 - X_2_5 - X_2_6);
+                                const Scalar tmp37 = w63*(-X_2_0 - X_2_4);
+                                const Scalar tmp38 = w67*(-X_2_3 - X_2_7);
+                                const Scalar tmp39 = w69*(X_0_2 + X_0_3);
+                                const Scalar tmp40 = w62*(X_1_4 + X_1_6);
+                                const Scalar tmp41 = w61*(-X_0_4 - X_0_5);
+                                const Scalar tmp42 = w68*(X_1_0 + X_1_2);
+                                const Scalar tmp43 = w65*(-X_2_0 - X_2_3 - X_2_4 - X_2_7);
+                                const Scalar tmp44 = w63*(-X_2_1 - X_2_5);
+                                const Scalar tmp45 = w67*(-X_2_2 - X_2_6);
+                                const Scalar tmp46 = w69*(-X_0_2 - X_0_3);
+                                const Scalar tmp47 = w62*(X_1_5 + X_1_7);
+                                const Scalar tmp48 = w61*(X_0_6 + X_0_7);
+                                const Scalar tmp49 = w68*(-X_1_1 - X_1_3);
+                                const Scalar tmp50 = w63*(-X_2_2 - X_2_6);
+                                const Scalar tmp51 = w67*(-X_2_1 - X_2_5);
+                                const Scalar tmp52 = w69*(X_0_0 + X_0_1);
+                                const Scalar tmp53 = w62*(-X_1_4 - X_1_6);
+                                const Scalar tmp54 = w61*(-X_0_6 - X_0_7);
+                                const Scalar tmp55 = w68*(-X_1_0 - X_1_2);
+                                const Scalar tmp56 = w63*(-X_2_3 - X_2_7);
+                                const Scalar tmp57 = w67*(-X_2_0 - X_2_4);
+                                const Scalar tmp58 = w69*(-X_0_0 - X_0_1);
+                                const Scalar tmp59 = w62*(-X_1_5 - X_1_7);
                                 EM_F[0]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 + tmp7 + tmp8;
                                 EM_F[1]+=tmp10 + tmp11 + tmp12 + tmp13 + tmp14 + tmp15 + tmp16 + tmp17 + tmp9;
                                 EM_F[2]+=tmp13 + tmp18 + tmp19 + tmp20 + tmp21 + tmp22 + tmp23 + tmp24 + tmp25;
@@ -2135,9 +2150,9 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_F[6]+=tmp0 + tmp27 + tmp43 + tmp48 + tmp49 + tmp50 + tmp51 + tmp52 + tmp53;
                                 EM_F[7]+=tmp19 + tmp36 + tmp54 + tmp55 + tmp56 + tmp57 + tmp58 + tmp59 + tmp9;
                             } else { // constant data
-                                const double wX0 = 12*X_p[0]*w66;
-                                const double wX1 = 12*X_p[1]*w64;
-                                const double wX2 = 18*X_p[2]*w50;
+                                const Scalar wX0 = 12.*X_p[0]*w66;
+                                const Scalar wX1 = 12.*X_p[1]*w64;
+                                const Scalar wX2 = 18.*X_p[2]*w50;
                                 EM_F[0]+= wX0 + wX1 - wX2;
                                 EM_F[1]+=-wX0 + wX1 - wX2;
                                 EM_F[2]+= wX0 - wX1 - wX2;
@@ -2152,32 +2167,32 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double* Y_p=Y.getSampleDataRO(e);
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                             if (Y.actsExpanded()) {
-                                const double Y_0 = Y_p[0];
-                                const double Y_1 = Y_p[1];
-                                const double Y_2 = Y_p[2];
-                                const double Y_3 = Y_p[3];
-                                const double Y_4 = Y_p[4];
-                                const double Y_5 = Y_p[5];
-                                const double Y_6 = Y_p[6];
-                                const double Y_7 = Y_p[7];
-                                const double tmp0 = w72*(Y_3 + Y_5 + Y_6);
-                                const double tmp1 = w71*(Y_1 + Y_2 + Y_4);
-                                const double tmp2 = w72*(Y_2 + Y_4 + Y_7);
-                                const double tmp3 = w71*(Y_0 + Y_3 + Y_5);
-                                const double tmp4 = w72*(Y_1 + Y_4 + Y_7);
-                                const double tmp5 = w71*(Y_0 + Y_3 + Y_6);
-                                const double tmp6 = w72*(Y_0 + Y_5 + Y_6);
-                                const double tmp7 = w71*(Y_1 + Y_2 + Y_7);
-                                const double tmp8 = w72*(Y_1 + Y_2 + Y_7);
-                                const double tmp9 = w71*(Y_0 + Y_5 + Y_6);
-                                const double tmp10 = w72*(Y_0 + Y_3 + Y_6);
-                                const double tmp11 = w71*(Y_1 + Y_4 + Y_7);
-                                const double tmp12 = w72*(Y_0 + Y_3 + Y_5);
-                                const double tmp13 = w71*(Y_2 + Y_4 + Y_7);
-                                const double tmp14 = w72*(Y_1 + Y_2 + Y_4);
-                                const double tmp15 = w71*(Y_3 + Y_5 + Y_6);
+                                const Scalar Y_0 = Y_p[0];
+                                const Scalar Y_1 = Y_p[1];
+                                const Scalar Y_2 = Y_p[2];
+                                const Scalar Y_3 = Y_p[3];
+                                const Scalar Y_4 = Y_p[4];
+                                const Scalar Y_5 = Y_p[5];
+                                const Scalar Y_6 = Y_p[6];
+                                const Scalar Y_7 = Y_p[7];
+                                const Scalar tmp0 = w72*(Y_3 + Y_5 + Y_6);
+                                const Scalar tmp1 = w71*(Y_1 + Y_2 + Y_4);
+                                const Scalar tmp2 = w72*(Y_2 + Y_4 + Y_7);
+                                const Scalar tmp3 = w71*(Y_0 + Y_3 + Y_5);
+                                const Scalar tmp4 = w72*(Y_1 + Y_4 + Y_7);
+                                const Scalar tmp5 = w71*(Y_0 + Y_3 + Y_6);
+                                const Scalar tmp6 = w72*(Y_0 + Y_5 + Y_6);
+                                const Scalar tmp7 = w71*(Y_1 + Y_2 + Y_7);
+                                const Scalar tmp8 = w72*(Y_1 + Y_2 + Y_7);
+                                const Scalar tmp9 = w71*(Y_0 + Y_5 + Y_6);
+                                const Scalar tmp10 = w72*(Y_0 + Y_3 + Y_6);
+                                const Scalar tmp11 = w71*(Y_1 + Y_4 + Y_7);
+                                const Scalar tmp12 = w72*(Y_0 + Y_3 + Y_5);
+                                const Scalar tmp13 = w71*(Y_2 + Y_4 + Y_7);
+                                const Scalar tmp14 = w72*(Y_1 + Y_2 + Y_4);
+                                const Scalar tmp15 = w71*(Y_3 + Y_5 + Y_6);
                                 EM_F[0]+=Y_0*w70 + Y_7*w73 + tmp0 + tmp1;
                                 EM_F[1]+=Y_1*w70 + Y_6*w73 + tmp2 + tmp3;
                                 EM_F[2]+=Y_2*w70 + Y_5*w73 + tmp4 + tmp5;
@@ -2187,14 +2202,14 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
                                 EM_F[6]+=Y_1*w73 + Y_6*w70 + tmp12 + tmp13;
                                 EM_F[7]+=Y_0*w73 + Y_7*w70 + tmp14 + tmp15;
                             } else { // constant data
-                                EM_F[0]+=216*Y_p[0]*w55;
-                                EM_F[1]+=216*Y_p[0]*w55;
-                                EM_F[2]+=216*Y_p[0]*w55;
-                                EM_F[3]+=216*Y_p[0]*w55;
-                                EM_F[4]+=216*Y_p[0]*w55;
-                                EM_F[5]+=216*Y_p[0]*w55;
-                                EM_F[6]+=216*Y_p[0]*w55;
-                                EM_F[7]+=216*Y_p[0]*w55;
+                                EM_F[0]+=216.*Y_p[0]*w55;
+                                EM_F[1]+=216.*Y_p[0]*w55;
+                                EM_F[2]+=216.*Y_p[0]*w55;
+                                EM_F[3]+=216.*Y_p[0]*w55;
+                                EM_F[4]+=216.*Y_p[0]*w55;
+                                EM_F[5]+=216.*Y_p[0]*w55;
+                                EM_F[6]+=216.*Y_p[0]*w55;
+                                EM_F[7]+=216.*Y_p[0]*w55;
                             }
                         }
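The constant-data branch above is where the trailing dots on the integer coefficients matter (216 becoming 216., and likewise the 2./4./36. factors further down in this patch): once Scalar may be std::complex<double>, an expression such as 216*Y_p[0] no longer compiles, because the mixed scalar/complex operator* is a template in the value type and deduction fails for an int literal, whereas 216.*Y_p[0] deduces cleanly. A minimal, self-contained sketch of the same pattern (separate from the patch, and not escript's real assembler interface):

    // Sketch only: the constant-data accumulation, compiled for both
    // scalar types the templated assembler is meant to support.
    #include <complex>
    #include <cstddef>
    #include <vector>

    template<class Scalar>
    void addConstantY(std::vector<Scalar>& EM_F, const Scalar* Y_p, double w55)
    {
        for (std::size_t i = 0; i < EM_F.size(); ++i)
            EM_F[i] += 216.*Y_p[0]*w55;  // '216' (int) would not compile for complex Scalar
    }

    int main()
    {
        std::vector<double> Fr(8, 0.);
        const double Yr = 1.5;
        addConstantY(Fr, &Yr, 0.25);                 // Scalar = double

        std::vector<std::complex<double> > Fc(8);
        const std::complex<double> Yc(1.5, -0.5);
        addConstantY(Fc, &Yc, 0.25);                 // Scalar = std::complex<double>
        return 0;
    }
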
 
@@ -2213,8 +2228,10 @@ void DefaultAssembler3D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
 // PDE SINGLE BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
-                                Data& rhs, const Data& d, const Data& y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySingle(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& d, const Data& y) const
 {
     const double SQRT3 = 1.73205080756887719318;
     const double w12 = m_dx[0]*m_dx[1]/144;
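The hunk above shows the change applied to every entry point in this file: the DefaultAssembler3D member functions gain a template<class Scalar> head, i.e. the assembler class itself is now parameterised on the scalar type, so the same element-matrix code can serve a real or a complex right-hand side. With the definitions kept in the .cpp file, the usual way to make that linkable is explicit instantiation for the supported types; a hedged sketch of that layout (class and type names below are placeholders, escript keeps its own aliases in DataTypes):

    // Hypothetical layout of a templated assembler translation unit.
    #include <complex>

    typedef double real_t;                  // stand-ins for the DataTypes aliases
    typedef std::complex<double> cplx_t;

    template<class Scalar>
    class Assembler3DSketch {
    public:
        void assemblePDEBoundarySingle() const {}   // bodies elided
    };

    // emitted at the end of the .cpp so both scalar flavours can be linked
    template class Assembler3DSketch<real_t>;
    template class Assembler3DSketch<cplx_t>;
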
@@ -2232,26 +2249,27 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
     const double w1 = w2*(SQRT3 + 2);
     const double w3 = w2*(-4*SQRT3 + 7);
     const double w4 = w2*(4*SQRT3 + 7);
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = !d.isEmpty();
     const bool add_EM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8);
-        vector<double> EM_F(8);
+        vector<Scalar> EM_S(8*8);
+        vector<Scalar> EM_F(8);
 
         if (domain->m_faceOffset[0] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[1] = 0;
-                EM_F[3] = 0;
-                EM_F[5] = 0;
-                EM_F[7] = 0;
+                EM_F[1] = zero;
+                EM_F[3] = zero;
+                EM_F[5] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -2263,23 +2281,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w0*(d_0 + d_1);
-                                const double tmp1 = w1*(d_2 + d_3);
-                                const double tmp2 = w0*(d_0 + d_2);
-                                const double tmp3 = w1*(d_1 + d_3);
-                                const double tmp4 = w0*(d_1 + d_3);
-                                const double tmp5 = w1*(d_0 + d_2);
-                                const double tmp6 = w0*(d_2 + d_3);
-                                const double tmp7 = w1*(d_0 + d_1);
-                                const double tmp8 = w2*(d_0 + d_3);
-                                const double tmp9 = w2*(d_1 + d_2);
-                                const double tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w0*(d_0 + d_1);
+                                const Scalar tmp1 = w1*(d_2 + d_3);
+                                const Scalar tmp2 = w0*(d_0 + d_2);
+                                const Scalar tmp3 = w1*(d_1 + d_3);
+                                const Scalar tmp4 = w0*(d_1 + d_3);
+                                const Scalar tmp5 = w1*(d_0 + d_2);
+                                const Scalar tmp6 = w0*(d_2 + d_3);
+                                const Scalar tmp7 = w1*(d_0 + d_1);
+                                const Scalar tmp8 = w2*(d_0 + d_3);
+                                const Scalar tmp9 = w2*(d_1 + d_2);
+                                const Scalar tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
                                 EM_S[INDEX2(0,0,8)] = d_0*w4 + d_3*w3 + tmp9;
                                 EM_S[INDEX2(2,0,8)] = tmp6 + tmp7;
                                 EM_S[INDEX2(4,0,8)] = tmp4 + tmp5;
@@ -2297,46 +2315,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(4,6,8)] = tmp0 + tmp1;
                                 EM_S[INDEX2(6,6,8)] = d_0*w3 + d_3*w4 + tmp9;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w2;
-                                EM_S[INDEX2(0,0,8)] = 4*wd0;
-                                EM_S[INDEX2(2,0,8)] = 2*wd0;
-                                EM_S[INDEX2(4,0,8)] = 2*wd0;
-                                EM_S[INDEX2(6,0,8)] =   wd0;
-                                EM_S[INDEX2(0,2,8)] = 2*wd0;
-                                EM_S[INDEX2(2,2,8)] = 4*wd0;
-                                EM_S[INDEX2(4,2,8)] =   wd0;
-                                EM_S[INDEX2(6,2,8)] = 2*wd0;
-                                EM_S[INDEX2(0,4,8)] = 2*wd0;
-                                EM_S[INDEX2(2,4,8)] =   wd0;
-                                EM_S[INDEX2(4,4,8)] = 4*wd0;
-                                EM_S[INDEX2(6,4,8)] = 2*wd0;
-                                EM_S[INDEX2(0,6,8)] =   wd0;
-                                EM_S[INDEX2(2,6,8)] = 2*wd0;
-                                EM_S[INDEX2(4,6,8)] = 2*wd0;
-                                EM_S[INDEX2(6,6,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w2;
+                                EM_S[INDEX2(0,0,8)] = 4.*wd0;
+                                EM_S[INDEX2(2,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,0,8)] =    wd0;
+                                EM_S[INDEX2(0,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,2,8)] = 4.*wd0;
+                                EM_S[INDEX2(4,2,8)] =    wd0;
+                                EM_S[INDEX2(6,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,4,8)] =    wd0;
+                                EM_S[INDEX2(4,4,8)] = 4.*wd0;
+                                EM_S[INDEX2(6,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,6,8)] =    wd0;
+                                EM_S[INDEX2(2,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,6,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w2*(y_1 + y_2);
-                                const double tmp1 = 6*w2*(y_0 + y_3);
-                                EM_F[0] = tmp0 + 6*w0*y_3 + 6*w1*y_0;
-                                EM_F[2] = tmp1 + 6*w0*y_2 + 6*w1*y_1;
-                                EM_F[4] = tmp1 + 6*w0*y_1 + 6*w1*y_2;
-                                EM_F[6] = tmp0 + 6*w0*y_0 + 6*w1*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w2*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w2*(y_0 + y_3);
+                                EM_F[0] = tmp0 + 6.*w0*y_3 + 6.*w1*y_0;
+                                EM_F[2] = tmp1 + 6.*w0*y_2 + 6.*w1*y_1;
+                                EM_F[4] = tmp1 + 6.*w0*y_1 + 6.*w1*y_2;
+                                EM_F[6] = tmp0 + 6.*w0*y_0 + 6.*w1*y_3;
                             } else { // constant data
-                                EM_F[0] = 36*w2*y_p[0];
-                                EM_F[2] = 36*w2*y_p[0];
-                                EM_F[4] = 36*w2*y_p[0];
-                                EM_F[6] = 36*w2*y_p[0];
+                                EM_F[0] = 36.*w2*y_p[0];
+                                EM_F[2] = 36.*w2*y_p[0];
+                                EM_F[4] = 36.*w2*y_p[0];
+                                EM_F[6] = 36.*w2*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*k1;
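Note the new second argument in the d_p/y_p reads above: the assembler now passes a typed zero into getSampleDataRO(e, zero), so the call resolves to the sample buffer that matches Scalar, and the same sentinel is reused to clear EM_S and EM_F. The stand-in class below only illustrates that dispatch idea; it is not escript's Data API:

    // Illustration only: a fake data container with one overload per scalar type.
    #include <complex>
    #include <vector>

    typedef std::complex<double> cplx_t;

    class FakeData {
        std::vector<double> realBuf;
        std::vector<cplx_t> cplxBuf;
    public:
        FakeData() : realBuf(8, 1.), cplxBuf(8, cplx_t(1., 0.)) {}
        const double* getSampleDataRO(long /*e*/, double) const { return &realBuf[0]; }
        const cplx_t* getSampleDataRO(long /*e*/, cplx_t) const { return &cplxBuf[0]; }
    };

    template<class Scalar>
    Scalar firstValue(const FakeData& d, long e)
    {
        const Scalar zero = static_cast<Scalar>(0);
        const Scalar* p = d.getSampleDataRO(e, zero);  // overload chosen by the sentinel's type
        return p[0];
    }

    int main()
    {
        FakeData d;
        const double r = firstValue<double>(d, 0);     // reads the real buffer
        const cplx_t c = firstValue<cplx_t>(d, 0);     // reads the complex buffer
        (void)r; (void)c;
        return 0;
    }
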
@@ -2349,12 +2367,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[1] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[2] = 0;
-                EM_F[4] = 0;
-                EM_F[6] = 0;
+                EM_F[0] = zero;
+                EM_F[2] = zero;
+                EM_F[4] = zero;
+                EM_F[6] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -2366,23 +2384,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w0*(d_0 + d_2);
-                                const double tmp1 = w1*(d_1 + d_3);
-                                const double tmp2 = w0*(d_2 + d_3);
-                                const double tmp3 = w1*(d_0 + d_1);
-                                const double tmp4 = w0*(d_1 + d_3);
-                                const double tmp5 = w1*(d_0 + d_2);
-                                const double tmp6 = w2*(d_0 + d_3);
-                                const double tmp7 = w2*(d_1 + d_2);
-                                const double tmp8 = w0*(d_0 + d_1);
-                                const double tmp9 = w1*(d_2 + d_3);
-                                const double tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w0*(d_0 + d_2);
+                                const Scalar tmp1 = w1*(d_1 + d_3);
+                                const Scalar tmp2 = w0*(d_2 + d_3);
+                                const Scalar tmp3 = w1*(d_0 + d_1);
+                                const Scalar tmp4 = w0*(d_1 + d_3);
+                                const Scalar tmp5 = w1*(d_0 + d_2);
+                                const Scalar tmp6 = w2*(d_0 + d_3);
+                                const Scalar tmp7 = w2*(d_1 + d_2);
+                                const Scalar tmp8 = w0*(d_0 + d_1);
+                                const Scalar tmp9 = w1*(d_2 + d_3);
+                                const Scalar tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
                                 EM_S[INDEX2(1,1,8)] = d_0*w4 + d_3*w3 + tmp7;
                                 EM_S[INDEX2(3,1,8)] = tmp2 + tmp3;
                                 EM_S[INDEX2(5,1,8)] = tmp4 + tmp5;
@@ -2400,46 +2418,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(5,7,8)] = tmp8 + tmp9;
                                 EM_S[INDEX2(7,7,8)] = d_0*w3 + d_3*w4 + tmp7;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w2;
-                                EM_S[INDEX2(1,1,8)] = 4*wd0;
-                                EM_S[INDEX2(3,1,8)] = 2*wd0;
-                                EM_S[INDEX2(5,1,8)] = 2*wd0;
-                                EM_S[INDEX2(7,1,8)] =   wd0;
-                                EM_S[INDEX2(1,3,8)] = 2*wd0;
-                                EM_S[INDEX2(3,3,8)] = 4*wd0;
-                                EM_S[INDEX2(5,3,8)] =   wd0;
-                                EM_S[INDEX2(7,3,8)] = 2*wd0;
-                                EM_S[INDEX2(1,5,8)] = 2*wd0;
-                                EM_S[INDEX2(3,5,8)] =   wd0;
-                                EM_S[INDEX2(5,5,8)] = 4*wd0;
-                                EM_S[INDEX2(7,5,8)] = 2*wd0;
-                                EM_S[INDEX2(1,7,8)] =   wd0;
-                                EM_S[INDEX2(3,7,8)] = 2*wd0;
-                                EM_S[INDEX2(5,7,8)] = 2*wd0;
-                                EM_S[INDEX2(7,7,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w2;
+                                EM_S[INDEX2(1,1,8)] = 4.*wd0;
+                                EM_S[INDEX2(3,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,1,8)] =    wd0;
+                                EM_S[INDEX2(1,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,3,8)] = 4.*wd0;
+                                EM_S[INDEX2(5,3,8)] =    wd0;
+                                EM_S[INDEX2(7,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,5,8)] =    wd0;
+                                EM_S[INDEX2(5,5,8)] = 4.*wd0;
+                                EM_S[INDEX2(7,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,7,8)] =    wd0;
+                                EM_S[INDEX2(3,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,7,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w2*(y_1 + y_2);
-                                const double tmp1 = 6*w2*(y_0 + y_3);
-                                EM_F[1] = tmp0 + 6*w0*y_3 + 6*w1*y_0;
-                                EM_F[3] = tmp1 + 6*w0*y_2 + 6*w1*y_1;
-                                EM_F[5] = tmp1 + 6*w0*y_1 + 6*w1*y_2;
-                                EM_F[7] = tmp0 + 6*w0*y_0 + 6*w1*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w2*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w2*(y_0 + y_3);
+                                EM_F[1] = tmp0 + 6.*w0*y_3 + 6.*w1*y_0;
+                                EM_F[3] = tmp1 + 6.*w0*y_2 + 6.*w1*y_1;
+                                EM_F[5] = tmp1 + 6.*w0*y_1 + 6.*w1*y_2;
+                                EM_F[7] = tmp0 + 6.*w0*y_0 + 6.*w1*y_3;
                             } else { // constant data
-                                EM_F[1] = 36*w2*y_p[0];
-                                EM_F[3] = 36*w2*y_p[0];
-                                EM_F[5] = 36*w2*y_p[0];
-                                EM_F[7] = 36*w2*y_p[0];
+                                EM_F[1] = 36.*w2*y_p[0];
+                                EM_F[3] = 36.*w2*y_p[0];
+                                EM_F[5] = 36.*w2*y_p[0];
+                                EM_F[7] = 36.*w2*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(k1+1)-2;
@@ -2452,12 +2470,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[2] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[2] = 0;
-                EM_F[3] = 0;
-                EM_F[6] = 0;
-                EM_F[7] = 0;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
+                EM_F[6] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -2469,23 +2487,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w5*(d_0 + d_1);
-                                const double tmp1 = w6*(d_2 + d_3);
-                                const double tmp2 = w5*(d_0 + d_2);
-                                const double tmp3 = w6*(d_1 + d_3);
-                                const double tmp4 = w5*(d_1 + d_3);
-                                const double tmp5 = w6*(d_0 + d_2);
-                                const double tmp6 = w7*(d_0 + d_3);
-                                const double tmp7 = w7*(d_0 + d_1 + d_2 + d_3);
-                                const double tmp8 = w7*(d_1 + d_2);
-                                const double tmp9 = w5*(d_2 + d_3);
-                                const double tmp10 = w6*(d_0 + d_1);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w5*(d_0 + d_1);
+                                const Scalar tmp1 = w6*(d_2 + d_3);
+                                const Scalar tmp2 = w5*(d_0 + d_2);
+                                const Scalar tmp3 = w6*(d_1 + d_3);
+                                const Scalar tmp4 = w5*(d_1 + d_3);
+                                const Scalar tmp5 = w6*(d_0 + d_2);
+                                const Scalar tmp6 = w7*(d_0 + d_3);
+                                const Scalar tmp7 = w7*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar tmp8 = w7*(d_1 + d_2);
+                                const Scalar tmp9 = w5*(d_2 + d_3);
+                                const Scalar tmp10 = w6*(d_0 + d_1);
                                 EM_S[INDEX2(0,0,8)] = d_0*w9 + d_3*w8 + tmp8;
                                 EM_S[INDEX2(1,0,8)] = tmp10 + tmp9;
                                 EM_S[INDEX2(4,0,8)] = tmp4 + tmp5;
@@ -2503,46 +2521,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(4,5,8)] = tmp0 + tmp1;
                                 EM_S[INDEX2(5,5,8)] = d_0*w8 + d_3*w9 + tmp8;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w7;
-                                EM_S[INDEX2(0,0,8)] = 4*wd0;
-                                EM_S[INDEX2(1,0,8)] = 2*wd0;
-                                EM_S[INDEX2(4,0,8)] = 2*wd0;
-                                EM_S[INDEX2(5,0,8)] =   wd0;
-                                EM_S[INDEX2(0,1,8)] = 2*wd0;
-                                EM_S[INDEX2(1,1,8)] = 4*wd0;
-                                EM_S[INDEX2(4,1,8)] =   wd0;
-                                EM_S[INDEX2(5,1,8)] = 2*wd0;
-                                EM_S[INDEX2(0,4,8)] = 2*wd0;
-                                EM_S[INDEX2(1,4,8)] =   wd0;
-                                EM_S[INDEX2(4,4,8)] = 4*wd0;
-                                EM_S[INDEX2(5,4,8)] = 2*wd0;
-                                EM_S[INDEX2(0,5,8)] =   wd0;
-                                EM_S[INDEX2(1,5,8)] = 2*wd0;
-                                EM_S[INDEX2(4,5,8)] = 2*wd0;
-                                EM_S[INDEX2(5,5,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w7;
+                                EM_S[INDEX2(0,0,8)] = 4.*wd0;
+                                EM_S[INDEX2(1,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,0,8)] =    wd0;
+                                EM_S[INDEX2(0,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,1,8)] = 4.*wd0;
+                                EM_S[INDEX2(4,1,8)] =    wd0;
+                                EM_S[INDEX2(5,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,4,8)] =    wd0;
+                                EM_S[INDEX2(4,4,8)] = 4.*wd0;
+                                EM_S[INDEX2(5,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,5,8)] =    wd0;
+                                EM_S[INDEX2(1,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,5,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w7*(y_1 + y_2);
-                                const double tmp1 = 6*w7*(y_0 + y_3);
-                                EM_F[0] = tmp0 + 6*w5*y_3 + 6*w6*y_0;
-                                EM_F[1] = tmp1 + 6*w5*y_2 + 6*w6*y_1;
-                                EM_F[4] = tmp1 + 6*w5*y_1 + 6*w6*y_2;
-                                EM_F[5] = tmp0 + 6*w5*y_0 + 6*w6*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w7*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w7*(y_0 + y_3);
+                                EM_F[0] = tmp0 + 6.*w5*y_3 + 6.*w6*y_0;
+                                EM_F[1] = tmp1 + 6.*w5*y_2 + 6.*w6*y_1;
+                                EM_F[4] = tmp1 + 6.*w5*y_1 + 6.*w6*y_2;
+                                EM_F[5] = tmp0 + 6.*w5*y_0 + 6.*w6*y_3;
                             } else { // constant data
-                                EM_F[0] = 36*w7*y_p[0];
-                                EM_F[1] = 36*w7*y_p[0];
-                                EM_F[4] = 36*w7*y_p[0];
-                                EM_F[5] = 36*w7*y_p[0];
+                                EM_F[0] = 36.*w7*y_p[0];
+                                EM_F[1] = 36.*w7*y_p[0];
+                                EM_F[4] = 36.*w7*y_p[0];
+                                EM_F[5] = 36.*w7*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+k0;
@@ -2555,12 +2573,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[3] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
-                EM_F[4] = 0;
-                EM_F[5] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
+                EM_F[4] = zero;
+                EM_F[5] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -2572,23 +2590,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w5*(d_0 + d_2);
-                                const double tmp1 = w6*(d_1 + d_3);
-                                const double tmp2 = w5*(d_1 + d_3);
-                                const double tmp3 = w6*(d_0 + d_2);
-                                const double tmp4 = w7*(d_0 + d_1 + d_2 + d_3);
-                                const double tmp5 = w5*(d_0 + d_1);
-                                const double tmp6 = w6*(d_2 + d_3);
-                                const double tmp7 = w7*(d_0 + d_3);
-                                const double tmp8 = w7*(d_1 + d_2);
-                                const double tmp9 = w5*(d_2 + d_3);
-                                const double tmp10 = w6*(d_0 + d_1);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w5*(d_0 + d_2);
+                                const Scalar tmp1 = w6*(d_1 + d_3);
+                                const Scalar tmp2 = w5*(d_1 + d_3);
+                                const Scalar tmp3 = w6*(d_0 + d_2);
+                                const Scalar tmp4 = w7*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar tmp5 = w5*(d_0 + d_1);
+                                const Scalar tmp6 = w6*(d_2 + d_3);
+                                const Scalar tmp7 = w7*(d_0 + d_3);
+                                const Scalar tmp8 = w7*(d_1 + d_2);
+                                const Scalar tmp9 = w5*(d_2 + d_3);
+                                const Scalar tmp10 = w6*(d_0 + d_1);
                                 EM_S[INDEX2(2,2,8)] = d_0*w9 + d_3*w8 + tmp8;
                                 EM_S[INDEX2(3,2,8)] = tmp10 + tmp9;
                                 EM_S[INDEX2(6,2,8)] = tmp2 + tmp3;
@@ -2606,46 +2624,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(6,7,8)] = tmp5 + tmp6;
                                 EM_S[INDEX2(7,7,8)] = d_0*w8 + d_3*w9 + tmp8;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w7;
-                                EM_S[INDEX2(2,2,8)] = 4*wd0;
-                                EM_S[INDEX2(3,2,8)] = 2*wd0;
-                                EM_S[INDEX2(6,2,8)] = 2*wd0;
-                                EM_S[INDEX2(7,2,8)] =   wd0;
-                                EM_S[INDEX2(2,3,8)] = 2*wd0;
-                                EM_S[INDEX2(3,3,8)] = 4*wd0;
-                                EM_S[INDEX2(6,3,8)] =   wd0;
-                                EM_S[INDEX2(7,3,8)] = 2*wd0;
-                                EM_S[INDEX2(2,6,8)] = 2*wd0;
-                                EM_S[INDEX2(3,6,8)] =   wd0;
-                                EM_S[INDEX2(6,6,8)] = 4*wd0;
-                                EM_S[INDEX2(7,6,8)] = 2*wd0;
-                                EM_S[INDEX2(2,7,8)] =   wd0;
-                                EM_S[INDEX2(3,7,8)] = 2*wd0;
-                                EM_S[INDEX2(6,7,8)] = 2*wd0;
-                                EM_S[INDEX2(7,7,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w7;
+                                EM_S[INDEX2(2,2,8)] = 4.*wd0;
+                                EM_S[INDEX2(3,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,2,8)] =    wd0;
+                                EM_S[INDEX2(2,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,3,8)] = 4.*wd0;
+                                EM_S[INDEX2(6,3,8)] =    wd0;
+                                EM_S[INDEX2(7,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,6,8)] =    wd0;
+                                EM_S[INDEX2(6,6,8)] = 4.*wd0;
+                                EM_S[INDEX2(7,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,7,8)] =    wd0;
+                                EM_S[INDEX2(3,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,7,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w7*(y_1 + y_2);
-                                const double tmp1 = 6*w7*(y_0 + y_3);
-                                EM_F[2] = tmp0 + 6*w5*y_3 + 6*w6*y_0;
-                                EM_F[3] = tmp1 + 6*w5*y_2 + 6*w6*y_1;
-                                EM_F[6] = tmp1 + 6*w5*y_1 + 6*w6*y_2;
-                                EM_F[7] = tmp0 + 6*w5*y_0 + 6*w6*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w7*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w7*(y_0 + y_3);
+                                EM_F[2] = tmp0 + 6.*w5*y_3 + 6.*w6*y_0;
+                                EM_F[3] = tmp1 + 6.*w5*y_2 + 6.*w6*y_1;
+                                EM_F[6] = tmp1 + 6.*w5*y_1 + 6.*w6*y_2;
+                                EM_F[7] = tmp0 + 6.*w5*y_0 + 6.*w6*y_3;
                             } else { // constant data
-                                EM_F[2] = 36*w7*y_p[0];
-                                EM_F[3] = 36*w7*y_p[0];
-                                EM_F[6] = 36*w7*y_p[0];
-                                EM_F[7] = 36*w7*y_p[0];
+                                EM_F[2] = 36.*w7*y_p[0];
+                                EM_F[3] = 36.*w7*y_p[0];
+                                EM_F[6] = 36.*w7*y_p[0];
+                                EM_F[7] = 36.*w7*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(m_NN[1]-2)+k0;
@@ -2658,12 +2676,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[4] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[4] = 0;
-                EM_F[5] = 0;
-                EM_F[6] = 0;
-                EM_F[7] = 0;
+                EM_F[4] = zero;
+                EM_F[5] = zero;
+                EM_F[6] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -2675,23 +2693,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w10*(d_0 + d_2);
-                                const double tmp1 = w11*(d_1 + d_3);
-                                const double tmp2 = w12*(d_0 + d_1 + d_2 + d_3);
-                                const double tmp3 = w12*(d_1 + d_2);
-                                const double tmp4 = w10*(d_1 + d_3);
-                                const double tmp5 = w11*(d_0 + d_2);
-                                const double tmp6 = w12*(d_0 + d_3);
-                                const double tmp7 = w10*(d_0 + d_1);
-                                const double tmp8 = w11*(d_2 + d_3);
-                                const double tmp9 = w10*(d_2 + d_3);
-                                const double tmp10 = w11*(d_0 + d_1);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w10*(d_0 + d_2);
+                                const Scalar tmp1 = w11*(d_1 + d_3);
+                                const Scalar tmp2 = w12*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar tmp3 = w12*(d_1 + d_2);
+                                const Scalar tmp4 = w10*(d_1 + d_3);
+                                const Scalar tmp5 = w11*(d_0 + d_2);
+                                const Scalar tmp6 = w12*(d_0 + d_3);
+                                const Scalar tmp7 = w10*(d_0 + d_1);
+                                const Scalar tmp8 = w11*(d_2 + d_3);
+                                const Scalar tmp9 = w10*(d_2 + d_3);
+                                const Scalar tmp10 = w11*(d_0 + d_1);
                                 EM_S[INDEX2(0,0,8)] = d_0*w14 + d_3*w13 + tmp3;
                                 EM_S[INDEX2(1,0,8)] = tmp10 + tmp9;
                                 EM_S[INDEX2(2,0,8)] = tmp4 + tmp5;
@@ -2709,46 +2727,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(2,3,8)] = tmp7 + tmp8;
                                 EM_S[INDEX2(3,3,8)] = d_0*w13 + d_3*w14 + tmp3;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w12;
-                                EM_S[INDEX2(0,0,8)] = 4*wd0;
-                                EM_S[INDEX2(1,0,8)] = 2*wd0;
-                                EM_S[INDEX2(2,0,8)] = 2*wd0;
-                                EM_S[INDEX2(3,0,8)] =   wd0;
-                                EM_S[INDEX2(0,1,8)] = 2*wd0;
-                                EM_S[INDEX2(1,1,8)] = 4*wd0;
-                                EM_S[INDEX2(2,1,8)] =   wd0;
-                                EM_S[INDEX2(3,1,8)] = 2*wd0;
-                                EM_S[INDEX2(0,2,8)] = 2*wd0;
-                                EM_S[INDEX2(1,2,8)] =   wd0;
-                                EM_S[INDEX2(2,2,8)] = 4*wd0;
-                                EM_S[INDEX2(3,2,8)] = 2*wd0;
-                                EM_S[INDEX2(0,3,8)] =   wd0;
-                                EM_S[INDEX2(1,3,8)] = 2*wd0;
-                                EM_S[INDEX2(2,3,8)] = 2*wd0;
-                                EM_S[INDEX2(3,3,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w12;
+                                EM_S[INDEX2(0,0,8)] = 4.*wd0;
+                                EM_S[INDEX2(1,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,0,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,0,8)] =    wd0;
+                                EM_S[INDEX2(0,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,1,8)] = 4.*wd0;
+                                EM_S[INDEX2(2,1,8)] =    wd0;
+                                EM_S[INDEX2(3,1,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(1,2,8)] =    wd0;
+                                EM_S[INDEX2(2,2,8)] = 4.*wd0;
+                                EM_S[INDEX2(3,2,8)] = 2.*wd0;
+                                EM_S[INDEX2(0,3,8)] =    wd0;
+                                EM_S[INDEX2(1,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(2,3,8)] = 2.*wd0;
+                                EM_S[INDEX2(3,3,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w12*(y_1 + y_2);
-                                const double tmp1 = 6*w12*(y_0 + y_3);
-                                EM_F[0] = tmp0 + 6*w10*y_3 + 6*w11*y_0;
-                                EM_F[1] = tmp1 + 6*w10*y_2 + 6*w11*y_1;
-                                EM_F[2] = tmp1 + 6*w10*y_1 + 6*w11*y_2;
-                                EM_F[3] = tmp0 + 6*w10*y_0 + 6*w11*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w12*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w12*(y_0 + y_3);
+                                EM_F[0] = tmp0 + 6.*w10*y_3 + 6.*w11*y_0;
+                                EM_F[1] = tmp1 + 6.*w10*y_2 + 6.*w11*y_1;
+                                EM_F[2] = tmp1 + 6.*w10*y_1 + 6.*w11*y_2;
+                                EM_F[3] = tmp0 + 6.*w10*y_0 + 6.*w11*y_3;
                             } else { // constant data
-                                EM_F[0] = 36*w12*y_p[0];
-                                EM_F[1] = 36*w12*y_p[0];
-                                EM_F[2] = 36*w12*y_p[0];
-                                EM_F[3] = 36*w12*y_p[0];
+                                EM_F[0] = 36.*w12*y_p[0];
+                                EM_F[1] = 36.*w12*y_p[0];
+                                EM_F[2] = 36.*w12*y_p[0];
+                                EM_F[3] = 36.*w12*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*k1+k0;
@@ -2761,12 +2779,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[5] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
-                EM_F[2] = 0;
-                EM_F[3] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -2778,23 +2796,23 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
-                                const double d_0 = d_p[0];
-                                const double d_1 = d_p[1];
-                                const double d_2 = d_p[2];
-                                const double d_3 = d_p[3];
-                                const double tmp0 = w12*(d_0 + d_1 + d_2 + d_3);
-                                const double tmp1 = w10*(d_1 + d_3);
-                                const double tmp2 = w11*(d_0 + d_2);
-                                const double tmp3 = w10*(d_2 + d_3);
-                                const double tmp4 = w11*(d_0 + d_1);
-                                const double tmp5 = w10*(d_0 + d_1);
-                                const double tmp6 = w11*(d_2 + d_3);
-                                const double tmp7 = w12*(d_1 + d_2);
-                                const double tmp8 = w10*(d_0 + d_2);
-                                const double tmp9 = w11*(d_1 + d_3);
-                                const double tmp10 = w12*(d_0 + d_3);
+                                const Scalar d_0 = d_p[0];
+                                const Scalar d_1 = d_p[1];
+                                const Scalar d_2 = d_p[2];
+                                const Scalar d_3 = d_p[3];
+                                const Scalar tmp0 = w12*(d_0 + d_1 + d_2 + d_3);
+                                const Scalar tmp1 = w10*(d_1 + d_3);
+                                const Scalar tmp2 = w11*(d_0 + d_2);
+                                const Scalar tmp3 = w10*(d_2 + d_3);
+                                const Scalar tmp4 = w11*(d_0 + d_1);
+                                const Scalar tmp5 = w10*(d_0 + d_1);
+                                const Scalar tmp6 = w11*(d_2 + d_3);
+                                const Scalar tmp7 = w12*(d_1 + d_2);
+                                const Scalar tmp8 = w10*(d_0 + d_2);
+                                const Scalar tmp9 = w11*(d_1 + d_3);
+                                const Scalar tmp10 = w12*(d_0 + d_3);
                                 EM_S[INDEX2(4,4,8)] = d_0*w14 + d_3*w13 + tmp7;
                                 EM_S[INDEX2(5,4,8)] = tmp3 + tmp4;
                                 EM_S[INDEX2(6,4,8)] = tmp1 + tmp2;
@@ -2812,46 +2830,46 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
                                 EM_S[INDEX2(6,7,8)] = tmp5 + tmp6;
                                 EM_S[INDEX2(7,7,8)] = d_0*w13 + d_3*w14 + tmp7;
                             } else { // constant data
-                                const double wd0 = 4*d_p[0]*w12;
-                                EM_S[INDEX2(4,4,8)] = 4*wd0;
-                                EM_S[INDEX2(5,4,8)] = 2*wd0;
-                                EM_S[INDEX2(6,4,8)] = 2*wd0;
-                                EM_S[INDEX2(7,4,8)] =   wd0;
-                                EM_S[INDEX2(4,5,8)] = 2*wd0;
-                                EM_S[INDEX2(5,5,8)] = 4*wd0;
-                                EM_S[INDEX2(6,5,8)] =   wd0;
-                                EM_S[INDEX2(7,5,8)] = 2*wd0;
-                                EM_S[INDEX2(4,6,8)] = 2*wd0;
-                                EM_S[INDEX2(5,6,8)] =   wd0;
-                                EM_S[INDEX2(6,6,8)] = 4*wd0;
-                                EM_S[INDEX2(7,6,8)] = 2*wd0;
-                                EM_S[INDEX2(4,7,8)] =   wd0;
-                                EM_S[INDEX2(5,7,8)] = 2*wd0;
-                                EM_S[INDEX2(6,7,8)] = 2*wd0;
-                                EM_S[INDEX2(7,7,8)] = 4*wd0;
+                                const Scalar wd0 = 4.*d_p[0]*w12;
+                                EM_S[INDEX2(4,4,8)] = 4.*wd0;
+                                EM_S[INDEX2(5,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,4,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,4,8)] =    wd0;
+                                EM_S[INDEX2(4,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,5,8)] = 4.*wd0;
+                                EM_S[INDEX2(6,5,8)] =    wd0;
+                                EM_S[INDEX2(7,5,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(5,6,8)] =    wd0;
+                                EM_S[INDEX2(6,6,8)] = 4.*wd0;
+                                EM_S[INDEX2(7,6,8)] = 2.*wd0;
+                                EM_S[INDEX2(4,7,8)] =    wd0;
+                                EM_S[INDEX2(5,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(6,7,8)] = 2.*wd0;
+                                EM_S[INDEX2(7,7,8)] = 4.*wd0;
                             }
                         }
                         ///////////////
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
-                                const double y_0 = y_p[0];
-                                const double y_1 = y_p[1];
-                                const double y_2 = y_p[2];
-                                const double y_3 = y_p[3];
-                                const double tmp0 = 6*w12*(y_1 + y_2);
-                                const double tmp1 = 6*w12*(y_0 + y_3);
-                                EM_F[4] = tmp0 + 6*w10*y_3 + 6*w11*y_0;
-                                EM_F[5] = tmp1 + 6*w10*y_2 + 6*w11*y_1;
-                                EM_F[6] = tmp1 + 6*w10*y_1 + 6*w11*y_2;
-                                EM_F[7] = tmp0 + 6*w10*y_0 + 6*w11*y_3;
+                                const Scalar y_0 = y_p[0];
+                                const Scalar y_1 = y_p[1];
+                                const Scalar y_2 = y_p[2];
+                                const Scalar y_3 = y_p[3];
+                                const Scalar tmp0 = 6.*w12*(y_1 + y_2);
+                                const Scalar tmp1 = 6.*w12*(y_0 + y_3);
+                                EM_F[4] = tmp0 + 6.*w10*y_3 + 6.*w11*y_0;
+                                EM_F[5] = tmp1 + 6.*w10*y_2 + 6.*w11*y_1;
+                                EM_F[6] = tmp1 + 6.*w10*y_1 + 6.*w11*y_2;
+                                EM_F[7] = tmp0 + 6.*w10*y_0 + 6.*w11*y_3;
                             } else { // constant data
-                                EM_F[4] = 36*w12*y_p[0];
-                                EM_F[5] = 36*w12*y_p[0];
-                                EM_F[6] = 36*w12*y_p[0];
-                                EM_F[7] = 36*w12*y_p[0];
+                                EM_F[4] = 36.*w12*y_p[0];
+                                EM_F[5] = 36.*w12*y_p[0];
+                                EM_F[6] = 36.*w12*y_p[0];
+                                EM_F[7] = 36.*w12*y_p[0];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*(m_NN[2]-2)+m_NN[0]*k1+k0;
@@ -2868,10 +2886,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
 // PDE SINGLE REDUCED
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
-                                    Data& rhs, const Data& A, const Data& B,
-                                    const Data& C, const Data& D,
-                                    const Data& X, const Data& Y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESingleReduced(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& A, const Data& B,
+                                        const Data& C, const Data& D,
+                                        const Data& X, const Data& Y) const
 {
     const double w6 = m_dx[0]/16;
     const double w5 = m_dx[1]/16;
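Besides the Scalar templating, the loop bounds NE0/NE1/NE2 switch from int to dim_t here, as they did in the boundary assembler above, so the element counts use the library's own index typedef rather than a hard-coded int; where dim_t is configured as a 64-bit type this also lifts the 2^31 element limit. A toy illustration of that difference (dim_t below is a local 64-bit stand-in, not escript's definition):

    // Sketch only: a 2048 x 2048 x 1024 grid overflows a 32-bit int count.
    #include <cstdint>
    #include <iostream>

    typedef std::int64_t dim_t;   // assumed 64-bit index for the sketch

    int main()
    {
        const dim_t NE0 = 2048, NE1 = 2048, NE2 = 1024;
        const dim_t total = NE0*NE1*NE2;          // 4294967296, fine in 64 bits
        std::cout << total << std::endl;
        // The same product with 32-bit int operands would overflow (undefined behaviour).
        return 0;
    }
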
@@ -2883,17 +2903,18 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
     const double w11 = m_dx[0]*m_dx[1]/(16*m_dx[2]);
     const double w3 = m_dx[0]*m_dx[2]/(16*m_dx[1]);
     const double w0 = m_dx[1]*m_dx[2]/(16*m_dx[0]);
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool add_EM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8, 0);
-        vector<double> EM_F(8, 0);
+        vector<Scalar> EM_S(8*8, zero);
+        vector<Scalar> EM_F(8, zero);
 
         for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -2902,24 +2923,24 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                     for (index_t k0=0; k0<NE0; ++k0)  {
                         const index_t e = k0 + NE0*k1 + NE0*NE1*k2;
                         if (add_EM_S)
-                            fill(EM_S.begin(), EM_S.end(), 0);
+                            fill(EM_S.begin(), EM_S.end(), zero);
                         if (add_EM_F)
-                            fill(EM_F.begin(), EM_F.end(), 0);
+                            fill(EM_F.begin(), EM_F.end(), zero);
 
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double* A_p=A.getSampleDataRO(e);
-                            const double Aw00 = A_p[INDEX2(0,0,3)]*w0;
-                            const double Aw10 = A_p[INDEX2(1,0,3)]*w1;
-                            const double Aw20 = A_p[INDEX2(2,0,3)]*w5;
-                            const double Aw01 = A_p[INDEX2(0,1,3)]*w1;
-                            const double Aw11 = A_p[INDEX2(1,1,3)]*w3;
-                            const double Aw21 = A_p[INDEX2(2,1,3)]*w6;
-                            const double Aw02 = A_p[INDEX2(0,2,3)]*w5;
-                            const double Aw12 = A_p[INDEX2(1,2,3)]*w6;
-                            const double Aw22 = A_p[INDEX2(2,2,3)]*w11;
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
+                            const Scalar Aw00 = A_p[INDEX2(0,0,3)]*w0;
+                            const Scalar Aw10 = A_p[INDEX2(1,0,3)]*w1;
+                            const Scalar Aw20 = A_p[INDEX2(2,0,3)]*w5;
+                            const Scalar Aw01 = A_p[INDEX2(0,1,3)]*w1;
+                            const Scalar Aw11 = A_p[INDEX2(1,1,3)]*w3;
+                            const Scalar Aw21 = A_p[INDEX2(2,1,3)]*w6;
+                            const Scalar Aw02 = A_p[INDEX2(0,2,3)]*w5;
+                            const Scalar Aw12 = A_p[INDEX2(1,2,3)]*w6;
+                            const Scalar Aw22 = A_p[INDEX2(2,2,3)]*w11;
                             EM_S[INDEX2(0,0,8)]+= Aw00 + Aw01 + Aw02 + Aw10 + Aw11 + Aw12 + Aw20 + Aw21 + Aw22;
                             EM_S[INDEX2(1,0,8)]+=-Aw00 - Aw01 - Aw02 + Aw10 + Aw11 + Aw12 + Aw20 + Aw21 + Aw22;
                             EM_S[INDEX2(2,0,8)]+= Aw00 + Aw01 + Aw02 - Aw10 - Aw11 - Aw12 + Aw20 + Aw21 + Aw22;
@@ -2989,10 +3010,10 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double* B_p=B.getSampleDataRO(e);
-                            const double wB0 = B_p[0]*w12;
-                            const double wB1 = B_p[1]*w13;
-                            const double wB2 = B_p[2]*w14;
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
+                            const Scalar wB0 = B_p[0]*w12;
+                            const Scalar wB1 = B_p[1]*w13;
+                            const Scalar wB2 = B_p[2]*w14;
                             EM_S[INDEX2(0,0,8)]+=-wB0 - wB1 - wB2;
                             EM_S[INDEX2(1,0,8)]+= wB0 - wB1 - wB2;
                             EM_S[INDEX2(2,0,8)]+=-wB0 + wB1 - wB2;
@@ -3062,10 +3083,10 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double* C_p=C.getSampleDataRO(e);
-                            const double wC0 = C_p[0]*w12;
-                            const double wC1 = C_p[1]*w13;
-                            const double wC2 = C_p[2]*w14;
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
+                            const Scalar wC0 = C_p[0]*w12;
+                            const Scalar wC1 = C_p[1]*w13;
+                            const Scalar wC2 = C_p[2]*w14;
                             EM_S[INDEX2(0,0,8)]+=-wC0 - wC1 - wC2;
                             EM_S[INDEX2(1,0,8)]+=-wC0 - wC1 - wC2;
                             EM_S[INDEX2(2,0,8)]+=-wC0 - wC1 - wC2;
@@ -3135,7 +3156,7 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double* D_p=D.getSampleDataRO(e);
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
                             EM_S[INDEX2(0,0,8)]+=D_p[0]*w18;
                             EM_S[INDEX2(1,0,8)]+=D_p[0]*w18;
                             EM_S[INDEX2(2,0,8)]+=D_p[0]*w18;
@@ -3205,10 +3226,10 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double* X_p=X.getSampleDataRO(e);
-                            const double wX0 = 8*X_p[0]*w12;
-                            const double wX1 = 8*X_p[1]*w13;
-                            const double wX2 = 8*X_p[2]*w14;
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
+                            const Scalar wX0 = 8.*X_p[0]*w12;
+                            const Scalar wX1 = 8.*X_p[1]*w13;
+                            const Scalar wX2 = 8.*X_p[2]*w14;
                             EM_F[0]+=-wX0 - wX1 - wX2;
                             EM_F[1]+= wX0 - wX1 - wX2;
                             EM_F[2]+=-wX0 + wX1 - wX2;
@@ -3222,15 +3243,15 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double* Y_p=Y.getSampleDataRO(e);
-                            EM_F[0]+=8*Y_p[0]*w18;
-                            EM_F[1]+=8*Y_p[0]*w18;
-                            EM_F[2]+=8*Y_p[0]*w18;
-                            EM_F[3]+=8*Y_p[0]*w18;
-                            EM_F[4]+=8*Y_p[0]*w18;
-                            EM_F[5]+=8*Y_p[0]*w18;
-                            EM_F[6]+=8*Y_p[0]*w18;
-                            EM_F[7]+=8*Y_p[0]*w18;
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
+                            EM_F[0]+=8.*Y_p[0]*w18;
+                            EM_F[1]+=8.*Y_p[0]*w18;
+                            EM_F[2]+=8.*Y_p[0]*w18;
+                            EM_F[3]+=8.*Y_p[0]*w18;
+                            EM_F[4]+=8.*Y_p[0]*w18;
+                            EM_F[5]+=8.*Y_p[0]*w18;
+                            EM_F[6]+=8.*Y_p[0]*w18;
+                            EM_F[7]+=8.*Y_p[0]*w18;
                         }
 
                         // add to matrix (if add_EM_S) and RHS (if add_EM_F)
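
The hunks above and below all apply the same mechanical change: the 3D assembler is templated on a Scalar type, the local element buffers become vector<Scalar>, a typed zero constant replaces the literal 0, element counts move from int to dim_t, and coefficient samples are fetched via getSampleDataRO(e, zero). The following is a minimal standalone sketch of that pattern, not escript code; accumulateElement and its arguments are hypothetical, and it only shows how one routine instantiates for both double and std::complex<double>.

    #include <algorithm>
    #include <complex>
    #include <cstddef>
    #include <vector>

    // Toy stand-in for the templated assembly pattern in the hunks above:
    // the element buffer, the zero used to reset it, and the sample pointer
    // all carry the Scalar type, so one routine serves real and complex builds.
    template<class Scalar>
    void accumulateElement(std::vector<Scalar>& EM_F,
                           const Scalar* sample, double w18)
    {
        const Scalar zero = static_cast<Scalar>(0);
        std::fill(EM_F.begin(), EM_F.end(), zero);   // reset per element
        for (std::size_t i = 0; i < EM_F.size(); ++i)
            EM_F[i] += 8.*sample[0]*w18;             // 8. keeps the product in Scalar
    }

    // Explicit instantiations mirroring a real and a complex build:
    template void accumulateElement<double>(
        std::vector<double>&, const double*, double);
    template void accumulateElement<std::complex<double> >(
        std::vector<std::complex<double> >&, const std::complex<double>*, double);

Keeping the quadrature weights (w0, w18, ...) as plain double while the buffers and samples carry Scalar matches what the hunks do: the geometry factors stay real even in a complex build.
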
@@ -3248,33 +3269,35 @@ void DefaultAssembler3D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
 // PDE SINGLE REDUCED BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySingleReduced(
                                         AbstractSystemMatrix* mat, Data& rhs,
                                         const Data& d, const Data& y) const
 {
     const double w0 = m_dx[0]*m_dx[1]/16;
     const double w1 = m_dx[0]*m_dx[2]/16;
     const double w2 = m_dx[1]*m_dx[2]/16;
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = !d.isEmpty();
     const bool add_EM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8);
-        vector<double> EM_F(8);
+        vector<Scalar> EM_S(8*8);
+        vector<Scalar> EM_F(8);
 
         if (domain->m_faceOffset[0] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[1] = 0;
-                EM_F[3] = 0;
-                EM_F[5] = 0;
-                EM_F[7] = 0;
+                EM_F[1] = zero;
+                EM_F[3] = zero;
+                EM_F[5] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -3286,7 +3309,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(0,0,8)] = d_p[0]*w2;
                             EM_S[INDEX2(2,0,8)] = d_p[0]*w2;
                             EM_S[INDEX2(4,0,8)] = d_p[0]*w2;
@@ -3308,11 +3331,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[0] = 4*w2*y_p[0];
-                            EM_F[2] = 4*w2*y_p[0];
-                            EM_F[4] = 4*w2*y_p[0];
-                            EM_F[6] = 4*w2*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[0] = 4.*w2*y_p[0];
+                            EM_F[2] = 4.*w2*y_p[0];
+                            EM_F[4] = 4.*w2*y_p[0];
+                            EM_F[6] = 4.*w2*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*k1;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
@@ -3324,12 +3347,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[1] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[2] = 0;
-                EM_F[4] = 0;
-                EM_F[6] = 0;
+                EM_F[0] = zero;
+                EM_F[2] = zero;
+                EM_F[4] = zero;
+                EM_F[6] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -3341,7 +3364,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(1,1,8)] = d_p[0]*w2;
                             EM_S[INDEX2(3,1,8)] = d_p[0]*w2;
                             EM_S[INDEX2(5,1,8)] = d_p[0]*w2;
@@ -3363,11 +3386,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[1] = 4*w2*y_p[0];
-                            EM_F[3] = 4*w2*y_p[0];
-                            EM_F[5] = 4*w2*y_p[0];
-                            EM_F[7] = 4*w2*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[1] = 4.*w2*y_p[0];
+                            EM_F[3] = 4.*w2*y_p[0];
+                            EM_F[5] = 4.*w2*y_p[0];
+                            EM_F[7] = 4.*w2*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(k1+1)-2;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
@@ -3379,12 +3402,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[2] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[2] = 0;
-                EM_F[3] = 0;
-                EM_F[6] = 0;
-                EM_F[7] = 0;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
+                EM_F[6] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -3396,7 +3419,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(0,0,8)] = d_p[0]*w1;
                             EM_S[INDEX2(1,0,8)] = d_p[0]*w1;
                             EM_S[INDEX2(4,0,8)] = d_p[0]*w1;
@@ -3418,11 +3441,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[0] = 4*w1*y_p[0];
-                            EM_F[1] = 4*w1*y_p[0];
-                            EM_F[4] = 4*w1*y_p[0];
-                            EM_F[5] = 4*w1*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[0] = 4.*w1*y_p[0];
+                            EM_F[1] = 4.*w1*y_p[0];
+                            EM_F[4] = 4.*w1*y_p[0];
+                            EM_F[5] = 4.*w1*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+k0;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
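
One detail in these hunks is that the integer factors 4 and 8 are rewritten as double literals (4. and 8.). Where the literal multiplies the coefficient sample directly, as in 8*Y_p[0]*w18 in the interior assembly, the product int*std::complex<double> does not compile once Scalar is complex, because template argument deduction for the mixed operator*(T, complex<T>) sees T = int on one side and T = double on the other; spelling the factor as a double avoids that. In 4*w2*y_p[0] the leftmost product is int*double and would still compile, so the 4. spelling there is mainly for consistency. A standalone check of both cases:

    #include <complex>

    int main()
    {
        const std::complex<double> y(1.0, 2.0);
        const double w = 0.25;

        std::complex<double> a = 8.*y*w;   // OK: operator*(double, complex<double>)
        std::complex<double> b = 4*w*y;    // also OK: 4*w is evaluated as double first
        // std::complex<double> c = 8*y*w; // would not compile: no viable operator*(int, complex<double>)
        return static_cast<int>((a + b).real());
    }
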
@@ -3434,12 +3457,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[3] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
-                EM_F[4] = 0;
-                EM_F[5] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
+                EM_F[4] = zero;
+                EM_F[5] = zero;
             }
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
@@ -3451,7 +3474,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(2,2,8)] = d_p[0]*w1;
                             EM_S[INDEX2(3,2,8)] = d_p[0]*w1;
                             EM_S[INDEX2(6,2,8)] = d_p[0]*w1;
@@ -3473,11 +3496,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[2] = 4*w1*y_p[0];
-                            EM_F[3] = 4*w1*y_p[0];
-                            EM_F[6] = 4*w1*y_p[0];
-                            EM_F[7] = 4*w1*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[2] = 4.*w1*y_p[0];
+                            EM_F[3] = 4.*w1*y_p[0];
+                            EM_F[6] = 4.*w1*y_p[0];
+                            EM_F[7] = 4.*w1*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(m_NN[1]-2)+k0;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
@@ -3489,12 +3512,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[4] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[4] = 0;
-                EM_F[5] = 0;
-                EM_F[6] = 0;
-                EM_F[7] = 0;
+                EM_F[4] = zero;
+                EM_F[5] = zero;
+                EM_F[6] = zero;
+                EM_F[7] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -3506,7 +3529,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(0,0,8)] = d_p[0]*w0;
                             EM_S[INDEX2(1,0,8)] = d_p[0]*w0;
                             EM_S[INDEX2(2,0,8)] = d_p[0]*w0;
@@ -3528,11 +3551,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[0] = 4*w0*y_p[0];
-                            EM_F[1] = 4*w0*y_p[0];
-                            EM_F[2] = 4*w0*y_p[0];
-                            EM_F[3] = 4*w0*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[0] = 4.*w0*y_p[0];
+                            EM_F[1] = 4.*w0*y_p[0];
+                            EM_F[2] = 4.*w0*y_p[0];
+                            EM_F[3] = 4.*w0*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*k1+k0;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
@@ -3544,12 +3567,12 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 
         if (domain->m_faceOffset[5] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F) {
-                EM_F[0] = 0;
-                EM_F[1] = 0;
-                EM_F[2] = 0;
-                EM_F[3] = 0;
+                EM_F[0] = zero;
+                EM_F[1] = zero;
+                EM_F[2] = zero;
+                EM_F[3] = zero;
             }
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
@@ -3561,7 +3584,7 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             EM_S[INDEX2(4,4,8)] = d_p[0]*w0;
                             EM_S[INDEX2(5,4,8)] = d_p[0]*w0;
                             EM_S[INDEX2(6,4,8)] = d_p[0]*w0;
@@ -3583,11 +3606,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
-                            EM_F[4] = 4*w0*y_p[0];
-                            EM_F[5] = 4*w0*y_p[0];
-                            EM_F[6] = 4*w0*y_p[0];
-                            EM_F[7] = 4*w0*y_p[0];
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
+                            EM_F[4] = 4.*w0*y_p[0];
+                            EM_F[5] = 4.*w0*y_p[0];
+                            EM_F[6] = 4.*w0*y_p[0];
+                            EM_F[7] = 4.*w0*y_p[0];
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*(m_NN[2]-2)+m_NN[0]*k1+k0;
                         domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F,
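
Throughout these hunks the surrounding loop structure is kept as context: an omp parallel region, a two-pass colouring loop (k2_0 or k1_0 running over 0 and 1), an omp for over the elements of one colour, the element index e = k0 + NE0*k1 + NE0*NE1*k2, and a call to domain->addToMatrixAndRHS(). The colouring is presumably there so that concurrently processed elements never update the same matrix rows. The toy below only sketches that traversal order; the stride of the coloured loop is assumed (it falls outside the visible hunks) and processElement() is a hypothetical placeholder, so treat it as an illustration rather than the escript implementation.

    // Toy traversal mirroring the colouring loops kept as context above.
    static void processElement(long e)
    {
        (void)e;   // per-element assembly (EM_S/EM_F, addToMatrixAndRHS) would go here
    }

    void assembleColoured(long NE0, long NE1, long NE2)
    {
    #pragma omp parallel
        for (long k2_0 = 0; k2_0 < 2; ++k2_0) {            // colouring
    #pragma omp for
            for (long k2 = k2_0; k2 < NE2; k2 += 2)         // one colour per pass (stride assumed)
                for (long k1 = 0; k1 < NE1; ++k1)
                    for (long k0 = 0; k0 < NE0; ++k0)
                        processElement(k0 + NE0*k1 + NE0*NE1*k2);
            // the implicit barrier of the omp for keeps the two colours apart
        }
    }
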
@@ -3603,10 +3626,11 @@ void DefaultAssembler3D::assemblePDEBoundarySingleReduced(
 // PDE SYSTEM
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
-                                           const Data& A, const Data& B,
-                                           const Data& C, const Data& D,
-                                           const Data& X, const Data& Y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESystem(AbstractSystemMatrix* mat,
+                                       Data& rhs, const Data& A, const Data& B,
+                                       const Data& C, const Data& D,
+                                       const Data& X, const Data& Y) const
 {
     dim_t numEq, numComp;
     if (!mat)
@@ -3688,17 +3712,18 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
     const double w14 = w27*(-SQRT3 - 2);
     const double w28 = w27*(-4*SQRT3 + 7);
     const double w29 = w27*(4*SQRT3 + 7);
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool add_EM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8*numEq*numComp, 0);
-        vector<double> EM_F(8*numEq, 0);
+        vector<Scalar> EM_S(8*8*numEq*numComp, zero);
+        vector<Scalar> EM_F(8*numEq, zero);
 
         for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -3707,600 +3732,600 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                     for (index_t k0=0; k0<NE0; ++k0)  {
                         const index_t e = k0 + NE0*k1 + NE0*NE1*k2;
                         if (add_EM_S)
-                            fill(EM_S.begin(), EM_S.end(), 0);
+                            fill(EM_S.begin(), EM_S.end(), zero);
                         if (add_EM_F)
-                            fill(EM_F.begin(), EM_F.end(), 0);
+                            fill(EM_F.begin(), EM_F.end(), zero);
 
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double* A_p = A.getSampleDataRO(e);
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
                             if (A.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,3,numComp,3)];
-                                        const double A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,3,numComp,3)];
-                                        const double A_02_0 = A_p[INDEX5(k,0,m,2,0,numEq,3,numComp,3)];
-                                        const double A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,3,numComp,3)];
-                                        const double A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,3,numComp,3)];
-                                        const double A_12_0 = A_p[INDEX5(k,1,m,2,0,numEq,3,numComp,3)];
-                                        const double A_20_0 = A_p[INDEX5(k,2,m,0,0,numEq,3,numComp,3)];
-                                        const double A_21_0 = A_p[INDEX5(k,2,m,1,0,numEq,3,numComp,3)];
-                                        const double A_22_0 = A_p[INDEX5(k,2,m,2,0,numEq,3,numComp,3)];
-                                        const double A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,3,numComp,3)];
-                                        const double A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,3,numComp,3)];
-                                        const double A_02_1 = A_p[INDEX5(k,0,m,2,1,numEq,3,numComp,3)];
-                                        const double A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,3,numComp,3)];
-                                        const double A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,3,numComp,3)];
-                                        const double A_12_1 = A_p[INDEX5(k,1,m,2,1,numEq,3,numComp,3)];
-                                        const double A_20_1 = A_p[INDEX5(k,2,m,0,1,numEq,3,numComp,3)];
-                                        const double A_21_1 = A_p[INDEX5(k,2,m,1,1,numEq,3,numComp,3)];
-                                        const double A_22_1 = A_p[INDEX5(k,2,m,2,1,numEq,3,numComp,3)];
-                                        const double A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,3,numComp,3)];
-                                        const double A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,3,numComp,3)];
-                                        const double A_02_2 = A_p[INDEX5(k,0,m,2,2,numEq,3,numComp,3)];
-                                        const double A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,3,numComp,3)];
-                                        const double A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,3,numComp,3)];
-                                        const double A_12_2 = A_p[INDEX5(k,1,m,2,2,numEq,3,numComp,3)];
-                                        const double A_20_2 = A_p[INDEX5(k,2,m,0,2,numEq,3,numComp,3)];
-                                        const double A_21_2 = A_p[INDEX5(k,2,m,1,2,numEq,3,numComp,3)];
-                                        const double A_22_2 = A_p[INDEX5(k,2,m,2,2,numEq,3,numComp,3)];
-                                        const double A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,3,numComp,3)];
-                                        const double A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,3,numComp,3)];
-                                        const double A_02_3 = A_p[INDEX5(k,0,m,2,3,numEq,3,numComp,3)];
-                                        const double A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,3,numComp,3)];
-                                        const double A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,3,numComp,3)];
-                                        const double A_12_3 = A_p[INDEX5(k,1,m,2,3,numEq,3,numComp,3)];
-                                        const double A_20_3 = A_p[INDEX5(k,2,m,0,3,numEq,3,numComp,3)];
-                                        const double A_21_3 = A_p[INDEX5(k,2,m,1,3,numEq,3,numComp,3)];
-                                        const double A_22_3 = A_p[INDEX5(k,2,m,2,3,numEq,3,numComp,3)];
-                                        const double A_00_4 = A_p[INDEX5(k,0,m,0,4,numEq,3,numComp,3)];
-                                        const double A_01_4 = A_p[INDEX5(k,0,m,1,4,numEq,3,numComp,3)];
-                                        const double A_02_4 = A_p[INDEX5(k,0,m,2,4,numEq,3,numComp,3)];
-                                        const double A_10_4 = A_p[INDEX5(k,1,m,0,4,numEq,3,numComp,3)];
-                                        const double A_11_4 = A_p[INDEX5(k,1,m,1,4,numEq,3,numComp,3)];
-                                        const double A_12_4 = A_p[INDEX5(k,1,m,2,4,numEq,3,numComp,3)];
-                                        const double A_20_4 = A_p[INDEX5(k,2,m,0,4,numEq,3,numComp,3)];
-                                        const double A_21_4 = A_p[INDEX5(k,2,m,1,4,numEq,3,numComp,3)];
-                                        const double A_22_4 = A_p[INDEX5(k,2,m,2,4,numEq,3,numComp,3)];
-                                        const double A_00_5 = A_p[INDEX5(k,0,m,0,5,numEq,3,numComp,3)];
-                                        const double A_01_5 = A_p[INDEX5(k,0,m,1,5,numEq,3,numComp,3)];
-                                        const double A_02_5 = A_p[INDEX5(k,0,m,2,5,numEq,3,numComp,3)];
-                                        const double A_10_5 = A_p[INDEX5(k,1,m,0,5,numEq,3,numComp,3)];
-                                        const double A_11_5 = A_p[INDEX5(k,1,m,1,5,numEq,3,numComp,3)];
-                                        const double A_12_5 = A_p[INDEX5(k,1,m,2,5,numEq,3,numComp,3)];
-                                        const double A_20_5 = A_p[INDEX5(k,2,m,0,5,numEq,3,numComp,3)];
-                                        const double A_21_5 = A_p[INDEX5(k,2,m,1,5,numEq,3,numComp,3)];
-                                        const double A_22_5 = A_p[INDEX5(k,2,m,2,5,numEq,3,numComp,3)];
-                                        const double A_00_6 = A_p[INDEX5(k,0,m,0,6,numEq,3,numComp,3)];
-                                        const double A_01_6 = A_p[INDEX5(k,0,m,1,6,numEq,3,numComp,3)];
-                                        const double A_02_6 = A_p[INDEX5(k,0,m,2,6,numEq,3,numComp,3)];
-                                        const double A_10_6 = A_p[INDEX5(k,1,m,0,6,numEq,3,numComp,3)];
-                                        const double A_11_6 = A_p[INDEX5(k,1,m,1,6,numEq,3,numComp,3)];
-                                        const double A_12_6 = A_p[INDEX5(k,1,m,2,6,numEq,3,numComp,3)];
-                                        const double A_20_6 = A_p[INDEX5(k,2,m,0,6,numEq,3,numComp,3)];
-                                        const double A_21_6 = A_p[INDEX5(k,2,m,1,6,numEq,3,numComp,3)];
-                                        const double A_22_6 = A_p[INDEX5(k,2,m,2,6,numEq,3,numComp,3)];
-                                        const double A_00_7 = A_p[INDEX5(k,0,m,0,7,numEq,3,numComp,3)];
-                                        const double A_01_7 = A_p[INDEX5(k,0,m,1,7,numEq,3,numComp,3)];
-                                        const double A_02_7 = A_p[INDEX5(k,0,m,2,7,numEq,3,numComp,3)];
-                                        const double A_10_7 = A_p[INDEX5(k,1,m,0,7,numEq,3,numComp,3)];
-                                        const double A_11_7 = A_p[INDEX5(k,1,m,1,7,numEq,3,numComp,3)];
-                                        const double A_12_7 = A_p[INDEX5(k,1,m,2,7,numEq,3,numComp,3)];
-                                        const double A_20_7 = A_p[INDEX5(k,2,m,0,7,numEq,3,numComp,3)];
-                                        const double A_21_7 = A_p[INDEX5(k,2,m,1,7,numEq,3,numComp,3)];
-                                        const double A_22_7 = A_p[INDEX5(k,2,m,2,7,numEq,3,numComp,3)];
-                                        const double tmp0 = w18*(-A_12_7 + A_21_3);
-                                        const double tmp1 = w13*(A_22_1 + A_22_2 + A_22_5 + A_22_6);
-                                        const double tmp2 = w11*(-A_02_2 - A_02_5 + A_20_1 + A_20_6);
-                                        const double tmp3 = w14*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
-                                        const double tmp4 = w7*(A_22_0 + A_22_4);
-                                        const double tmp5 = w10*(A_12_1 + A_12_6 - A_21_2 - A_21_5);
-                                        const double tmp6 = w3*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
-                                        const double tmp7 = w1*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
-                                        const double tmp8 = w4*(A_12_0 - A_21_4);
-                                        const double tmp9 = w15*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
-                                        const double tmp10 = w0*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
-                                        const double tmp11 = w16*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
-                                        const double tmp12 = w9*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
-                                        const double tmp13 = w12*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
-                                        const double tmp14 = w5*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
-                                        const double tmp15 = w8*(A_01_1 + A_01_2 + A_01_5 + A_01_6 + A_10_1 + A_10_2 + A_10_5 + A_10_6);
-                                        const double tmp16 = w6*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
-                                        const double tmp17 = w19*(A_22_3 + A_22_7);
-                                        const double tmp18 = w17*(-A_02_7 + A_20_3);
-                                        const double tmp19 = w2*(A_02_0 - A_20_4);
-                                        const double tmp20 = w13*(-A_22_0 - A_22_1 - A_22_2 - A_22_3 - A_22_4 - A_22_5 - A_22_6 - A_22_7);
-                                        const double tmp21 = w11*(-A_02_1 - A_02_3 - A_02_4 - A_02_6 + A_20_0 + A_20_2 + A_20_5 + A_20_7);
-                                        const double tmp22 = w14*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                        const double tmp23 = w20*(A_01_2 + A_10_1);
-                                        const double tmp24 = w10*(A_12_2 + A_12_3 + A_12_4 + A_12_5 - A_21_0 - A_21_1 - A_21_6 - A_21_7);
-                                        const double tmp25 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                                        const double tmp26 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                                        const double tmp27 = w15*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
-                                        const double tmp28 = w0*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                                        const double tmp29 = w16*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
-                                        const double tmp30 = w9*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                        const double tmp31 = w21*(A_01_5 + A_10_6);
-                                        const double tmp32 = w12*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
-                                        const double tmp33 = w5*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
-                                        const double tmp34 = w8*(-A_01_1 - A_01_6 - A_10_2 - A_10_5);
-                                        const double tmp35 = w6*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
-                                        const double tmp36 = w20*(-A_01_6 + A_10_4);
-                                        const double tmp37 = w18*(A_12_3 - A_21_1);
-                                        const double tmp38 = w11*(-A_02_0 - A_02_2 - A_02_5 - A_02_7 - A_20_0 - A_20_2 - A_20_5 - A_20_7);
-                                        const double tmp39 = w14*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                                        const double tmp40 = w26*(A_11_4 + A_11_6);
-                                        const double tmp41 = w0*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                        const double tmp42 = w10*(-A_12_2 - A_12_5 + A_21_0 + A_21_7);
-                                        const double tmp43 = w22*(A_11_0 + A_11_2 + A_11_5 + A_11_7);
-                                        const double tmp44 = w1*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
-                                        const double tmp45 = w25*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
-                                        const double tmp46 = w4*(-A_12_4 + A_21_6);
-                                        const double tmp47 = w15*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
-                                        const double tmp48 = w21*(-A_01_1 + A_10_3);
-                                        const double tmp49 = w16*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                                        const double tmp50 = w5*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
-                                        const double tmp51 = w12*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
-                                        const double tmp52 = w24*(A_11_1 + A_11_3);
-                                        const double tmp53 = w8*(A_01_2 + A_01_5 - A_10_0 - A_10_7);
-                                        const double tmp54 = w6*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
-                                        const double tmp55 = w23*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
-                                        const double tmp56 = w18*(A_12_4 - A_21_6);
-                                        const double tmp57 = w14*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                        const double tmp58 = w26*(A_11_1 + A_11_3);
-                                        const double tmp59 = w20*(-A_01_1 + A_10_3);
-                                        const double tmp60 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
-                                        const double tmp61 = w25*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
-                                        const double tmp62 = w4*(-A_12_3 + A_21_1);
-                                        const double tmp63 = w15*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
-                                        const double tmp64 = w0*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
-                                        const double tmp65 = w16*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
-                                        const double tmp66 = w24*(A_11_4 + A_11_6);
-                                        const double tmp67 = w21*(-A_01_6 + A_10_4);
-                                        const double tmp68 = w12*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
-                                        const double tmp69 = w5*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
-                                        const double tmp70 = w6*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
-                                        const double tmp71 = w23*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
-                                        const double tmp72 = w20*(A_01_5 + A_10_6);
-                                        const double tmp73 = w14*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
-                                        const double tmp74 = w0*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                        const double tmp75 = w3*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                        const double tmp76 = w1*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
-                                        const double tmp77 = w15*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
-                                        const double tmp78 = w21*(A_01_2 + A_10_1);
-                                        const double tmp79 = w16*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
-                                        const double tmp80 = w9*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
-                                        const double tmp81 = w12*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
-                                        const double tmp82 = w5*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
-                                        const double tmp83 = w6*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
-                                        const double tmp84 = w6*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
-                                        const double tmp85 = w11*(A_02_1 + A_02_6 - A_20_0 - A_20_7);
-                                        const double tmp86 = w20*(A_01_3 - A_10_2);
-                                        const double tmp87 = w10*(A_12_0 + A_12_1 + A_12_6 + A_12_7 + A_21_0 + A_21_1 + A_21_6 + A_21_7);
-                                        const double tmp88 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                                        const double tmp89 = w23*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
-                                        const double tmp90 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                                        const double tmp91 = w25*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
-                                        const double tmp92 = w15*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
-                                        const double tmp93 = w21*(A_01_4 - A_10_5);
-                                        const double tmp94 = w16*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
-                                        const double tmp95 = w28*(A_00_2 + A_00_3);
-                                        const double tmp96 = w12*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
-                                        const double tmp97 = w29*(A_00_4 + A_00_5);
-                                        const double tmp98 = w5*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
-                                        const double tmp99 = w8*(-A_01_0 - A_01_7 + A_10_1 + A_10_6);
-                                        const double tmp100 = w9*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                        const double tmp101 = w27*(A_00_0 + A_00_1 + A_00_6 + A_00_7);
-                                        const double tmp102 = w17*(A_02_4 - A_20_5);
-                                        const double tmp103 = w2*(-A_02_3 + A_20_2);
-                                        const double tmp104 = w13*(A_22_0 + A_22_1 + A_22_2 + A_22_3 + A_22_4 + A_22_5 + A_22_6 + A_22_7);
-                                        const double tmp105 = w6*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
-                                        const double tmp106 = w22*(A_11_0 + A_11_1 + A_11_2 + A_11_3 + A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                        const double tmp107 = w1*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
-                                        const double tmp108 = w15*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
-                                        const double tmp109 = w16*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
-                                        const double tmp110 = w12*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
-                                        const double tmp111 = w5*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
-                                        const double tmp112 = w8*(-A_01_0 - A_01_3 - A_01_4 - A_01_7 - A_10_0 - A_10_3 - A_10_4 - A_10_7);
-                                        const double tmp113 = w27*(A_00_0 + A_00_1 + A_00_2 + A_00_3 + A_00_4 + A_00_5 + A_00_6 + A_00_7);
-                                        const double tmp114 = w11*(A_02_0 + A_02_2 + A_02_5 + A_02_7 - A_20_1 - A_20_3 - A_20_4 - A_20_6);
-                                        const double tmp115 = w21*(-A_01_4 - A_10_7);
-                                        const double tmp116 = w20*(-A_01_3 - A_10_0);
-                                        const double tmp117 = w15*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
-                                        const double tmp118 = w16*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
-                                        const double tmp119 = w5*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
-                                        const double tmp120 = w8*(A_01_0 + A_01_7 + A_10_3 + A_10_4);
-                                        const double tmp121 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                                        const double tmp122 = w18*(A_12_2 - A_21_6);
-                                        const double tmp123 = w13*(A_22_0 + A_22_3 + A_22_4 + A_22_7);
-                                        const double tmp124 = w11*(-A_02_0 - A_02_7 + A_20_3 + A_20_4);
-                                        const double tmp125 = w7*(A_22_1 + A_22_5);
-                                        const double tmp126 = w10*(-A_12_3 - A_12_4 + A_21_0 + A_21_7);
-                                        const double tmp127 = w3*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
-                                        const double tmp128 = w1*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
-                                        const double tmp129 = w4*(-A_12_5 + A_21_1);
-                                        const double tmp130 = w16*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
-                                        const double tmp131 = w9*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
-                                        const double tmp132 = w19*(A_22_2 + A_22_6);
-                                        const double tmp133 = w17*(-A_02_2 + A_20_6);
-                                        const double tmp134 = w2*(A_02_5 - A_20_1);
-                                        const double tmp135 = w11*(A_02_1 + A_02_3 + A_02_4 + A_02_6 + A_20_1 + A_20_3 + A_20_4 + A_20_6);
-                                        const double tmp136 = w1*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
-                                        const double tmp137 = w15*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
-                                        const double tmp138 = w16*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
-                                        const double tmp139 = w5*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
-                                        const double tmp140 = w18*(A_12_5 - A_21_1);
-                                        const double tmp141 = w14*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
-                                        const double tmp142 = w7*(A_22_2 + A_22_6);
-                                        const double tmp143 = w1*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
-                                        const double tmp144 = w4*(-A_12_2 + A_21_6);
-                                        const double tmp145 = w15*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
-                                        const double tmp146 = w0*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
-                                        const double tmp147 = w16*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
-                                        const double tmp148 = w5*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
-                                        const double tmp149 = w19*(A_22_1 + A_22_5);
-                                        const double tmp150 = w17*(-A_02_5 + A_20_1);
-                                        const double tmp151 = w2*(A_02_2 - A_20_6);
-                                        const double tmp152 = w18*(A_12_3 - A_21_7);
-                                        const double tmp153 = w11*(A_02_1 + A_02_6 - A_20_2 - A_20_5);
-                                        const double tmp154 = w10*(-A_12_2 - A_12_5 + A_21_1 + A_21_6);
-                                        const double tmp155 = w4*(-A_12_4 + A_21_0);
-                                        const double tmp156 = w15*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
-                                        const double tmp157 = w5*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
-                                        const double tmp158 = w17*(A_02_3 - A_20_7);
-                                        const double tmp159 = w2*(-A_02_4 + A_20_0);
-                                        const double tmp160 = w6*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
-                                        const double tmp161 = w10*(-A_12_2 - A_12_3 - A_12_4 - A_12_5 - A_21_2 - A_21_3 - A_21_4 - A_21_5);
-                                        const double tmp162 = w1*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
-                                        const double tmp163 = w16*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
-                                        const double tmp164 = w12*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
-                                        const double tmp165 = w20*(A_01_6 + A_10_5);
-                                        const double tmp166 = w10*(-A_12_0 - A_12_1 - A_12_6 - A_12_7 + A_21_2 + A_21_3 + A_21_4 + A_21_5);
-                                        const double tmp167 = w15*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
-                                        const double tmp168 = w21*(A_01_1 + A_10_2);
-                                        const double tmp169 = w12*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
-                                        const double tmp170 = w5*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
-                                        const double tmp171 = w8*(-A_01_2 - A_01_5 - A_10_1 - A_10_6);
-                                        const double tmp172 = w6*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
-                                        const double tmp173 = w2*(A_02_1 + A_20_4);
-                                        const double tmp174 = w11*(-A_02_3 - A_02_4 - A_20_1 - A_20_6);
-                                        const double tmp175 = w14*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
-                                        const double tmp176 = w22*(-A_11_0 - A_11_1 - A_11_2 - A_11_3 - A_11_4 - A_11_5 - A_11_6 - A_11_7);
-                                        const double tmp177 = w1*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
-                                        const double tmp178 = w25*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
-                                        const double tmp179 = w15*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
-                                        const double tmp180 = w0*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
-                                        const double tmp181 = w16*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
-                                        const double tmp182 = w12*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
-                                        const double tmp183 = w5*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
-                                        const double tmp184 = w8*(A_01_0 + A_01_3 + A_01_4 + A_01_7 - A_10_1 - A_10_2 - A_10_5 - A_10_6);
-                                        const double tmp185 = w6*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
-                                        const double tmp186 = w17*(-A_02_6 - A_20_3);
-                                        const double tmp187 = w23*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
-                                        const double tmp188 = w18*(A_12_4 - A_21_0);
-                                        const double tmp189 = w7*(A_22_3 + A_22_7);
-                                        const double tmp190 = w1*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
-                                        const double tmp191 = w4*(-A_12_3 + A_21_7);
-                                        const double tmp192 = w16*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
-                                        const double tmp193 = w19*(A_22_0 + A_22_4);
-                                        const double tmp194 = w17*(A_02_4 - A_20_0);
-                                        const double tmp195 = w2*(-A_02_3 + A_20_7);
-                                        const double tmp196 = w20*(-A_01_7 - A_10_4);
-                                        const double tmp197 = w21*(-A_01_0 - A_10_3);
-                                        const double tmp198 = w16*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
-                                        const double tmp199 = w8*(A_01_3 + A_01_4 + A_10_0 + A_10_7);
-                                        const double tmp200 = w1*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
-                                        const double tmp201 = w27*(A_00_2 + A_00_3 + A_00_4 + A_00_5);
-                                        const double tmp202 = w11*(-A_02_2 - A_02_5 + A_20_3 + A_20_4);
-                                        const double tmp203 = w20*(A_01_0 - A_10_1);
-                                        const double tmp204 = w23*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
-                                        const double tmp205 = w25*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
-                                        const double tmp206 = w21*(A_01_7 - A_10_6);
-                                        const double tmp207 = w12*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
-                                        const double tmp208 = w28*(A_00_0 + A_00_1);
-                                        const double tmp209 = w29*(A_00_6 + A_00_7);
-                                        const double tmp210 = w8*(-A_01_3 - A_01_4 + A_10_2 + A_10_5);
-                                        const double tmp211 = w6*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
-                                        const double tmp212 = w17*(-A_02_7 + A_20_6);
-                                        const double tmp213 = w2*(A_02_0 - A_20_1);
-                                        const double tmp214 = w13*(-A_22_1 - A_22_2 - A_22_5 - A_22_6);
-                                        const double tmp215 = w22*(-A_11_0 - A_11_2 - A_11_5 - A_11_7);
-                                        const double tmp216 = w8*(A_01_0 + A_01_7 + A_10_0 + A_10_7);
-                                        const double tmp217 = w27*(-A_00_0 - A_00_1 - A_00_6 - A_00_7);
-                                        const double tmp218 = w17*(-A_02_3 - A_20_3);
-                                        const double tmp219 = w2*(A_02_4 + A_20_4);
-                                        const double tmp220 = w11*(-A_02_1 - A_02_6 - A_20_1 - A_20_6);
-                                        const double tmp221 = w26*(-A_11_4 - A_11_6);
-                                        const double tmp222 = w10*(A_12_2 + A_12_5 + A_21_2 + A_21_5);
-                                        const double tmp223 = w20*(-A_01_4 - A_10_4);
-                                        const double tmp224 = w21*(-A_01_3 - A_10_3);
-                                        const double tmp225 = w6*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
-                                        const double tmp226 = w7*(-A_22_0 - A_22_4);
-                                        const double tmp227 = w24*(-A_11_1 - A_11_3);
-                                        const double tmp228 = w19*(-A_22_3 - A_22_7);
-                                        const double tmp229 = w18*(-A_12_3 - A_21_3);
-                                        const double tmp230 = w4*(A_12_4 + A_21_4);
-                                        const double tmp231 = w28*(-A_00_4 - A_00_5);
-                                        const double tmp232 = w12*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
-                                        const double tmp233 = w29*(-A_00_2 - A_00_3);
-                                        const double tmp234 = w20*(-A_01_5 + A_10_7);
-                                        const double tmp235 = w18*(-A_12_0 + A_21_2);
-                                        const double tmp236 = w26*(A_11_5 + A_11_7);
-                                        const double tmp237 = w10*(A_12_1 + A_12_6 - A_21_3 - A_21_4);
-                                        const double tmp238 = w22*(A_11_1 + A_11_3 + A_11_4 + A_11_6);
-                                        const double tmp239 = w4*(A_12_7 - A_21_5);
-                                        const double tmp240 = w15*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
-                                        const double tmp241 = w21*(-A_01_2 + A_10_0);
-                                        const double tmp242 = w5*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
-                                        const double tmp243 = w12*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
-                                        const double tmp244 = w24*(A_11_0 + A_11_2);
-                                        const double tmp245 = w8*(A_01_1 + A_01_6 - A_10_3 - A_10_4);
-                                        const double tmp246 = w6*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
-                                        const double tmp247 = w11*(A_02_3 + A_02_4 - A_20_2 - A_20_5);
-                                        const double tmp248 = w20*(-A_01_1 + A_10_0);
-                                        const double tmp249 = w21*(-A_01_6 + A_10_7);
-                                        const double tmp250 = w8*(A_01_2 + A_01_5 - A_10_3 - A_10_4);
-                                        const double tmp251 = w17*(A_02_6 - A_20_7);
-                                        const double tmp252 = w2*(-A_02_1 + A_20_0);
-                                        const double tmp253 = w17*(-A_02_4 - A_20_4);
-                                        const double tmp254 = w2*(A_02_3 + A_20_3);
-                                        const double tmp255 = w26*(-A_11_1 - A_11_3);
-                                        const double tmp256 = w20*(-A_01_3 - A_10_3);
-                                        const double tmp257 = w21*(-A_01_4 - A_10_4);
-                                        const double tmp258 = w6*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
-                                        const double tmp259 = w7*(-A_22_3 - A_22_7);
-                                        const double tmp260 = w15*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
-                                        const double tmp261 = w24*(-A_11_4 - A_11_6);
-                                        const double tmp262 = w19*(-A_22_0 - A_22_4);
-                                        const double tmp263 = w18*(-A_12_4 - A_21_4);
-                                        const double tmp264 = w4*(A_12_3 + A_21_3);
-                                        const double tmp265 = w28*(-A_00_2 - A_00_3);
-                                        const double tmp266 = w12*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
-                                        const double tmp267 = w5*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
-                                        const double tmp268 = w29*(-A_00_4 - A_00_5);
-                                        const double tmp269 = w11*(A_02_2 + A_02_5 + A_20_0 + A_20_7);
-                                        const double tmp270 = w1*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
-                                        const double tmp271 = w15*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
-                                        const double tmp272 = w16*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
-                                        const double tmp273 = w5*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
-                                        const double tmp274 = w8*(-A_01_1 - A_01_2 - A_01_5 - A_01_6 + A_10_0 + A_10_3 + A_10_4 + A_10_7);
-                                        const double tmp275 = w17*(A_02_7 + A_20_2);
-                                        const double tmp276 = w2*(-A_02_0 - A_20_5);
-                                        const double tmp277 = w18*(-A_12_1 + A_21_5);
-                                        const double tmp278 = w11*(A_02_3 + A_02_4 - A_20_0 - A_20_7);
-                                        const double tmp279 = w10*(A_12_0 + A_12_7 - A_21_3 - A_21_4);
-                                        const double tmp280 = w4*(A_12_6 - A_21_2);
-                                        const double tmp281 = w17*(A_02_1 - A_20_5);
-                                        const double tmp282 = w2*(-A_02_6 + A_20_2);
-                                        const double tmp283 = w11*(A_02_0 + A_02_7 + A_20_2 + A_20_5);
-                                        const double tmp284 = w12*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
-                                        const double tmp285 = w6*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
-                                        const double tmp286 = w17*(A_02_2 + A_20_7);
-                                        const double tmp287 = w2*(-A_02_5 - A_20_0);
-                                        const double tmp288 = w13*(-A_22_0 - A_22_3 - A_22_4 - A_22_7);
-                                        const double tmp289 = w22*(-A_11_1 - A_11_3 - A_11_4 - A_11_6);
-                                        const double tmp290 = w8*(-A_01_1 - A_01_6 - A_10_1 - A_10_6);
-                                        const double tmp291 = w17*(A_02_2 + A_20_2);
-                                        const double tmp292 = w2*(-A_02_5 - A_20_5);
-                                        const double tmp293 = w11*(A_02_0 + A_02_7 + A_20_0 + A_20_7);
-                                        const double tmp294 = w26*(-A_11_5 - A_11_7);
-                                        const double tmp295 = w10*(A_12_3 + A_12_4 + A_21_3 + A_21_4);
-                                        const double tmp296 = w20*(A_01_5 + A_10_5);
-                                        const double tmp297 = w21*(A_01_2 + A_10_2);
-                                        const double tmp298 = w7*(-A_22_1 - A_22_5);
-                                        const double tmp299 = w24*(-A_11_0 - A_11_2);
-                                        const double tmp300 = w19*(-A_22_2 - A_22_6);
-                                        const double tmp301 = w18*(-A_12_2 - A_21_2);
-                                        const double tmp302 = w4*(A_12_5 + A_21_5);
-                                        const double tmp303 = w8*(A_01_3 + A_01_4 + A_10_3 + A_10_4);
-                                        const double tmp304 = w27*(-A_00_2 - A_00_3 - A_00_4 - A_00_5);
-                                        const double tmp305 = w17*(A_02_7 + A_20_7);
-                                        const double tmp306 = w2*(-A_02_0 - A_20_0);
-                                        const double tmp307 = w11*(A_02_2 + A_02_5 + A_20_2 + A_20_5);
-                                        const double tmp308 = w26*(-A_11_0 - A_11_2);
-                                        const double tmp309 = w10*(-A_12_1 - A_12_6 - A_21_1 - A_21_6);
-                                        const double tmp310 = w20*(-A_01_0 - A_10_0);
-                                        const double tmp311 = w21*(-A_01_7 - A_10_7);
-                                        const double tmp312 = w6*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
-                                        const double tmp313 = w24*(-A_11_5 - A_11_7);
-                                        const double tmp314 = w18*(A_12_7 + A_21_7);
-                                        const double tmp315 = w4*(-A_12_0 - A_21_0);
-                                        const double tmp316 = w28*(-A_00_0 - A_00_1);
-                                        const double tmp317 = w12*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
-                                        const double tmp318 = w29*(-A_00_6 - A_00_7);
-                                        const double tmp319 = w18*(-A_12_7 + A_21_5);
-                                        const double tmp320 = w26*(A_11_0 + A_11_2);
-                                        const double tmp321 = w21*(-A_01_5 + A_10_7);
-                                        const double tmp322 = w20*(-A_01_2 + A_10_0);
-                                        const double tmp323 = w4*(A_12_0 - A_21_2);
-                                        const double tmp324 = w15*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
-                                        const double tmp325 = w24*(A_11_5 + A_11_7);
-                                        const double tmp326 = w5*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
-                                        const double tmp327 = w18*(A_12_7 + A_21_1);
-                                        const double tmp328 = w10*(-A_12_1 - A_12_6 - A_21_0 - A_21_7);
-                                        const double tmp329 = w3*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
-                                        const double tmp330 = w1*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
-                                        const double tmp331 = w4*(-A_12_0 - A_21_6);
-                                        const double tmp332 = w25*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
-                                        const double tmp333 = w15*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
-                                        const double tmp334 = w16*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
-                                        const double tmp335 = w9*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
-                                        const double tmp336 = w5*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
-                                        const double tmp337 = w27*(-A_00_0 - A_00_1 - A_00_2 - A_00_3 - A_00_4 - A_00_5 - A_00_6 - A_00_7);
-                                        const double tmp338 = w23*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
-                                        const double tmp339 = w14*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
-                                        const double tmp340 = w23*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
-                                        const double tmp341 = w1*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
-                                        const double tmp342 = w25*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
-                                        const double tmp343 = w15*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
-                                        const double tmp344 = w0*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
-                                        const double tmp345 = w16*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
-                                        const double tmp346 = w12*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
-                                        const double tmp347 = w5*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
-                                        const double tmp348 = w6*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
-                                        const double tmp349 = w17*(A_02_5 + A_20_0);
-                                        const double tmp350 = w2*(-A_02_2 - A_20_7);
-                                        const double tmp351 = w8*(-A_01_2 - A_01_5 - A_10_2 - A_10_5);
-                                        const double tmp352 = w17*(-A_02_1 - A_20_1);
-                                        const double tmp353 = w2*(A_02_6 + A_20_6);
-                                        const double tmp354 = w11*(-A_02_3 - A_02_4 - A_20_3 - A_20_4);
-                                        const double tmp355 = w10*(-A_12_0 - A_12_7 - A_21_0 - A_21_7);
-                                        const double tmp356 = w20*(A_01_6 + A_10_6);
-                                        const double tmp357 = w21*(A_01_1 + A_10_1);
-                                        const double tmp358 = w7*(-A_22_2 - A_22_6);
-                                        const double tmp359 = w19*(-A_22_1 - A_22_5);
-                                        const double tmp360 = w18*(A_12_1 + A_21_1);
-                                        const double tmp361 = w4*(-A_12_6 - A_21_6);
-                                        const double tmp362 = w28*(-A_00_6 - A_00_7);
-                                        const double tmp363 = w29*(-A_00_0 - A_00_1);
-                                        const double tmp364 = w2*(A_02_4 + A_20_1);
-                                        const double tmp365 = w11*(-A_02_1 - A_02_6 - A_20_3 - A_20_4);
-                                        const double tmp366 = w17*(-A_02_3 - A_20_6);
-                                        const double tmp367 = w2*(A_02_5 - A_20_4);
-                                        const double tmp368 = w6*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
-                                        const double tmp369 = w11*(-A_02_0 - A_02_7 + A_20_1 + A_20_6);
-                                        const double tmp370 = w20*(-A_01_5 + A_10_4);
-                                        const double tmp371 = w3*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
-                                        const double tmp372 = w12*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
-                                        const double tmp373 = w21*(-A_01_2 + A_10_3);
-                                        const double tmp374 = w9*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
-                                        const double tmp375 = w29*(A_00_2 + A_00_3);
-                                        const double tmp376 = w8*(A_01_1 + A_01_6 - A_10_0 - A_10_7);
-                                        const double tmp377 = w28*(A_00_4 + A_00_5);
-                                        const double tmp378 = w17*(-A_02_2 + A_20_3);
-                                        const double tmp379 = w17*(A_02_0 + A_20_0);
-                                        const double tmp380 = w2*(-A_02_7 - A_20_7);
-                                        const double tmp381 = w20*(-A_01_7 - A_10_7);
-                                        const double tmp382 = w21*(-A_01_0 - A_10_0);
-                                        const double tmp383 = w6*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
-                                        const double tmp384 = w18*(A_12_0 + A_21_0);
-                                        const double tmp385 = w4*(-A_12_7 - A_21_7);
-                                        const double tmp386 = w12*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
-                                        const double tmp387 = w17*(-A_02_6 - A_20_6);
-                                        const double tmp388 = w2*(A_02_1 + A_20_1);
-                                        const double tmp389 = w20*(A_01_1 + A_10_1);
-                                        const double tmp390 = w21*(A_01_6 + A_10_6);
-                                        const double tmp391 = w18*(A_12_6 + A_21_6);
-                                        const double tmp392 = w4*(-A_12_1 - A_21_1);
-                                        const double tmp393 = w2*(A_02_3 + A_20_6);
-                                        const double tmp394 = w1*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
-                                        const double tmp395 = w16*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
-                                        const double tmp396 = w17*(-A_02_4 - A_20_1);
-                                        const double tmp397 = w18*(-A_12_5 - A_21_3);
-                                        const double tmp398 = w10*(A_12_3 + A_12_4 + A_21_2 + A_21_5);
-                                        const double tmp399 = w1*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
-                                        const double tmp400 = w4*(A_12_2 + A_21_4);
-                                        const double tmp401 = w16*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
-                                        const double tmp402 = w20*(-A_01_2 + A_10_3);
-                                        const double tmp403 = w21*(-A_01_5 + A_10_4);
-                                        const double tmp404 = w17*(-A_02_5 + A_20_4);
-                                        const double tmp405 = w2*(A_02_2 - A_20_3);
-                                        const double tmp406 = w18*(-A_12_0 + A_21_4);
-                                        const double tmp407 = w4*(A_12_7 - A_21_3);
-                                        const double tmp408 = w17*(-A_02_0 + A_20_4);
-                                        const double tmp409 = w2*(A_02_7 - A_20_3);
-                                        const double tmp410 = w17*(A_02_5 + A_20_5);
-                                        const double tmp411 = w2*(-A_02_2 - A_20_2);
-                                        const double tmp412 = w20*(A_01_2 + A_10_2);
-                                        const double tmp413 = w21*(A_01_5 + A_10_5);
-                                        const double tmp414 = w18*(-A_12_5 - A_21_5);
-                                        const double tmp415 = w4*(A_12_2 + A_21_2);
-                                        const double tmp416 = w12*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
-                                        const double tmp417 = w6*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
-                                        const double tmp418 = w17*(A_02_0 + A_20_5);
-                                        const double tmp419 = w2*(-A_02_7 - A_20_2);
-                                        const double tmp420 = w18*(-A_12_4 - A_21_2);
-                                        const double tmp421 = w10*(A_12_2 + A_12_5 + A_21_3 + A_21_4);
-                                        const double tmp422 = w3*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
-                                        const double tmp423 = w1*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
-                                        const double tmp424 = w25*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
-                                        const double tmp425 = w4*(A_12_3 + A_21_5);
-                                        const double tmp426 = w15*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
-                                        const double tmp427 = w16*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
-                                        const double tmp428 = w9*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
-                                        const double tmp429 = w5*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
-                                        const double tmp430 = w23*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
-                                        const double tmp431 = w18*(A_12_5 - A_21_7);
-                                        const double tmp432 = w10*(-A_12_3 - A_12_4 + A_21_1 + A_21_6);
-                                        const double tmp433 = w21*(A_01_7 - A_10_5);
-                                        const double tmp434 = w20*(A_01_0 - A_10_2);
-                                        const double tmp435 = w4*(-A_12_2 + A_21_0);
-                                        const double tmp436 = w8*(-A_01_3 - A_01_4 + A_10_1 + A_10_6);
-                                        const double tmp437 = w2*(-A_02_4 + A_20_5);
-                                        const double tmp438 = w20*(A_01_4 - A_10_5);
-                                        const double tmp439 = w21*(A_01_3 - A_10_2);
-                                        const double tmp440 = w16*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
-                                        const double tmp441 = w1*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
-                                        const double tmp442 = w17*(A_02_3 - A_20_2);
-                                        const double tmp443 = w20*(-A_01_4 - A_10_7);
-                                        const double tmp444 = w21*(-A_01_3 - A_10_0);
-                                        const double tmp445 = w18*(A_12_6 + A_21_0);
-                                        const double tmp446 = w10*(-A_12_0 - A_12_7 - A_21_1 - A_21_6);
-                                        const double tmp447 = w1*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
-                                        const double tmp448 = w4*(-A_12_1 - A_21_7);
-                                        const double tmp449 = w16*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
-                                        const double tmp450 = w2*(A_02_7 - A_20_6);
-                                        const double tmp451 = w6*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
-                                        const double tmp452 = w20*(A_01_7 - A_10_6);
-                                        const double tmp453 = w21*(A_01_0 - A_10_1);
-                                        const double tmp454 = w12*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
-                                        const double tmp455 = w29*(A_00_0 + A_00_1);
-                                        const double tmp456 = w28*(A_00_6 + A_00_7);
-                                        const double tmp457 = w17*(-A_02_0 + A_20_1);
-                                        const double tmp458 = w21*(-A_01_7 - A_10_4);
-                                        const double tmp459 = w20*(-A_01_0 - A_10_3);
-                                        const double tmp460 = w12*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
-                                        const double tmp461 = w6*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
-                                        const double tmp462 = w18*(A_12_1 + A_21_7);
-                                        const double tmp463 = w4*(-A_12_6 - A_21_0);
-                                        const double tmp464 = w15*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
-                                        const double tmp465 = w5*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
-                                        const double tmp466 = w2*(-A_02_6 + A_20_7);
-                                        const double tmp467 = w20*(-A_01_6 + A_10_7);
-                                        const double tmp468 = w21*(-A_01_1 + A_10_0);
-                                        const double tmp469 = w17*(A_02_1 - A_20_0);
-                                        const double tmp470 = w6*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
-                                        const double tmp471 = w1*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
-                                        const double tmp472 = w15*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
-                                        const double tmp473 = w16*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
-                                        const double tmp474 = w12*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
-                                        const double tmp475 = w5*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
-                                        const double tmp476 = w18*(-A_12_6 + A_21_4);
-                                        const double tmp477 = w20*(A_01_3 - A_10_1);
-                                        const double tmp478 = w10*(A_12_0 + A_12_7 - A_21_2 - A_21_5);
-                                        const double tmp479 = w4*(A_12_1 - A_21_3);
-                                        const double tmp480 = w21*(A_01_4 - A_10_6);
-                                        const double tmp481 = w8*(-A_01_0 - A_01_7 + A_10_2 + A_10_5);
-                                        const double tmp482 = w6*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
-                                        const double tmp483 = w12*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
-                                        const double tmp484 = w15*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
-                                        const double tmp485 = w5*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
-                                        const double tmp486 = w18*(-A_12_1 + A_21_3);
-                                        const double tmp487 = w20*(A_01_4 - A_10_6);
-                                        const double tmp488 = w4*(A_12_6 - A_21_4);
-                                        const double tmp489 = w21*(A_01_3 - A_10_1);
-                                        const double tmp490 = w20*(A_01_7 - A_10_5);
-                                        const double tmp491 = w18*(A_12_2 - A_21_0);
-                                        const double tmp492 = w4*(-A_12_5 + A_21_7);
-                                        const double tmp493 = w21*(A_01_0 - A_10_2);
-                                        const double tmp494 = w20*(A_01_1 + A_10_2);
-                                        const double tmp495 = w21*(A_01_6 + A_10_5);
-                                        const double tmp496 = w18*(-A_12_2 - A_21_4);
-                                        const double tmp497 = w4*(A_12_5 + A_21_3);
-                                        const double tmp498 = w15*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
-                                        const double tmp499 = w5*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
-                                        const double tmp500 = w18*(-A_12_6 + A_21_2);
-                                        const double tmp501 = w4*(A_12_1 - A_21_5);
-                                        const double tmp502 = w17*(A_02_6 - A_20_2);
-                                        const double tmp503 = w2*(-A_02_1 + A_20_5);
-                                        const double tmp504 = w18*(-A_12_3 - A_21_5);
-                                        const double tmp505 = w4*(A_12_4 + A_21_2);
-                                        const double tmp506 = w2*(A_02_6 + A_20_3);
-                                        const double tmp507 = w17*(-A_02_1 - A_20_4);
-                                        const double tmp508 = w18*(A_12_0 + A_21_6);
-                                        const double tmp509 = w4*(-A_12_7 - A_21_1);
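+                                        // Descriptive note (not part of the upstream change): the block below gathers
+                                        // the PDE coefficient A for the current equation/component pair (k,m), one
+                                        // 3x3 block per local point 0..7 of the hexahedral element, addressed through
+                                        // the INDEX5 macro into the flat coefficient array A_p.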
+                                        const Scalar A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,3,numComp,3)];
+                                        const Scalar A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,3,numComp,3)];
+                                        const Scalar A_02_0 = A_p[INDEX5(k,0,m,2,0,numEq,3,numComp,3)];
+                                        const Scalar A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,3,numComp,3)];
+                                        const Scalar A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,3,numComp,3)];
+                                        const Scalar A_12_0 = A_p[INDEX5(k,1,m,2,0,numEq,3,numComp,3)];
+                                        const Scalar A_20_0 = A_p[INDEX5(k,2,m,0,0,numEq,3,numComp,3)];
+                                        const Scalar A_21_0 = A_p[INDEX5(k,2,m,1,0,numEq,3,numComp,3)];
+                                        const Scalar A_22_0 = A_p[INDEX5(k,2,m,2,0,numEq,3,numComp,3)];
+                                        const Scalar A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,3,numComp,3)];
+                                        const Scalar A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,3,numComp,3)];
+                                        const Scalar A_02_1 = A_p[INDEX5(k,0,m,2,1,numEq,3,numComp,3)];
+                                        const Scalar A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,3,numComp,3)];
+                                        const Scalar A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,3,numComp,3)];
+                                        const Scalar A_12_1 = A_p[INDEX5(k,1,m,2,1,numEq,3,numComp,3)];
+                                        const Scalar A_20_1 = A_p[INDEX5(k,2,m,0,1,numEq,3,numComp,3)];
+                                        const Scalar A_21_1 = A_p[INDEX5(k,2,m,1,1,numEq,3,numComp,3)];
+                                        const Scalar A_22_1 = A_p[INDEX5(k,2,m,2,1,numEq,3,numComp,3)];
+                                        const Scalar A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,3,numComp,3)];
+                                        const Scalar A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,3,numComp,3)];
+                                        const Scalar A_02_2 = A_p[INDEX5(k,0,m,2,2,numEq,3,numComp,3)];
+                                        const Scalar A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,3,numComp,3)];
+                                        const Scalar A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,3,numComp,3)];
+                                        const Scalar A_12_2 = A_p[INDEX5(k,1,m,2,2,numEq,3,numComp,3)];
+                                        const Scalar A_20_2 = A_p[INDEX5(k,2,m,0,2,numEq,3,numComp,3)];
+                                        const Scalar A_21_2 = A_p[INDEX5(k,2,m,1,2,numEq,3,numComp,3)];
+                                        const Scalar A_22_2 = A_p[INDEX5(k,2,m,2,2,numEq,3,numComp,3)];
+                                        const Scalar A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,3,numComp,3)];
+                                        const Scalar A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,3,numComp,3)];
+                                        const Scalar A_02_3 = A_p[INDEX5(k,0,m,2,3,numEq,3,numComp,3)];
+                                        const Scalar A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,3,numComp,3)];
+                                        const Scalar A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,3,numComp,3)];
+                                        const Scalar A_12_3 = A_p[INDEX5(k,1,m,2,3,numEq,3,numComp,3)];
+                                        const Scalar A_20_3 = A_p[INDEX5(k,2,m,0,3,numEq,3,numComp,3)];
+                                        const Scalar A_21_3 = A_p[INDEX5(k,2,m,1,3,numEq,3,numComp,3)];
+                                        const Scalar A_22_3 = A_p[INDEX5(k,2,m,2,3,numEq,3,numComp,3)];
+                                        const Scalar A_00_4 = A_p[INDEX5(k,0,m,0,4,numEq,3,numComp,3)];
+                                        const Scalar A_01_4 = A_p[INDEX5(k,0,m,1,4,numEq,3,numComp,3)];
+                                        const Scalar A_02_4 = A_p[INDEX5(k,0,m,2,4,numEq,3,numComp,3)];
+                                        const Scalar A_10_4 = A_p[INDEX5(k,1,m,0,4,numEq,3,numComp,3)];
+                                        const Scalar A_11_4 = A_p[INDEX5(k,1,m,1,4,numEq,3,numComp,3)];
+                                        const Scalar A_12_4 = A_p[INDEX5(k,1,m,2,4,numEq,3,numComp,3)];
+                                        const Scalar A_20_4 = A_p[INDEX5(k,2,m,0,4,numEq,3,numComp,3)];
+                                        const Scalar A_21_4 = A_p[INDEX5(k,2,m,1,4,numEq,3,numComp,3)];
+                                        const Scalar A_22_4 = A_p[INDEX5(k,2,m,2,4,numEq,3,numComp,3)];
+                                        const Scalar A_00_5 = A_p[INDEX5(k,0,m,0,5,numEq,3,numComp,3)];
+                                        const Scalar A_01_5 = A_p[INDEX5(k,0,m,1,5,numEq,3,numComp,3)];
+                                        const Scalar A_02_5 = A_p[INDEX5(k,0,m,2,5,numEq,3,numComp,3)];
+                                        const Scalar A_10_5 = A_p[INDEX5(k,1,m,0,5,numEq,3,numComp,3)];
+                                        const Scalar A_11_5 = A_p[INDEX5(k,1,m,1,5,numEq,3,numComp,3)];
+                                        const Scalar A_12_5 = A_p[INDEX5(k,1,m,2,5,numEq,3,numComp,3)];
+                                        const Scalar A_20_5 = A_p[INDEX5(k,2,m,0,5,numEq,3,numComp,3)];
+                                        const Scalar A_21_5 = A_p[INDEX5(k,2,m,1,5,numEq,3,numComp,3)];
+                                        const Scalar A_22_5 = A_p[INDEX5(k,2,m,2,5,numEq,3,numComp,3)];
+                                        const Scalar A_00_6 = A_p[INDEX5(k,0,m,0,6,numEq,3,numComp,3)];
+                                        const Scalar A_01_6 = A_p[INDEX5(k,0,m,1,6,numEq,3,numComp,3)];
+                                        const Scalar A_02_6 = A_p[INDEX5(k,0,m,2,6,numEq,3,numComp,3)];
+                                        const Scalar A_10_6 = A_p[INDEX5(k,1,m,0,6,numEq,3,numComp,3)];
+                                        const Scalar A_11_6 = A_p[INDEX5(k,1,m,1,6,numEq,3,numComp,3)];
+                                        const Scalar A_12_6 = A_p[INDEX5(k,1,m,2,6,numEq,3,numComp,3)];
+                                        const Scalar A_20_6 = A_p[INDEX5(k,2,m,0,6,numEq,3,numComp,3)];
+                                        const Scalar A_21_6 = A_p[INDEX5(k,2,m,1,6,numEq,3,numComp,3)];
+                                        const Scalar A_22_6 = A_p[INDEX5(k,2,m,2,6,numEq,3,numComp,3)];
+                                        const Scalar A_00_7 = A_p[INDEX5(k,0,m,0,7,numEq,3,numComp,3)];
+                                        const Scalar A_01_7 = A_p[INDEX5(k,0,m,1,7,numEq,3,numComp,3)];
+                                        const Scalar A_02_7 = A_p[INDEX5(k,0,m,2,7,numEq,3,numComp,3)];
+                                        const Scalar A_10_7 = A_p[INDEX5(k,1,m,0,7,numEq,3,numComp,3)];
+                                        const Scalar A_11_7 = A_p[INDEX5(k,1,m,1,7,numEq,3,numComp,3)];
+                                        const Scalar A_12_7 = A_p[INDEX5(k,1,m,2,7,numEq,3,numComp,3)];
+                                        const Scalar A_20_7 = A_p[INDEX5(k,2,m,0,7,numEq,3,numComp,3)];
+                                        const Scalar A_21_7 = A_p[INDEX5(k,2,m,1,7,numEq,3,numComp,3)];
+                                        const Scalar A_22_7 = A_p[INDEX5(k,2,m,2,7,numEq,3,numComp,3)];
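+                                        // Descriptive note (not part of the upstream change): the tmp0, tmp1, ... terms
+                                        // below are shared subexpressions formed by weighting the sampled coefficients
+                                        // with the precomputed integration weights w0..w29; they are presumably reused
+                                        // when the element matrix entries are accumulated further down in this routine.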
+                                        const Scalar tmp0 = w18*(-A_12_7 + A_21_3);
+                                        const Scalar tmp1 = w13*(A_22_1 + A_22_2 + A_22_5 + A_22_6);
+                                        const Scalar tmp2 = w11*(-A_02_2 - A_02_5 + A_20_1 + A_20_6);
+                                        const Scalar tmp3 = w14*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
+                                        const Scalar tmp4 = w7*(A_22_0 + A_22_4);
+                                        const Scalar tmp5 = w10*(A_12_1 + A_12_6 - A_21_2 - A_21_5);
+                                        const Scalar tmp6 = w3*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
+                                        const Scalar tmp7 = w1*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
+                                        const Scalar tmp8 = w4*(A_12_0 - A_21_4);
+                                        const Scalar tmp9 = w15*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
+                                        const Scalar tmp10 = w0*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
+                                        const Scalar tmp11 = w16*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
+                                        const Scalar tmp12 = w9*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
+                                        const Scalar tmp13 = w12*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
+                                        const Scalar tmp14 = w5*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
+                                        const Scalar tmp15 = w8*(A_01_1 + A_01_2 + A_01_5 + A_01_6 + A_10_1 + A_10_2 + A_10_5 + A_10_6);
+                                        const Scalar tmp16 = w6*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
+                                        const Scalar tmp17 = w19*(A_22_3 + A_22_7);
+                                        const Scalar tmp18 = w17*(-A_02_7 + A_20_3);
+                                        const Scalar tmp19 = w2*(A_02_0 - A_20_4);
+                                        const Scalar tmp20 = w13*(-A_22_0 - A_22_1 - A_22_2 - A_22_3 - A_22_4 - A_22_5 - A_22_6 - A_22_7);
+                                        const Scalar tmp21 = w11*(-A_02_1 - A_02_3 - A_02_4 - A_02_6 + A_20_0 + A_20_2 + A_20_5 + A_20_7);
+                                        const Scalar tmp22 = w14*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                        const Scalar tmp23 = w20*(A_01_2 + A_10_1);
+                                        const Scalar tmp24 = w10*(A_12_2 + A_12_3 + A_12_4 + A_12_5 - A_21_0 - A_21_1 - A_21_6 - A_21_7);
+                                        const Scalar tmp25 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                                        const Scalar tmp26 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                                        const Scalar tmp27 = w15*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
+                                        const Scalar tmp28 = w0*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                                        const Scalar tmp29 = w16*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
+                                        const Scalar tmp30 = w9*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                        const Scalar tmp31 = w21*(A_01_5 + A_10_6);
+                                        const Scalar tmp32 = w12*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
+                                        const Scalar tmp33 = w5*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
+                                        const Scalar tmp34 = w8*(-A_01_1 - A_01_6 - A_10_2 - A_10_5);
+                                        const Scalar tmp35 = w6*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
+                                        const Scalar tmp36 = w20*(-A_01_6 + A_10_4);
+                                        const Scalar tmp37 = w18*(A_12_3 - A_21_1);
+                                        const Scalar tmp38 = w11*(-A_02_0 - A_02_2 - A_02_5 - A_02_7 - A_20_0 - A_20_2 - A_20_5 - A_20_7);
+                                        const Scalar tmp39 = w14*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                                        const Scalar tmp40 = w26*(A_11_4 + A_11_6);
+                                        const Scalar tmp41 = w0*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                        const Scalar tmp42 = w10*(-A_12_2 - A_12_5 + A_21_0 + A_21_7);
+                                        const Scalar tmp43 = w22*(A_11_0 + A_11_2 + A_11_5 + A_11_7);
+                                        const Scalar tmp44 = w1*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
+                                        const Scalar tmp45 = w25*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
+                                        const Scalar tmp46 = w4*(-A_12_4 + A_21_6);
+                                        const Scalar tmp47 = w15*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
+                                        const Scalar tmp48 = w21*(-A_01_1 + A_10_3);
+                                        const Scalar tmp49 = w16*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                                        const Scalar tmp50 = w5*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
+                                        const Scalar tmp51 = w12*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
+                                        const Scalar tmp52 = w24*(A_11_1 + A_11_3);
+                                        const Scalar tmp53 = w8*(A_01_2 + A_01_5 - A_10_0 - A_10_7);
+                                        const Scalar tmp54 = w6*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
+                                        const Scalar tmp55 = w23*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
+                                        const Scalar tmp56 = w18*(A_12_4 - A_21_6);
+                                        const Scalar tmp57 = w14*(A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                        const Scalar tmp58 = w26*(A_11_1 + A_11_3);
+                                        const Scalar tmp59 = w20*(-A_01_1 + A_10_3);
+                                        const Scalar tmp60 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
+                                        const Scalar tmp61 = w25*(A_22_0 + A_22_2 + A_22_4 + A_22_6);
+                                        const Scalar tmp62 = w4*(-A_12_3 + A_21_1);
+                                        const Scalar tmp63 = w15*(-A_02_4 - A_02_6 - A_20_4 - A_20_6);
+                                        const Scalar tmp64 = w0*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
+                                        const Scalar tmp65 = w16*(A_01_4 + A_01_7 - A_10_5 - A_10_6);
+                                        const Scalar tmp66 = w24*(A_11_4 + A_11_6);
+                                        const Scalar tmp67 = w21*(-A_01_6 + A_10_4);
+                                        const Scalar tmp68 = w12*(A_12_0 + A_12_6 - A_21_2 - A_21_4);
+                                        const Scalar tmp69 = w5*(-A_02_1 - A_02_3 - A_20_1 - A_20_3);
+                                        const Scalar tmp70 = w6*(A_12_1 + A_12_7 - A_21_3 - A_21_5);
+                                        const Scalar tmp71 = w23*(A_22_1 + A_22_3 + A_22_5 + A_22_7);
+                                        const Scalar tmp72 = w20*(A_01_5 + A_10_6);
+                                        const Scalar tmp73 = w14*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
+                                        const Scalar tmp74 = w0*(-A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                        const Scalar tmp75 = w3*(-A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                        const Scalar tmp76 = w1*(-A_01_4 - A_01_7 - A_10_4 - A_10_7);
+                                        const Scalar tmp77 = w15*(-A_02_0 - A_02_2 + A_20_1 + A_20_3);
+                                        const Scalar tmp78 = w21*(A_01_2 + A_10_1);
+                                        const Scalar tmp79 = w16*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
+                                        const Scalar tmp80 = w9*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
+                                        const Scalar tmp81 = w12*(-A_12_0 - A_12_1 + A_21_2 + A_21_3);
+                                        const Scalar tmp82 = w5*(-A_02_5 - A_02_7 + A_20_4 + A_20_6);
+                                        const Scalar tmp83 = w6*(-A_12_6 - A_12_7 + A_21_4 + A_21_5);
+                                        const Scalar tmp84 = w6*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
+                                        const Scalar tmp85 = w11*(A_02_1 + A_02_6 - A_20_0 - A_20_7);
+                                        const Scalar tmp86 = w20*(A_01_3 - A_10_2);
+                                        const Scalar tmp87 = w10*(A_12_0 + A_12_1 + A_12_6 + A_12_7 + A_21_0 + A_21_1 + A_21_6 + A_21_7);
+                                        const Scalar tmp88 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                                        const Scalar tmp89 = w23*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
+                                        const Scalar tmp90 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                                        const Scalar tmp91 = w25*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
+                                        const Scalar tmp92 = w15*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
+                                        const Scalar tmp93 = w21*(A_01_4 - A_10_5);
+                                        const Scalar tmp94 = w16*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
+                                        const Scalar tmp95 = w28*(A_00_2 + A_00_3);
+                                        const Scalar tmp96 = w12*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
+                                        const Scalar tmp97 = w29*(A_00_4 + A_00_5);
+                                        const Scalar tmp98 = w5*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
+                                        const Scalar tmp99 = w8*(-A_01_0 - A_01_7 + A_10_1 + A_10_6);
+                                        const Scalar tmp100 = w9*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                        const Scalar tmp101 = w27*(A_00_0 + A_00_1 + A_00_6 + A_00_7);
+                                        const Scalar tmp102 = w17*(A_02_4 - A_20_5);
+                                        const Scalar tmp103 = w2*(-A_02_3 + A_20_2);
+                                        const Scalar tmp104 = w13*(A_22_0 + A_22_1 + A_22_2 + A_22_3 + A_22_4 + A_22_5 + A_22_6 + A_22_7);
+                                        const Scalar tmp105 = w6*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
+                                        const Scalar tmp106 = w22*(A_11_0 + A_11_1 + A_11_2 + A_11_3 + A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                        const Scalar tmp107 = w1*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
+                                        const Scalar tmp108 = w15*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
+                                        const Scalar tmp109 = w16*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
+                                        const Scalar tmp110 = w12*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
+                                        const Scalar tmp111 = w5*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
+                                        const Scalar tmp112 = w8*(-A_01_0 - A_01_3 - A_01_4 - A_01_7 - A_10_0 - A_10_3 - A_10_4 - A_10_7);
+                                        const Scalar tmp113 = w27*(A_00_0 + A_00_1 + A_00_2 + A_00_3 + A_00_4 + A_00_5 + A_00_6 + A_00_7);
+                                        const Scalar tmp114 = w11*(A_02_0 + A_02_2 + A_02_5 + A_02_7 - A_20_1 - A_20_3 - A_20_4 - A_20_6);
+                                        const Scalar tmp115 = w21*(-A_01_4 - A_10_7);
+                                        const Scalar tmp116 = w20*(-A_01_3 - A_10_0);
+                                        const Scalar tmp117 = w15*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
+                                        const Scalar tmp118 = w16*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
+                                        const Scalar tmp119 = w5*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
+                                        const Scalar tmp120 = w8*(A_01_0 + A_01_7 + A_10_3 + A_10_4);
+                                        const Scalar tmp121 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                                        const Scalar tmp122 = w18*(A_12_2 - A_21_6);
+                                        const Scalar tmp123 = w13*(A_22_0 + A_22_3 + A_22_4 + A_22_7);
+                                        const Scalar tmp124 = w11*(-A_02_0 - A_02_7 + A_20_3 + A_20_4);
+                                        const Scalar tmp125 = w7*(A_22_1 + A_22_5);
+                                        const Scalar tmp126 = w10*(-A_12_3 - A_12_4 + A_21_0 + A_21_7);
+                                        const Scalar tmp127 = w3*(A_11_1 + A_11_3 + A_11_5 + A_11_7);
+                                        const Scalar tmp128 = w1*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
+                                        const Scalar tmp129 = w4*(-A_12_5 + A_21_1);
+                                        const Scalar tmp130 = w16*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
+                                        const Scalar tmp131 = w9*(A_11_0 + A_11_2 + A_11_4 + A_11_6);
+                                        const Scalar tmp132 = w19*(A_22_2 + A_22_6);
+                                        const Scalar tmp133 = w17*(-A_02_2 + A_20_6);
+                                        const Scalar tmp134 = w2*(A_02_5 - A_20_1);
+                                        const Scalar tmp135 = w11*(A_02_1 + A_02_3 + A_02_4 + A_02_6 + A_20_1 + A_20_3 + A_20_4 + A_20_6);
+                                        const Scalar tmp136 = w1*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
+                                        const Scalar tmp137 = w15*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
+                                        const Scalar tmp138 = w16*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
+                                        const Scalar tmp139 = w5*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
+                                        const Scalar tmp140 = w18*(A_12_5 - A_21_1);
+                                        const Scalar tmp141 = w14*(A_00_0 + A_00_1 + A_00_4 + A_00_5);
+                                        const Scalar tmp142 = w7*(A_22_2 + A_22_6);
+                                        const Scalar tmp143 = w1*(-A_01_2 - A_01_6 - A_10_2 - A_10_6);
+                                        const Scalar tmp144 = w4*(-A_12_2 + A_21_6);
+                                        const Scalar tmp145 = w15*(-A_02_1 - A_02_4 + A_20_0 + A_20_5);
+                                        const Scalar tmp146 = w0*(A_00_2 + A_00_3 + A_00_6 + A_00_7);
+                                        const Scalar tmp147 = w16*(-A_01_1 - A_01_5 - A_10_1 - A_10_5);
+                                        const Scalar tmp148 = w5*(-A_02_3 - A_02_6 + A_20_2 + A_20_7);
+                                        const Scalar tmp149 = w19*(A_22_1 + A_22_5);
+                                        const Scalar tmp150 = w17*(-A_02_5 + A_20_1);
+                                        const Scalar tmp151 = w2*(A_02_2 - A_20_6);
+                                        const Scalar tmp152 = w18*(A_12_3 - A_21_7);
+                                        const Scalar tmp153 = w11*(A_02_1 + A_02_6 - A_20_2 - A_20_5);
+                                        const Scalar tmp154 = w10*(-A_12_2 - A_12_5 + A_21_1 + A_21_6);
+                                        const Scalar tmp155 = w4*(-A_12_4 + A_21_0);
+                                        const Scalar tmp156 = w15*(A_02_2 + A_02_7 - A_20_3 - A_20_6);
+                                        const Scalar tmp157 = w5*(A_02_0 + A_02_5 - A_20_1 - A_20_4);
+                                        const Scalar tmp158 = w17*(A_02_3 - A_20_7);
+                                        const Scalar tmp159 = w2*(-A_02_4 + A_20_0);
+                                        const Scalar tmp160 = w6*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
+                                        const Scalar tmp161 = w10*(-A_12_2 - A_12_3 - A_12_4 - A_12_5 - A_21_2 - A_21_3 - A_21_4 - A_21_5);
+                                        const Scalar tmp162 = w1*(A_01_0 + A_01_4 + A_10_3 + A_10_7);
+                                        const Scalar tmp163 = w16*(A_01_3 + A_01_7 + A_10_0 + A_10_4);
+                                        const Scalar tmp164 = w12*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
+                                        const Scalar tmp165 = w20*(A_01_6 + A_10_5);
+                                        const Scalar tmp166 = w10*(-A_12_0 - A_12_1 - A_12_6 - A_12_7 + A_21_2 + A_21_3 + A_21_4 + A_21_5);
+                                        const Scalar tmp167 = w15*(A_02_1 + A_02_3 - A_20_0 - A_20_2);
+                                        const Scalar tmp168 = w21*(A_01_1 + A_10_2);
+                                        const Scalar tmp169 = w12*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
+                                        const Scalar tmp170 = w5*(A_02_4 + A_02_6 - A_20_5 - A_20_7);
+                                        const Scalar tmp171 = w8*(-A_01_2 - A_01_5 - A_10_1 - A_10_6);
+                                        const Scalar tmp172 = w6*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
+                                        const Scalar tmp173 = w2*(A_02_1 + A_20_4);
+                                        const Scalar tmp174 = w11*(-A_02_3 - A_02_4 - A_20_1 - A_20_6);
+                                        const Scalar tmp175 = w14*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
+                                        const Scalar tmp176 = w22*(-A_11_0 - A_11_1 - A_11_2 - A_11_3 - A_11_4 - A_11_5 - A_11_6 - A_11_7);
+                                        const Scalar tmp177 = w1*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
+                                        const Scalar tmp178 = w25*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
+                                        const Scalar tmp179 = w15*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
+                                        const Scalar tmp180 = w0*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
+                                        const Scalar tmp181 = w16*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
+                                        const Scalar tmp182 = w12*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
+                                        const Scalar tmp183 = w5*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
+                                        const Scalar tmp184 = w8*(A_01_0 + A_01_3 + A_01_4 + A_01_7 - A_10_1 - A_10_2 - A_10_5 - A_10_6);
+                                        const Scalar tmp185 = w6*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
+                                        const Scalar tmp186 = w17*(-A_02_6 - A_20_3);
+                                        const Scalar tmp187 = w23*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
+                                        const Scalar tmp188 = w18*(A_12_4 - A_21_0);
+                                        const Scalar tmp189 = w7*(A_22_3 + A_22_7);
+                                        const Scalar tmp190 = w1*(A_01_3 + A_01_7 + A_10_3 + A_10_7);
+                                        const Scalar tmp191 = w4*(-A_12_3 + A_21_7);
+                                        const Scalar tmp192 = w16*(A_01_0 + A_01_4 + A_10_0 + A_10_4);
+                                        const Scalar tmp193 = w19*(A_22_0 + A_22_4);
+                                        const Scalar tmp194 = w17*(A_02_4 - A_20_0);
+                                        const Scalar tmp195 = w2*(-A_02_3 + A_20_7);
+                                        const Scalar tmp196 = w20*(-A_01_7 - A_10_4);
+                                        const Scalar tmp197 = w21*(-A_01_0 - A_10_3);
+                                        const Scalar tmp198 = w16*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
+                                        const Scalar tmp199 = w8*(A_01_3 + A_01_4 + A_10_0 + A_10_7);
+                                        const Scalar tmp200 = w1*(A_01_5 + A_01_6 + A_10_5 + A_10_6);
+                                        const Scalar tmp201 = w27*(A_00_2 + A_00_3 + A_00_4 + A_00_5);
+                                        const Scalar tmp202 = w11*(-A_02_2 - A_02_5 + A_20_3 + A_20_4);
+                                        const Scalar tmp203 = w20*(A_01_0 - A_10_1);
+                                        const Scalar tmp204 = w23*(A_22_0 + A_22_1 + A_22_4 + A_22_5);
+                                        const Scalar tmp205 = w25*(A_22_2 + A_22_3 + A_22_6 + A_22_7);
+                                        const Scalar tmp206 = w21*(A_01_7 - A_10_6);
+                                        const Scalar tmp207 = w12*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
+                                        const Scalar tmp208 = w28*(A_00_0 + A_00_1);
+                                        const Scalar tmp209 = w29*(A_00_6 + A_00_7);
+                                        const Scalar tmp210 = w8*(-A_01_3 - A_01_4 + A_10_2 + A_10_5);
+                                        const Scalar tmp211 = w6*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
+                                        const Scalar tmp212 = w17*(-A_02_7 + A_20_6);
+                                        const Scalar tmp213 = w2*(A_02_0 - A_20_1);
+                                        const Scalar tmp214 = w13*(-A_22_1 - A_22_2 - A_22_5 - A_22_6);
+                                        const Scalar tmp215 = w22*(-A_11_0 - A_11_2 - A_11_5 - A_11_7);
+                                        const Scalar tmp216 = w8*(A_01_0 + A_01_7 + A_10_0 + A_10_7);
+                                        const Scalar tmp217 = w27*(-A_00_0 - A_00_1 - A_00_6 - A_00_7);
+                                        const Scalar tmp218 = w17*(-A_02_3 - A_20_3);
+                                        const Scalar tmp219 = w2*(A_02_4 + A_20_4);
+                                        const Scalar tmp220 = w11*(-A_02_1 - A_02_6 - A_20_1 - A_20_6);
+                                        const Scalar tmp221 = w26*(-A_11_4 - A_11_6);
+                                        const Scalar tmp222 = w10*(A_12_2 + A_12_5 + A_21_2 + A_21_5);
+                                        const Scalar tmp223 = w20*(-A_01_4 - A_10_4);
+                                        const Scalar tmp224 = w21*(-A_01_3 - A_10_3);
+                                        const Scalar tmp225 = w6*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
+                                        const Scalar tmp226 = w7*(-A_22_0 - A_22_4);
+                                        const Scalar tmp227 = w24*(-A_11_1 - A_11_3);
+                                        const Scalar tmp228 = w19*(-A_22_3 - A_22_7);
+                                        const Scalar tmp229 = w18*(-A_12_3 - A_21_3);
+                                        const Scalar tmp230 = w4*(A_12_4 + A_21_4);
+                                        const Scalar tmp231 = w28*(-A_00_4 - A_00_5);
+                                        const Scalar tmp232 = w12*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
+                                        const Scalar tmp233 = w29*(-A_00_2 - A_00_3);
+                                        const Scalar tmp234 = w20*(-A_01_5 + A_10_7);
+                                        const Scalar tmp235 = w18*(-A_12_0 + A_21_2);
+                                        const Scalar tmp236 = w26*(A_11_5 + A_11_7);
+                                        const Scalar tmp237 = w10*(A_12_1 + A_12_6 - A_21_3 - A_21_4);
+                                        const Scalar tmp238 = w22*(A_11_1 + A_11_3 + A_11_4 + A_11_6);
+                                        const Scalar tmp239 = w4*(A_12_7 - A_21_5);
+                                        const Scalar tmp240 = w15*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
+                                        const Scalar tmp241 = w21*(-A_01_2 + A_10_0);
+                                        const Scalar tmp242 = w5*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
+                                        const Scalar tmp243 = w12*(-A_12_2 - A_12_4 + A_21_0 + A_21_6);
+                                        const Scalar tmp244 = w24*(A_11_0 + A_11_2);
+                                        const Scalar tmp245 = w8*(A_01_1 + A_01_6 - A_10_3 - A_10_4);
+                                        const Scalar tmp246 = w6*(-A_12_3 - A_12_5 + A_21_1 + A_21_7);
+                                        const Scalar tmp247 = w11*(A_02_3 + A_02_4 - A_20_2 - A_20_5);
+                                        const Scalar tmp248 = w20*(-A_01_1 + A_10_0);
+                                        const Scalar tmp249 = w21*(-A_01_6 + A_10_7);
+                                        const Scalar tmp250 = w8*(A_01_2 + A_01_5 - A_10_3 - A_10_4);
+                                        const Scalar tmp251 = w17*(A_02_6 - A_20_7);
+                                        const Scalar tmp252 = w2*(-A_02_1 + A_20_0);
+                                        const Scalar tmp253 = w17*(-A_02_4 - A_20_4);
+                                        const Scalar tmp254 = w2*(A_02_3 + A_20_3);
+                                        const Scalar tmp255 = w26*(-A_11_1 - A_11_3);
+                                        const Scalar tmp256 = w20*(-A_01_3 - A_10_3);
+                                        const Scalar tmp257 = w21*(-A_01_4 - A_10_4);
+                                        const Scalar tmp258 = w6*(-A_12_1 - A_12_7 - A_21_1 - A_21_7);
+                                        const Scalar tmp259 = w7*(-A_22_3 - A_22_7);
+                                        const Scalar tmp260 = w15*(-A_02_0 - A_02_5 - A_20_0 - A_20_5);
+                                        const Scalar tmp261 = w24*(-A_11_4 - A_11_6);
+                                        const Scalar tmp262 = w19*(-A_22_0 - A_22_4);
+                                        const Scalar tmp263 = w18*(-A_12_4 - A_21_4);
+                                        const Scalar tmp264 = w4*(A_12_3 + A_21_3);
+                                        const Scalar tmp265 = w28*(-A_00_2 - A_00_3);
+                                        const Scalar tmp266 = w12*(-A_12_0 - A_12_6 - A_21_0 - A_21_6);
+                                        const Scalar tmp267 = w5*(-A_02_2 - A_02_7 - A_20_2 - A_20_7);
+                                        const Scalar tmp268 = w29*(-A_00_4 - A_00_5);
+                                        const Scalar tmp269 = w11*(A_02_2 + A_02_5 + A_20_0 + A_20_7);
+                                        const Scalar tmp270 = w1*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
+                                        const Scalar tmp271 = w15*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
+                                        const Scalar tmp272 = w16*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
+                                        const Scalar tmp273 = w5*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
+                                        const Scalar tmp274 = w8*(-A_01_1 - A_01_2 - A_01_5 - A_01_6 + A_10_0 + A_10_3 + A_10_4 + A_10_7);
+                                        const Scalar tmp275 = w17*(A_02_7 + A_20_2);
+                                        const Scalar tmp276 = w2*(-A_02_0 - A_20_5);
+                                        const Scalar tmp277 = w18*(-A_12_1 + A_21_5);
+                                        const Scalar tmp278 = w11*(A_02_3 + A_02_4 - A_20_0 - A_20_7);
+                                        const Scalar tmp279 = w10*(A_12_0 + A_12_7 - A_21_3 - A_21_4);
+                                        const Scalar tmp280 = w4*(A_12_6 - A_21_2);
+                                        const Scalar tmp281 = w17*(A_02_1 - A_20_5);
+                                        const Scalar tmp282 = w2*(-A_02_6 + A_20_2);
+                                        const Scalar tmp283 = w11*(A_02_0 + A_02_7 + A_20_2 + A_20_5);
+                                        const Scalar tmp284 = w12*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
+                                        const Scalar tmp285 = w6*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
+                                        const Scalar tmp286 = w17*(A_02_2 + A_20_7);
+                                        const Scalar tmp287 = w2*(-A_02_5 - A_20_0);
+                                        const Scalar tmp288 = w13*(-A_22_0 - A_22_3 - A_22_4 - A_22_7);
+                                        const Scalar tmp289 = w22*(-A_11_1 - A_11_3 - A_11_4 - A_11_6);
+                                        const Scalar tmp290 = w8*(-A_01_1 - A_01_6 - A_10_1 - A_10_6);
+                                        const Scalar tmp291 = w17*(A_02_2 + A_20_2);
+                                        const Scalar tmp292 = w2*(-A_02_5 - A_20_5);
+                                        const Scalar tmp293 = w11*(A_02_0 + A_02_7 + A_20_0 + A_20_7);
+                                        const Scalar tmp294 = w26*(-A_11_5 - A_11_7);
+                                        const Scalar tmp295 = w10*(A_12_3 + A_12_4 + A_21_3 + A_21_4);
+                                        const Scalar tmp296 = w20*(A_01_5 + A_10_5);
+                                        const Scalar tmp297 = w21*(A_01_2 + A_10_2);
+                                        const Scalar tmp298 = w7*(-A_22_1 - A_22_5);
+                                        const Scalar tmp299 = w24*(-A_11_0 - A_11_2);
+                                        const Scalar tmp300 = w19*(-A_22_2 - A_22_6);
+                                        const Scalar tmp301 = w18*(-A_12_2 - A_21_2);
+                                        const Scalar tmp302 = w4*(A_12_5 + A_21_5);
+                                        const Scalar tmp303 = w8*(A_01_3 + A_01_4 + A_10_3 + A_10_4);
+                                        const Scalar tmp304 = w27*(-A_00_2 - A_00_3 - A_00_4 - A_00_5);
+                                        const Scalar tmp305 = w17*(A_02_7 + A_20_7);
+                                        const Scalar tmp306 = w2*(-A_02_0 - A_20_0);
+                                        const Scalar tmp307 = w11*(A_02_2 + A_02_5 + A_20_2 + A_20_5);
+                                        const Scalar tmp308 = w26*(-A_11_0 - A_11_2);
+                                        const Scalar tmp309 = w10*(-A_12_1 - A_12_6 - A_21_1 - A_21_6);
+                                        const Scalar tmp310 = w20*(-A_01_0 - A_10_0);
+                                        const Scalar tmp311 = w21*(-A_01_7 - A_10_7);
+                                        const Scalar tmp312 = w6*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
+                                        const Scalar tmp313 = w24*(-A_11_5 - A_11_7);
+                                        const Scalar tmp314 = w18*(A_12_7 + A_21_7);
+                                        const Scalar tmp315 = w4*(-A_12_0 - A_21_0);
+                                        const Scalar tmp316 = w28*(-A_00_0 - A_00_1);
+                                        const Scalar tmp317 = w12*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
+                                        const Scalar tmp318 = w29*(-A_00_6 - A_00_7);
+                                        const Scalar tmp319 = w18*(-A_12_7 + A_21_5);
+                                        const Scalar tmp320 = w26*(A_11_0 + A_11_2);
+                                        const Scalar tmp321 = w21*(-A_01_5 + A_10_7);
+                                        const Scalar tmp322 = w20*(-A_01_2 + A_10_0);
+                                        const Scalar tmp323 = w4*(A_12_0 - A_21_2);
+                                        const Scalar tmp324 = w15*(A_02_5 + A_02_7 + A_20_5 + A_20_7);
+                                        const Scalar tmp325 = w24*(A_11_5 + A_11_7);
+                                        const Scalar tmp326 = w5*(A_02_0 + A_02_2 + A_20_0 + A_20_2);
+                                        const Scalar tmp327 = w18*(A_12_7 + A_21_1);
+                                        const Scalar tmp328 = w10*(-A_12_1 - A_12_6 - A_21_0 - A_21_7);
+                                        const Scalar tmp329 = w3*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
+                                        const Scalar tmp330 = w1*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
+                                        const Scalar tmp331 = w4*(-A_12_0 - A_21_6);
+                                        const Scalar tmp332 = w25*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
+                                        const Scalar tmp333 = w15*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
+                                        const Scalar tmp334 = w16*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
+                                        const Scalar tmp335 = w9*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
+                                        const Scalar tmp336 = w5*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
+                                        const Scalar tmp337 = w27*(-A_00_0 - A_00_1 - A_00_2 - A_00_3 - A_00_4 - A_00_5 - A_00_6 - A_00_7);
+                                        const Scalar tmp338 = w23*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
+                                        const Scalar tmp339 = w14*(-A_00_0 - A_00_1 - A_00_4 - A_00_5);
+                                        const Scalar tmp340 = w23*(-A_22_2 - A_22_3 - A_22_6 - A_22_7);
+                                        const Scalar tmp341 = w1*(A_01_2 + A_01_6 - A_10_3 - A_10_7);
+                                        const Scalar tmp342 = w25*(-A_22_0 - A_22_1 - A_22_4 - A_22_5);
+                                        const Scalar tmp343 = w15*(A_02_1 + A_02_4 + A_20_1 + A_20_4);
+                                        const Scalar tmp344 = w0*(-A_00_2 - A_00_3 - A_00_6 - A_00_7);
+                                        const Scalar tmp345 = w16*(A_01_1 + A_01_5 - A_10_0 - A_10_4);
+                                        const Scalar tmp346 = w12*(A_12_4 + A_12_5 - A_21_0 - A_21_1);
+                                        const Scalar tmp347 = w5*(A_02_3 + A_02_6 + A_20_3 + A_20_6);
+                                        const Scalar tmp348 = w6*(A_12_2 + A_12_3 - A_21_6 - A_21_7);
+                                        const Scalar tmp349 = w17*(A_02_5 + A_20_0);
+                                        const Scalar tmp350 = w2*(-A_02_2 - A_20_7);
+                                        const Scalar tmp351 = w8*(-A_01_2 - A_01_5 - A_10_2 - A_10_5);
+                                        const Scalar tmp352 = w17*(-A_02_1 - A_20_1);
+                                        const Scalar tmp353 = w2*(A_02_6 + A_20_6);
+                                        const Scalar tmp354 = w11*(-A_02_3 - A_02_4 - A_20_3 - A_20_4);
+                                        const Scalar tmp355 = w10*(-A_12_0 - A_12_7 - A_21_0 - A_21_7);
+                                        const Scalar tmp356 = w20*(A_01_6 + A_10_6);
+                                        const Scalar tmp357 = w21*(A_01_1 + A_10_1);
+                                        const Scalar tmp358 = w7*(-A_22_2 - A_22_6);
+                                        const Scalar tmp359 = w19*(-A_22_1 - A_22_5);
+                                        const Scalar tmp360 = w18*(A_12_1 + A_21_1);
+                                        const Scalar tmp361 = w4*(-A_12_6 - A_21_6);
+                                        const Scalar tmp362 = w28*(-A_00_6 - A_00_7);
+                                        const Scalar tmp363 = w29*(-A_00_0 - A_00_1);
+                                        const Scalar tmp364 = w2*(A_02_4 + A_20_1);
+                                        const Scalar tmp365 = w11*(-A_02_1 - A_02_6 - A_20_3 - A_20_4);
+                                        const Scalar tmp366 = w17*(-A_02_3 - A_20_6);
+                                        const Scalar tmp367 = w2*(A_02_5 - A_20_4);
+                                        const Scalar tmp368 = w6*(-A_12_4 - A_12_5 - A_21_4 - A_21_5);
+                                        const Scalar tmp369 = w11*(-A_02_0 - A_02_7 + A_20_1 + A_20_6);
+                                        const Scalar tmp370 = w20*(-A_01_5 + A_10_4);
+                                        const Scalar tmp371 = w3*(A_11_4 + A_11_5 + A_11_6 + A_11_7);
+                                        const Scalar tmp372 = w12*(-A_12_2 - A_12_3 - A_21_2 - A_21_3);
+                                        const Scalar tmp373 = w21*(-A_01_2 + A_10_3);
+                                        const Scalar tmp374 = w9*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
+                                        const Scalar tmp375 = w29*(A_00_2 + A_00_3);
+                                        const Scalar tmp376 = w8*(A_01_1 + A_01_6 - A_10_0 - A_10_7);
+                                        const Scalar tmp377 = w28*(A_00_4 + A_00_5);
+                                        const Scalar tmp378 = w17*(-A_02_2 + A_20_3);
+                                        const Scalar tmp379 = w17*(A_02_0 + A_20_0);
+                                        const Scalar tmp380 = w2*(-A_02_7 - A_20_7);
+                                        const Scalar tmp381 = w20*(-A_01_7 - A_10_7);
+                                        const Scalar tmp382 = w21*(-A_01_0 - A_10_0);
+                                        const Scalar tmp383 = w6*(A_12_3 + A_12_5 + A_21_3 + A_21_5);
+                                        const Scalar tmp384 = w18*(A_12_0 + A_21_0);
+                                        const Scalar tmp385 = w4*(-A_12_7 - A_21_7);
+                                        const Scalar tmp386 = w12*(A_12_2 + A_12_4 + A_21_2 + A_21_4);
+                                        const Scalar tmp387 = w17*(-A_02_6 - A_20_6);
+                                        const Scalar tmp388 = w2*(A_02_1 + A_20_1);
+                                        const Scalar tmp389 = w20*(A_01_1 + A_10_1);
+                                        const Scalar tmp390 = w21*(A_01_6 + A_10_6);
+                                        const Scalar tmp391 = w18*(A_12_6 + A_21_6);
+                                        const Scalar tmp392 = w4*(-A_12_1 - A_21_1);
+                                        const Scalar tmp393 = w2*(A_02_3 + A_20_6);
+                                        const Scalar tmp394 = w1*(-A_01_3 - A_01_7 + A_10_2 + A_10_6);
+                                        const Scalar tmp395 = w16*(-A_01_0 - A_01_4 + A_10_1 + A_10_5);
+                                        const Scalar tmp396 = w17*(-A_02_4 - A_20_1);
+                                        const Scalar tmp397 = w18*(-A_12_5 - A_21_3);
+                                        const Scalar tmp398 = w10*(A_12_3 + A_12_4 + A_21_2 + A_21_5);
+                                        const Scalar tmp399 = w1*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
+                                        const Scalar tmp400 = w4*(A_12_2 + A_21_4);
+                                        const Scalar tmp401 = w16*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
+                                        const Scalar tmp402 = w20*(-A_01_2 + A_10_3);
+                                        const Scalar tmp403 = w21*(-A_01_5 + A_10_4);
+                                        const Scalar tmp404 = w17*(-A_02_5 + A_20_4);
+                                        const Scalar tmp405 = w2*(A_02_2 - A_20_3);
+                                        const Scalar tmp406 = w18*(-A_12_0 + A_21_4);
+                                        const Scalar tmp407 = w4*(A_12_7 - A_21_3);
+                                        const Scalar tmp408 = w17*(-A_02_0 + A_20_4);
+                                        const Scalar tmp409 = w2*(A_02_7 - A_20_3);
+                                        const Scalar tmp410 = w17*(A_02_5 + A_20_5);
+                                        const Scalar tmp411 = w2*(-A_02_2 - A_20_2);
+                                        const Scalar tmp412 = w20*(A_01_2 + A_10_2);
+                                        const Scalar tmp413 = w21*(A_01_5 + A_10_5);
+                                        const Scalar tmp414 = w18*(-A_12_5 - A_21_5);
+                                        const Scalar tmp415 = w4*(A_12_2 + A_21_2);
+                                        const Scalar tmp416 = w12*(-A_12_0 - A_12_1 + A_21_4 + A_21_5);
+                                        const Scalar tmp417 = w6*(-A_12_6 - A_12_7 + A_21_2 + A_21_3);
+                                        const Scalar tmp418 = w17*(A_02_0 + A_20_5);
+                                        const Scalar tmp419 = w2*(-A_02_7 - A_20_2);
+                                        const Scalar tmp420 = w18*(-A_12_4 - A_21_2);
+                                        const Scalar tmp421 = w10*(A_12_2 + A_12_5 + A_21_3 + A_21_4);
+                                        const Scalar tmp422 = w3*(-A_11_1 - A_11_3 - A_11_5 - A_11_7);
+                                        const Scalar tmp423 = w1*(A_01_1 + A_01_5 - A_10_3 - A_10_7);
+                                        const Scalar tmp424 = w25*(-A_22_0 - A_22_2 - A_22_4 - A_22_6);
+                                        const Scalar tmp425 = w4*(A_12_3 + A_21_5);
+                                        const Scalar tmp426 = w15*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
+                                        const Scalar tmp427 = w16*(A_01_2 + A_01_6 - A_10_0 - A_10_4);
+                                        const Scalar tmp428 = w9*(-A_11_0 - A_11_2 - A_11_4 - A_11_6);
+                                        const Scalar tmp429 = w5*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
+                                        const Scalar tmp430 = w23*(-A_22_1 - A_22_3 - A_22_5 - A_22_7);
+                                        const Scalar tmp431 = w18*(A_12_5 - A_21_7);
+                                        const Scalar tmp432 = w10*(-A_12_3 - A_12_4 + A_21_1 + A_21_6);
+                                        const Scalar tmp433 = w21*(A_01_7 - A_10_5);
+                                        const Scalar tmp434 = w20*(A_01_0 - A_10_2);
+                                        const Scalar tmp435 = w4*(-A_12_2 + A_21_0);
+                                        const Scalar tmp436 = w8*(-A_01_3 - A_01_4 + A_10_1 + A_10_6);
+                                        const Scalar tmp437 = w2*(-A_02_4 + A_20_5);
+                                        const Scalar tmp438 = w20*(A_01_4 - A_10_5);
+                                        const Scalar tmp439 = w21*(A_01_3 - A_10_2);
+                                        const Scalar tmp440 = w16*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
+                                        const Scalar tmp441 = w1*(-A_01_5 - A_01_6 + A_10_4 + A_10_7);
+                                        const Scalar tmp442 = w17*(A_02_3 - A_20_2);
+                                        const Scalar tmp443 = w20*(-A_01_4 - A_10_7);
+                                        const Scalar tmp444 = w21*(-A_01_3 - A_10_0);
+                                        const Scalar tmp445 = w18*(A_12_6 + A_21_0);
+                                        const Scalar tmp446 = w10*(-A_12_0 - A_12_7 - A_21_1 - A_21_6);
+                                        const Scalar tmp447 = w1*(-A_01_3 - A_01_7 + A_10_1 + A_10_5);
+                                        const Scalar tmp448 = w4*(-A_12_1 - A_21_7);
+                                        const Scalar tmp449 = w16*(-A_01_0 - A_01_4 + A_10_2 + A_10_6);
+                                        const Scalar tmp450 = w2*(A_02_7 - A_20_6);
+                                        const Scalar tmp451 = w6*(A_12_6 + A_12_7 + A_21_6 + A_21_7);
+                                        const Scalar tmp452 = w20*(A_01_7 - A_10_6);
+                                        const Scalar tmp453 = w21*(A_01_0 - A_10_1);
+                                        const Scalar tmp454 = w12*(A_12_0 + A_12_1 + A_21_0 + A_21_1);
+                                        const Scalar tmp455 = w29*(A_00_0 + A_00_1);
+                                        const Scalar tmp456 = w28*(A_00_6 + A_00_7);
+                                        const Scalar tmp457 = w17*(-A_02_0 + A_20_1);
+                                        const Scalar tmp458 = w21*(-A_01_7 - A_10_4);
+                                        const Scalar tmp459 = w20*(-A_01_0 - A_10_3);
+                                        const Scalar tmp460 = w12*(A_12_4 + A_12_5 - A_21_6 - A_21_7);
+                                        const Scalar tmp461 = w6*(A_12_2 + A_12_3 - A_21_0 - A_21_1);
+                                        const Scalar tmp462 = w18*(A_12_1 + A_21_7);
+                                        const Scalar tmp463 = w4*(-A_12_6 - A_21_0);
+                                        const Scalar tmp464 = w15*(A_02_1 + A_02_3 - A_20_5 - A_20_7);
+                                        const Scalar tmp465 = w5*(A_02_4 + A_02_6 - A_20_0 - A_20_2);
+                                        const Scalar tmp466 = w2*(-A_02_6 + A_20_7);
+                                        const Scalar tmp467 = w20*(-A_01_6 + A_10_7);
+                                        const Scalar tmp468 = w21*(-A_01_1 + A_10_0);
+                                        const Scalar tmp469 = w17*(A_02_1 - A_20_0);
+                                        const Scalar tmp470 = w6*(-A_12_2 - A_12_3 - A_21_4 - A_21_5);
+                                        const Scalar tmp471 = w1*(-A_01_1 - A_01_5 - A_10_2 - A_10_6);
+                                        const Scalar tmp472 = w15*(-A_02_4 - A_02_6 - A_20_1 - A_20_3);
+                                        const Scalar tmp473 = w16*(-A_01_2 - A_01_6 - A_10_1 - A_10_5);
+                                        const Scalar tmp474 = w12*(-A_12_4 - A_12_5 - A_21_2 - A_21_3);
+                                        const Scalar tmp475 = w5*(-A_02_1 - A_02_3 - A_20_4 - A_20_6);
+                                        const Scalar tmp476 = w18*(-A_12_6 + A_21_4);
+                                        const Scalar tmp477 = w20*(A_01_3 - A_10_1);
+                                        const Scalar tmp478 = w10*(A_12_0 + A_12_7 - A_21_2 - A_21_5);
+                                        const Scalar tmp479 = w4*(A_12_1 - A_21_3);
+                                        const Scalar tmp480 = w21*(A_01_4 - A_10_6);
+                                        const Scalar tmp481 = w8*(-A_01_0 - A_01_7 + A_10_2 + A_10_5);
+                                        const Scalar tmp482 = w6*(A_12_0 + A_12_1 + A_21_6 + A_21_7);
+                                        const Scalar tmp483 = w12*(A_12_6 + A_12_7 + A_21_0 + A_21_1);
+                                        const Scalar tmp484 = w15*(A_02_5 + A_02_7 + A_20_0 + A_20_2);
+                                        const Scalar tmp485 = w5*(A_02_0 + A_02_2 + A_20_5 + A_20_7);
+                                        const Scalar tmp486 = w18*(-A_12_1 + A_21_3);
+                                        const Scalar tmp487 = w20*(A_01_4 - A_10_6);
+                                        const Scalar tmp488 = w4*(A_12_6 - A_21_4);
+                                        const Scalar tmp489 = w21*(A_01_3 - A_10_1);
+                                        const Scalar tmp490 = w20*(A_01_7 - A_10_5);
+                                        const Scalar tmp491 = w18*(A_12_2 - A_21_0);
+                                        const Scalar tmp492 = w4*(-A_12_5 + A_21_7);
+                                        const Scalar tmp493 = w21*(A_01_0 - A_10_2);
+                                        const Scalar tmp494 = w20*(A_01_1 + A_10_2);
+                                        const Scalar tmp495 = w21*(A_01_6 + A_10_5);
+                                        const Scalar tmp496 = w18*(-A_12_2 - A_21_4);
+                                        const Scalar tmp497 = w4*(A_12_5 + A_21_3);
+                                        const Scalar tmp498 = w15*(-A_02_0 - A_02_2 + A_20_4 + A_20_6);
+                                        const Scalar tmp499 = w5*(-A_02_5 - A_02_7 + A_20_1 + A_20_3);
+                                        const Scalar tmp500 = w18*(-A_12_6 + A_21_2);
+                                        const Scalar tmp501 = w4*(A_12_1 - A_21_5);
+                                        const Scalar tmp502 = w17*(A_02_6 - A_20_2);
+                                        const Scalar tmp503 = w2*(-A_02_1 + A_20_5);
+                                        const Scalar tmp504 = w18*(-A_12_3 - A_21_5);
+                                        const Scalar tmp505 = w4*(A_12_4 + A_21_2);
+                                        const Scalar tmp506 = w2*(A_02_6 + A_20_3);
+                                        const Scalar tmp507 = w17*(-A_02_1 - A_20_4);
+                                        const Scalar tmp508 = w18*(A_12_0 + A_21_6);
+                                        const Scalar tmp509 = w4*(-A_12_7 - A_21_1);
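+                                        // Descriptive note (added): the tmp terms above pair the expanded
+                                        // coefficient samples A_ij_n with the precomputed weight constants
+                                        // w0..w29; the EM_S entries below accumulate these terms into the
+                                        // 8x8 element matrix for equation k and component m.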
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=tmp198 + tmp200 + tmp214 + tmp259 + tmp262 + tmp289 + tmp294 + tmp299 + tmp303 + tmp304 + tmp307 + tmp309 + tmp343 + tmp347 + tmp362 + tmp363 + tmp379 + tmp380 + tmp381 + tmp382 + tmp383 + tmp384 + tmp385 + tmp386;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=tmp161 + tmp201 + tmp247 + tmp250 + tmp371 + tmp374 + tmp44 + tmp451 + tmp454 + tmp455 + tmp456 + tmp466 + tmp467 + tmp468 + tmp469 + tmp49 + tmp89 + tmp91 + tmp92 + tmp98;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=tmp135 + tmp236 + tmp238 + tmp240 + tmp242 + tmp244 + tmp39 + tmp41 + tmp432 + tmp436 + tmp440 + tmp441 + tmp490 + tmp491 + tmp492 + tmp493 + tmp61 + tmp68 + tmp70 + tmp71;
@@ -4370,85 +4395,85 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double Aw00 = 8*A_p[INDEX4(k,0,m,0, numEq,3, numComp)]*w27;
-                                        const double Aw01 = 12*A_p[INDEX4(k,0,m,1, numEq,3, numComp)]*w8;
-                                        const double Aw02 = 12*A_p[INDEX4(k,0,m,2, numEq,3, numComp)]*w11;
-                                        const double Aw10 = 12*A_p[INDEX4(k,1,m,0, numEq,3, numComp)]*w8;
-                                        const double Aw11 = 8*A_p[INDEX4(k,1,m,1, numEq,3, numComp)]*w22;
-                                        const double Aw12 = 12*A_p[INDEX4(k,1,m,2, numEq,3, numComp)]*w10;
-                                        const double Aw20 = 12*A_p[INDEX4(k,2,m,0, numEq,3, numComp)]*w11;
-                                        const double Aw21 = 12*A_p[INDEX4(k,2,m,1, numEq,3, numComp)]*w10;
-                                        const double Aw22 = 8*A_p[INDEX4(k,2,m,2, numEq,3, numComp)]*w13;
-                                        const double tmp0 = Aw01 + Aw10;
-                                        const double tmp1 = Aw01 - Aw10;
-                                        const double tmp2 = Aw02 + Aw20;
-                                        const double tmp3 = Aw02 - Aw20;
-                                        const double tmp4 = Aw12 + Aw21;
-                                        const double tmp5 = Aw12 - Aw21;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp2 - 2*tmp4;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - tmp4 + 2*tmp1 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 - 2*tmp1 + tmp2 - 2*tmp5;
-                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 - 2*tmp0 + tmp3 - tmp5;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 - 2*tmp3 + 2*tmp5 + tmp0;
-                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - 2*tmp2 + tmp1 + tmp5;
-                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + 2*tmp4 - tmp1 - tmp3;
-                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp4 - tmp0 - tmp2;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - 2*tmp3 - 2*tmp1 - tmp4;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 - 2*tmp2 - 2*tmp4 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + 2*tmp0 - tmp5 - tmp3;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 - tmp2 - 2*tmp5 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp2 - tmp1 + tmp5;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp5 - tmp0 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp4 + tmp2 + tmp0;
-                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp3 + tmp1 + 2*tmp4;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp5 + tmp2 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp3 + 2*tmp0 + tmp5;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp4 + 2*tmp2 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp4 - 2*tmp1 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp1 - 2*tmp4 - tmp3;
-                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 - tmp4 + tmp0 - tmp2;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 - 2*tmp3 - tmp0 - 2*tmp5;
-                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - tmp5 - 2*tmp2 - tmp1;
-                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 - tmp3 + tmp5 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp5 - 2*tmp1 - tmp2;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - 2*tmp3 + tmp4 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 - 2*tmp2 + 2*tmp4;
-                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 - tmp0 + tmp2 - tmp4;
-                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp3 - tmp1 - 2*tmp4;
-                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - tmp5 + tmp1 + 2*tmp2;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + tmp0 - 2*tmp5 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + tmp0 - 2*tmp5 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - tmp5 + tmp1 + 2*tmp2;
-                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp3 - tmp1 - 2*tmp4;
-                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 - tmp0 + tmp2 - tmp4;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 - 2*tmp2 + 2*tmp4;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - 2*tmp3 + tmp4 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp5 - 2*tmp1 - tmp2;
-                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 - tmp3 + tmp5 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - tmp5 - 2*tmp2 - tmp1;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 - 2*tmp3 - tmp0 - 2*tmp5;
-                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 - tmp4 + tmp0 - tmp2;
-                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp1 - 2*tmp4 - tmp3;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 + tmp4 - 2*tmp1 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp4 + 2*tmp2 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp3 + 2*tmp0 + tmp5;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 + 2*tmp5 + tmp2 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + tmp3 + tmp1 + 2*tmp4;
-                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp4 + tmp2 + tmp0;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 + 2*tmp5 - tmp0 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 + 2*tmp2 - tmp1 + tmp5;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 - tmp2 - 2*tmp5 + 2*tmp1;
-                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + 2*tmp0 - tmp5 - tmp3;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 - 2*tmp2 - 2*tmp4 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - 2*tmp3 - 2*tmp1 - tmp4;
-                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=   Aw00 +   Aw11 +   Aw22 + tmp4 - tmp0 - tmp2;
-                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=  -Aw00 + 2*Aw11 + 2*Aw22 + 2*tmp4 - tmp1 - tmp3;
-                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+= 2*Aw00 -   Aw11 + 2*Aw22 - 2*tmp2 + tmp1 + tmp5;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2*Aw00 - 2*Aw11 + 4*Aw22 - 2*tmp3 + 2*tmp5 + tmp0;
-                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+= 2*Aw00 + 2*Aw11 -   Aw22 + tmp3 - tmp5 - 2*tmp0;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2*Aw00 + 4*Aw11 - 2*Aw22 - 2*tmp1 + tmp2 - 2*tmp5;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+= 4*Aw00 - 2*Aw11 - 2*Aw22 - tmp4 + 2*tmp1 + 2*tmp3;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4*Aw00 - 4*Aw11 - 4*Aw22 + 2*tmp0 + 2*tmp2 - 2*tmp4;
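+                                        // Descriptive note (added): constant data case. Each Aw_ij is the
+                                        // coefficient A[k][i][m][j] scaled by its weight constant (w27, w22,
+                                        // w13 on the diagonal; w8, w11, w10 off the diagonal), and tmp0..tmp5
+                                        // are the symmetric/antisymmetric sums Aw_ij +/- Aw_ji reused in all
+                                        // 64 EM_S entries below.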
+                                        const Scalar Aw00 =  8.*A_p[INDEX4(k,0,m,0, numEq,3, numComp)]*w27;
+                                        const Scalar Aw01 = 12.*A_p[INDEX4(k,0,m,1, numEq,3, numComp)]*w8;
+                                        const Scalar Aw02 = 12.*A_p[INDEX4(k,0,m,2, numEq,3, numComp)]*w11;
+                                        const Scalar Aw10 = 12.*A_p[INDEX4(k,1,m,0, numEq,3, numComp)]*w8;
+                                        const Scalar Aw11 =  8.*A_p[INDEX4(k,1,m,1, numEq,3, numComp)]*w22;
+                                        const Scalar Aw12 = 12.*A_p[INDEX4(k,1,m,2, numEq,3, numComp)]*w10;
+                                        const Scalar Aw20 = 12.*A_p[INDEX4(k,2,m,0, numEq,3, numComp)]*w11;
+                                        const Scalar Aw21 = 12.*A_p[INDEX4(k,2,m,1, numEq,3, numComp)]*w10;
+                                        const Scalar Aw22 =  8.*A_p[INDEX4(k,2,m,2, numEq,3, numComp)]*w13;
+                                        const Scalar tmp0 = Aw01 + Aw10;
+                                        const Scalar tmp1 = Aw01 - Aw10;
+                                        const Scalar tmp2 = Aw02 + Aw20;
+                                        const Scalar tmp3 = Aw02 - Aw20;
+                                        const Scalar tmp4 = Aw12 + Aw21;
+                                        const Scalar tmp5 = Aw12 - Aw21;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp2 - 2.*tmp4;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - tmp4 + 2.*tmp1 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 - 2.*tmp1 + tmp2 - 2.*tmp5;
+                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 - 2.*tmp0 + tmp3 - tmp5;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 - 2.*tmp3 + 2.*tmp5 + tmp0;
+                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - 2.*tmp2 + tmp1 + tmp5;
+                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + 2.*tmp4 - tmp1 - tmp3;
+                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp4 - tmp0 - tmp2;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - 2.*tmp3 - 2.*tmp1 - tmp4;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 - 2.*tmp2 - 2.*tmp4 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + 2.*tmp0 - tmp5 - tmp3;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 - tmp2 - 2.*tmp5 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp2 - tmp1 + tmp5;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp5 - tmp0 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp4 + tmp2 + tmp0;
+                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp3 + tmp1 + 2.*tmp4;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp5 + tmp2 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp3 + 2.*tmp0 + tmp5;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp4 + 2.*tmp2 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp4 - 2.*tmp1 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp1 - 2.*tmp4 - tmp3;
+                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 - tmp4 + tmp0 - tmp2;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 - 2.*tmp3 - tmp0 - 2.*tmp5;
+                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - tmp5 - 2.*tmp2 - tmp1;
+                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 - tmp3 + tmp5 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp5 - 2.*tmp1 - tmp2;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - 2.*tmp3 + tmp4 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 - 2.*tmp2 + 2.*tmp4;
+                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 - tmp0 + tmp2 - tmp4;
+                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp3 - tmp1 - 2.*tmp4;
+                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - tmp5 + tmp1 + 2.*tmp2;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + tmp0 - 2.*tmp5 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + tmp0 - 2.*tmp5 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - tmp5 + tmp1 + 2.*tmp2;
+                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp3 - tmp1 - 2.*tmp4;
+                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 - tmp0 + tmp2 - tmp4;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 - 2.*tmp2 + 2.*tmp4;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - 2.*tmp3 + tmp4 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp5 - 2.*tmp1 - tmp2;
+                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 - tmp3 + tmp5 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - tmp5 - 2.*tmp2 - tmp1;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 - 2.*tmp3 - tmp0 - 2.*tmp5;
+                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 - tmp4 + tmp0 - tmp2;
+                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp1 - 2.*tmp4 - tmp3;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 + tmp4 - 2.*tmp1 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp4 + 2.*tmp2 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp3 + 2.*tmp0 + tmp5;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 + 2.*tmp5 + tmp2 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + tmp3 + tmp1 + 2.*tmp4;
+                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp4 + tmp2 + tmp0;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 + 2.*tmp5 - tmp0 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 + 2.*tmp2 - tmp1 + tmp5;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 - tmp2 - 2.*tmp5 + 2.*tmp1;
+                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + 2.*tmp0 - tmp5 - tmp3;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 - 2.*tmp2 - 2.*tmp4 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - 2.*tmp3 - 2.*tmp1 - tmp4;
+                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=    Aw00 +    Aw11 +    Aw22 + tmp4 - tmp0 - tmp2;
+                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=   -Aw00 + 2.*Aw11 + 2.*Aw22 + 2.*tmp4 - tmp1 - tmp3;
+                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+= 2.*Aw00 -    Aw11 + 2.*Aw22 - 2.*tmp2 + tmp1 + tmp5;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2.*Aw00 - 2.*Aw11 + 4.*Aw22 - 2.*tmp3 + 2.*tmp5 + tmp0;
+                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+= 2.*Aw00 + 2.*Aw11 -    Aw22 + tmp3 - tmp5 - 2.*tmp0;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2.*Aw00 + 4.*Aw11 - 2.*Aw22 - 2.*tmp1 + tmp2 - 2.*tmp5;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+= 4.*Aw00 - 2.*Aw11 - 2.*Aw22 - tmp4 + 2.*tmp1 + 2.*tmp3;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4.*Aw00 - 4.*Aw11 - 4.*Aw22 + 2.*tmp0 + 2.*tmp2 - 2.*tmp4;
                                     }
                                 }
                             }
@@ -4457,298 +4482,298 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double* B_p=B.getSampleDataRO(e);
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
                             if (B.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,3,numComp)];
-                                        const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,3,numComp)];
-                                        const double B_2_0 = B_p[INDEX4(k,2,m,0, numEq,3,numComp)];
-                                        const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,3,numComp)];
-                                        const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,3,numComp)];
-                                        const double B_2_1 = B_p[INDEX4(k,2,m,1, numEq,3,numComp)];
-                                        const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,3,numComp)];
-                                        const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,3,numComp)];
-                                        const double B_2_2 = B_p[INDEX4(k,2,m,2, numEq,3,numComp)];
-                                        const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,3,numComp)];
-                                        const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,3,numComp)];
-                                        const double B_2_3 = B_p[INDEX4(k,2,m,3, numEq,3,numComp)];
-                                        const double B_0_4 = B_p[INDEX4(k,0,m,4, numEq,3,numComp)];
-                                        const double B_1_4 = B_p[INDEX4(k,1,m,4, numEq,3,numComp)];
-                                        const double B_2_4 = B_p[INDEX4(k,2,m,4, numEq,3,numComp)];
-                                        const double B_0_5 = B_p[INDEX4(k,0,m,5, numEq,3,numComp)];
-                                        const double B_1_5 = B_p[INDEX4(k,1,m,5, numEq,3,numComp)];
-                                        const double B_2_5 = B_p[INDEX4(k,2,m,5, numEq,3,numComp)];
-                                        const double B_0_6 = B_p[INDEX4(k,0,m,6, numEq,3,numComp)];
-                                        const double B_1_6 = B_p[INDEX4(k,1,m,6, numEq,3,numComp)];
-                                        const double B_2_6 = B_p[INDEX4(k,2,m,6, numEq,3,numComp)];
-                                        const double B_0_7 = B_p[INDEX4(k,0,m,7, numEq,3,numComp)];
-                                        const double B_1_7 = B_p[INDEX4(k,1,m,7, numEq,3,numComp)];
-                                        const double B_2_7 = B_p[INDEX4(k,2,m,7, numEq,3,numComp)];
-                                        const double tmp0 = w38*(B_2_1 + B_2_2);
-                                        const double tmp1 = w42*(B_1_3 + B_1_7);
-                                        const double tmp2 = w41*(B_0_3 + B_0_7);
-                                        const double tmp3 = w37*(B_1_1 + B_1_5);
-                                        const double tmp4 = w39*(B_0_2 + B_0_6);
-                                        const double tmp5 = w45*(B_2_5 + B_2_6);
-                                        const double tmp6 = w36*(B_0_1 + B_0_5);
-                                        const double tmp7 = w40*(B_1_2 + B_1_6);
-                                        const double tmp8 = w33*(B_0_0 + B_0_4);
-                                        const double tmp9 = w34*(B_1_0 + B_1_4);
-                                        const double tmp10 = w38*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
-                                        const double tmp11 = w42*(-B_1_6 - B_1_7);
-                                        const double tmp12 = w41*(-B_0_5 - B_0_7);
-                                        const double tmp13 = w37*(-B_1_4 - B_1_5);
-                                        const double tmp14 = w39*(-B_0_4 - B_0_6);
-                                        const double tmp15 = w45*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
-                                        const double tmp16 = w36*(-B_0_1 - B_0_3);
-                                        const double tmp17 = w40*(-B_1_2 - B_1_3);
-                                        const double tmp18 = w33*(-B_0_0 - B_0_2);
-                                        const double tmp19 = w34*(-B_1_0 - B_1_1);
-                                        const double tmp20 = w38*(-B_2_5 - B_2_7);
-                                        const double tmp21 = w35*(-B_2_4 - B_2_6);
-                                        const double tmp22 = w41*(B_0_1 + B_0_3);
-                                        const double tmp23 = w37*(-B_1_2 - B_1_7);
-                                        const double tmp24 = w39*(B_0_0 + B_0_2);
-                                        const double tmp25 = w45*(-B_2_0 - B_2_2);
-                                        const double tmp26 = w36*(B_0_5 + B_0_7);
-                                        const double tmp27 = w40*(-B_1_0 - B_1_5);
-                                        const double tmp28 = w33*(B_0_4 + B_0_6);
-                                        const double tmp29 = w46*(-B_2_1 - B_2_3);
-                                        const double tmp30 = w38*(B_2_0 + B_2_2);
-                                        const double tmp31 = w35*(B_2_1 + B_2_3);
-                                        const double tmp32 = w41*(-B_0_4 - B_0_6);
-                                        const double tmp33 = w37*(B_1_0 + B_1_5);
-                                        const double tmp34 = w39*(-B_0_5 - B_0_7);
-                                        const double tmp35 = w45*(B_2_5 + B_2_7);
-                                        const double tmp36 = w36*(-B_0_0 - B_0_2);
-                                        const double tmp37 = w40*(B_1_2 + B_1_7);
-                                        const double tmp38 = w33*(-B_0_1 - B_0_3);
-                                        const double tmp39 = w46*(B_2_4 + B_2_6);
-                                        const double tmp40 = w38*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
-                                        const double tmp41 = w42*(B_1_0 + B_1_1);
-                                        const double tmp42 = w41*(B_0_0 + B_0_2);
-                                        const double tmp43 = w37*(B_1_2 + B_1_3);
-                                        const double tmp44 = w39*(B_0_1 + B_0_3);
-                                        const double tmp45 = w45*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
-                                        const double tmp46 = w36*(B_0_4 + B_0_6);
-                                        const double tmp47 = w40*(B_1_4 + B_1_5);
-                                        const double tmp48 = w33*(B_0_5 + B_0_7);
-                                        const double tmp49 = w34*(B_1_6 + B_1_7);
-                                        const double tmp50 = w38*(B_2_0 + B_2_1);
-                                        const double tmp51 = w42*(-B_1_4 - B_1_5);
-                                        const double tmp52 = w35*(B_2_2 + B_2_3);
-                                        const double tmp53 = w37*(-B_1_6 - B_1_7);
-                                        const double tmp54 = w39*(B_0_0 + B_0_6);
-                                        const double tmp55 = w45*(B_2_6 + B_2_7);
-                                        const double tmp56 = w36*(B_0_1 + B_0_7);
-                                        const double tmp57 = w40*(-B_1_0 - B_1_1);
-                                        const double tmp58 = w46*(B_2_4 + B_2_5);
-                                        const double tmp59 = w34*(-B_1_2 - B_1_3);
-                                        const double tmp60 = w38*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
-                                        const double tmp61 = w37*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
-                                        const double tmp62 = w39*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
-                                        const double tmp63 = w45*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
-                                        const double tmp64 = w36*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
-                                        const double tmp65 = w40*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
-                                        const double tmp66 = w41*(B_0_4 + B_0_6);
-                                        const double tmp67 = w39*(B_0_5 + B_0_7);
-                                        const double tmp68 = w36*(B_0_0 + B_0_2);
-                                        const double tmp69 = w33*(B_0_1 + B_0_3);
-                                        const double tmp70 = w38*(-B_2_4 - B_2_7);
-                                        const double tmp71 = w42*(B_1_2 + B_1_6);
-                                        const double tmp72 = w41*(-B_0_2 - B_0_6);
-                                        const double tmp73 = w37*(B_1_0 + B_1_4);
-                                        const double tmp74 = w39*(-B_0_3 - B_0_7);
-                                        const double tmp75 = w45*(-B_2_0 - B_2_3);
-                                        const double tmp76 = w36*(-B_0_0 - B_0_4);
-                                        const double tmp77 = w40*(B_1_3 + B_1_7);
-                                        const double tmp78 = w33*(-B_0_1 - B_0_5);
-                                        const double tmp79 = w34*(B_1_1 + B_1_5);
-                                        const double tmp80 = w39*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
-                                        const double tmp81 = w36*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
-                                        const double tmp82 = w38*(B_2_0 + B_2_3);
-                                        const double tmp83 = w42*(-B_1_1 - B_1_5);
-                                        const double tmp84 = w41*(B_0_1 + B_0_5);
-                                        const double tmp85 = w37*(-B_1_3 - B_1_7);
-                                        const double tmp86 = w39*(B_0_0 + B_0_4);
-                                        const double tmp87 = w45*(B_2_4 + B_2_7);
-                                        const double tmp88 = w36*(B_0_3 + B_0_7);
-                                        const double tmp89 = w40*(-B_1_0 - B_1_4);
-                                        const double tmp90 = w33*(B_0_2 + B_0_6);
-                                        const double tmp91 = w34*(-B_1_2 - B_1_6);
-                                        const double tmp92 = w38*(-B_2_5 - B_2_6);
-                                        const double tmp93 = w45*(-B_2_1 - B_2_2);
-                                        const double tmp94 = w37*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
-                                        const double tmp95 = w40*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
-                                        const double tmp96 = w42*(-B_1_2 - B_1_3);
-                                        const double tmp97 = w41*(-B_0_1 - B_0_3);
-                                        const double tmp98 = w37*(-B_1_0 - B_1_1);
-                                        const double tmp99 = w39*(-B_0_0 - B_0_2);
-                                        const double tmp100 = w36*(-B_0_5 - B_0_7);
-                                        const double tmp101 = w40*(-B_1_6 - B_1_7);
-                                        const double tmp102 = w33*(-B_0_4 - B_0_6);
-                                        const double tmp103 = w34*(-B_1_4 - B_1_5);
-                                        const double tmp104 = w38*(B_2_6 + B_2_7);
-                                        const double tmp105 = w35*(B_2_4 + B_2_5);
-                                        const double tmp106 = w41*(B_0_2 + B_0_6);
-                                        const double tmp107 = w37*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
-                                        const double tmp108 = w39*(B_0_3 + B_0_7);
-                                        const double tmp109 = w45*(B_2_0 + B_2_1);
-                                        const double tmp110 = w36*(B_0_0 + B_0_4);
-                                        const double tmp111 = w40*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
-                                        const double tmp112 = w33*(B_0_1 + B_0_5);
-                                        const double tmp113 = w46*(B_2_2 + B_2_3);
-                                        const double tmp114 = w42*(-B_1_0 - B_1_4);
-                                        const double tmp115 = w41*(-B_0_0 - B_0_4);
-                                        const double tmp116 = w37*(-B_1_2 - B_1_6);
-                                        const double tmp117 = w39*(-B_0_1 - B_0_5);
-                                        const double tmp118 = w36*(-B_0_2 - B_0_6);
-                                        const double tmp119 = w40*(-B_1_1 - B_1_5);
-                                        const double tmp120 = w33*(-B_0_3 - B_0_7);
-                                        const double tmp121 = w34*(-B_1_3 - B_1_7);
-                                        const double tmp122 = w38*(B_2_2 + B_2_3);
-                                        const double tmp123 = w42*(B_1_6 + B_1_7);
-                                        const double tmp124 = w35*(B_2_0 + B_2_1);
-                                        const double tmp125 = w37*(B_1_4 + B_1_5);
-                                        const double tmp126 = w39*(-B_0_3 - B_0_5);
-                                        const double tmp127 = w45*(B_2_4 + B_2_5);
-                                        const double tmp128 = w36*(-B_0_2 - B_0_4);
-                                        const double tmp129 = w40*(B_1_2 + B_1_3);
-                                        const double tmp130 = w46*(B_2_6 + B_2_7);
-                                        const double tmp131 = w34*(B_1_0 + B_1_1);
-                                        const double tmp132 = w38*(-B_2_1 - B_2_2);
-                                        const double tmp133 = w37*(B_1_2 + B_1_7);
-                                        const double tmp134 = w39*(B_0_1 + B_0_7);
-                                        const double tmp135 = w36*(B_0_0 + B_0_6);
-                                        const double tmp136 = w40*(B_1_0 + B_1_5);
-                                        const double tmp137 = w45*(-B_2_5 - B_2_6);
-                                        const double tmp138 = w38*(-B_2_4 - B_2_6);
-                                        const double tmp139 = w35*(-B_2_5 - B_2_7);
-                                        const double tmp140 = w41*(-B_0_0 - B_0_2);
-                                        const double tmp141 = w37*(B_1_1 + B_1_4);
-                                        const double tmp142 = w39*(-B_0_1 - B_0_3);
-                                        const double tmp143 = w45*(-B_2_1 - B_2_3);
-                                        const double tmp144 = w36*(-B_0_4 - B_0_6);
-                                        const double tmp145 = w40*(B_1_3 + B_1_6);
-                                        const double tmp146 = w33*(-B_0_5 - B_0_7);
-                                        const double tmp147 = w46*(-B_2_0 - B_2_2);
-                                        const double tmp148 = w39*(B_0_2 + B_0_4);
-                                        const double tmp149 = w36*(B_0_3 + B_0_5);
-                                        const double tmp150 = w38*(B_2_5 + B_2_6);
-                                        const double tmp151 = w37*(-B_1_0 - B_1_5);
-                                        const double tmp152 = w39*(-B_0_0 - B_0_6);
-                                        const double tmp153 = w45*(B_2_1 + B_2_2);
-                                        const double tmp154 = w36*(-B_0_1 - B_0_7);
-                                        const double tmp155 = w40*(-B_1_2 - B_1_7);
-                                        const double tmp156 = w41*(-B_0_3 - B_0_7);
-                                        const double tmp157 = w39*(-B_0_2 - B_0_6);
-                                        const double tmp158 = w36*(-B_0_1 - B_0_5);
-                                        const double tmp159 = w33*(-B_0_0 - B_0_4);
-                                        const double tmp160 = w38*(-B_2_2 - B_2_3);
-                                        const double tmp161 = w35*(-B_2_0 - B_2_1);
-                                        const double tmp162 = w45*(-B_2_4 - B_2_5);
-                                        const double tmp163 = w46*(-B_2_6 - B_2_7);
-                                        const double tmp164 = w38*(-B_2_0 - B_2_3);
-                                        const double tmp165 = w37*(B_1_3 + B_1_6);
-                                        const double tmp166 = w40*(B_1_1 + B_1_4);
-                                        const double tmp167 = w45*(-B_2_4 - B_2_7);
-                                        const double tmp168 = w39*(B_0_3 + B_0_5);
-                                        const double tmp169 = w36*(B_0_2 + B_0_4);
-                                        const double tmp170 = w38*(B_2_1 + B_2_3);
-                                        const double tmp171 = w35*(B_2_0 + B_2_2);
-                                        const double tmp172 = w41*(B_0_5 + B_0_7);
-                                        const double tmp173 = w37*(-B_1_3 - B_1_6);
-                                        const double tmp174 = w39*(B_0_4 + B_0_6);
-                                        const double tmp175 = w45*(B_2_4 + B_2_6);
-                                        const double tmp176 = w36*(B_0_1 + B_0_3);
-                                        const double tmp177 = w40*(-B_1_1 - B_1_4);
-                                        const double tmp178 = w33*(B_0_0 + B_0_2);
-                                        const double tmp179 = w46*(B_2_5 + B_2_7);
-                                        const double tmp180 = w38*(B_2_5 + B_2_7);
-                                        const double tmp181 = w42*(-B_1_3 - B_1_7);
-                                        const double tmp182 = w35*(B_2_4 + B_2_6);
-                                        const double tmp183 = w37*(-B_1_1 - B_1_5);
-                                        const double tmp184 = w39*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
-                                        const double tmp185 = w45*(B_2_0 + B_2_2);
-                                        const double tmp186 = w36*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
-                                        const double tmp187 = w40*(-B_1_2 - B_1_6);
-                                        const double tmp188 = w46*(B_2_1 + B_2_3);
-                                        const double tmp189 = w34*(-B_1_0 - B_1_4);
-                                        const double tmp190 = w38*(B_2_4 + B_2_5);
-                                        const double tmp191 = w35*(B_2_6 + B_2_7);
-                                        const double tmp192 = w41*(-B_0_1 - B_0_5);
-                                        const double tmp193 = w37*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
-                                        const double tmp194 = w39*(-B_0_0 - B_0_4);
-                                        const double tmp195 = w45*(B_2_2 + B_2_3);
-                                        const double tmp196 = w36*(-B_0_3 - B_0_7);
-                                        const double tmp197 = w40*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
-                                        const double tmp198 = w33*(-B_0_2 - B_0_6);
-                                        const double tmp199 = w46*(B_2_0 + B_2_1);
-                                        const double tmp200 = w38*(-B_2_6 - B_2_7);
-                                        const double tmp201 = w42*(B_1_2 + B_1_3);
-                                        const double tmp202 = w35*(-B_2_4 - B_2_5);
-                                        const double tmp203 = w37*(B_1_0 + B_1_1);
-                                        const double tmp204 = w45*(-B_2_0 - B_2_1);
-                                        const double tmp205 = w40*(B_1_6 + B_1_7);
-                                        const double tmp206 = w46*(-B_2_2 - B_2_3);
-                                        const double tmp207 = w34*(B_1_4 + B_1_5);
-                                        const double tmp208 = w37*(-B_1_1 - B_1_4);
-                                        const double tmp209 = w39*(-B_0_2 - B_0_4);
-                                        const double tmp210 = w36*(-B_0_3 - B_0_5);
-                                        const double tmp211 = w40*(-B_1_3 - B_1_6);
-                                        const double tmp212 = w38*(B_2_4 + B_2_7);
-                                        const double tmp213 = w45*(B_2_0 + B_2_3);
-                                        const double tmp214 = w41*(B_0_0 + B_0_4);
-                                        const double tmp215 = w39*(B_0_1 + B_0_5);
-                                        const double tmp216 = w36*(B_0_2 + B_0_6);
-                                        const double tmp217 = w33*(B_0_3 + B_0_7);
-                                        const double tmp218 = w42*(B_1_1 + B_1_5);
-                                        const double tmp219 = w37*(B_1_3 + B_1_7);
-                                        const double tmp220 = w40*(B_1_0 + B_1_4);
-                                        const double tmp221 = w34*(B_1_2 + B_1_6);
-                                        const double tmp222 = w39*(-B_0_1 - B_0_7);
-                                        const double tmp223 = w36*(-B_0_0 - B_0_6);
-                                        const double tmp224 = w38*(-B_2_0 - B_2_1);
-                                        const double tmp225 = w35*(-B_2_2 - B_2_3);
-                                        const double tmp226 = w45*(-B_2_6 - B_2_7);
-                                        const double tmp227 = w46*(-B_2_4 - B_2_5);
-                                        const double tmp228 = w38*(B_2_4 + B_2_6);
-                                        const double tmp229 = w42*(B_1_0 + B_1_4);
-                                        const double tmp230 = w35*(B_2_5 + B_2_7);
-                                        const double tmp231 = w37*(B_1_2 + B_1_6);
-                                        const double tmp232 = w39*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
-                                        const double tmp233 = w45*(B_2_1 + B_2_3);
-                                        const double tmp234 = w36*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
-                                        const double tmp235 = w40*(B_1_1 + B_1_5);
-                                        const double tmp236 = w46*(B_2_0 + B_2_2);
-                                        const double tmp237 = w34*(B_1_3 + B_1_7);
-                                        const double tmp238 = w42*(-B_1_2 - B_1_6);
-                                        const double tmp239 = w37*(-B_1_0 - B_1_4);
-                                        const double tmp240 = w40*(-B_1_3 - B_1_7);
-                                        const double tmp241 = w34*(-B_1_1 - B_1_5);
-                                        const double tmp242 = w38*(-B_2_4 - B_2_5);
-                                        const double tmp243 = w42*(-B_1_0 - B_1_1);
-                                        const double tmp244 = w35*(-B_2_6 - B_2_7);
-                                        const double tmp245 = w37*(-B_1_2 - B_1_3);
-                                        const double tmp246 = w45*(-B_2_2 - B_2_3);
-                                        const double tmp247 = w40*(-B_1_4 - B_1_5);
-                                        const double tmp248 = w46*(-B_2_0 - B_2_1);
-                                        const double tmp249 = w34*(-B_1_6 - B_1_7);
-                                        const double tmp250 = w42*(B_1_4 + B_1_5);
-                                        const double tmp251 = w37*(B_1_6 + B_1_7);
-                                        const double tmp252 = w40*(B_1_0 + B_1_1);
-                                        const double tmp253 = w34*(B_1_2 + B_1_3);
-                                        const double tmp254 = w38*(-B_2_1 - B_2_3);
-                                        const double tmp255 = w35*(-B_2_0 - B_2_2);
-                                        const double tmp256 = w45*(-B_2_4 - B_2_6);
-                                        const double tmp257 = w46*(-B_2_5 - B_2_7);
-                                        const double tmp258 = w38*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
-                                        const double tmp259 = w45*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
-                                        const double tmp260 = w38*(-B_2_0 - B_2_2);
-                                        const double tmp261 = w35*(-B_2_1 - B_2_3);
-                                        const double tmp262 = w45*(-B_2_5 - B_2_7);
-                                        const double tmp263 = w46*(-B_2_4 - B_2_6);
+                                        const Scalar B_0_0 = B_p[INDEX4(k,0,m,0, numEq,3,numComp)];
+                                        const Scalar B_1_0 = B_p[INDEX4(k,1,m,0, numEq,3,numComp)];
+                                        const Scalar B_2_0 = B_p[INDEX4(k,2,m,0, numEq,3,numComp)];
+                                        const Scalar B_0_1 = B_p[INDEX4(k,0,m,1, numEq,3,numComp)];
+                                        const Scalar B_1_1 = B_p[INDEX4(k,1,m,1, numEq,3,numComp)];
+                                        const Scalar B_2_1 = B_p[INDEX4(k,2,m,1, numEq,3,numComp)];
+                                        const Scalar B_0_2 = B_p[INDEX4(k,0,m,2, numEq,3,numComp)];
+                                        const Scalar B_1_2 = B_p[INDEX4(k,1,m,2, numEq,3,numComp)];
+                                        const Scalar B_2_2 = B_p[INDEX4(k,2,m,2, numEq,3,numComp)];
+                                        const Scalar B_0_3 = B_p[INDEX4(k,0,m,3, numEq,3,numComp)];
+                                        const Scalar B_1_3 = B_p[INDEX4(k,1,m,3, numEq,3,numComp)];
+                                        const Scalar B_2_3 = B_p[INDEX4(k,2,m,3, numEq,3,numComp)];
+                                        const Scalar B_0_4 = B_p[INDEX4(k,0,m,4, numEq,3,numComp)];
+                                        const Scalar B_1_4 = B_p[INDEX4(k,1,m,4, numEq,3,numComp)];
+                                        const Scalar B_2_4 = B_p[INDEX4(k,2,m,4, numEq,3,numComp)];
+                                        const Scalar B_0_5 = B_p[INDEX4(k,0,m,5, numEq,3,numComp)];
+                                        const Scalar B_1_5 = B_p[INDEX4(k,1,m,5, numEq,3,numComp)];
+                                        const Scalar B_2_5 = B_p[INDEX4(k,2,m,5, numEq,3,numComp)];
+                                        const Scalar B_0_6 = B_p[INDEX4(k,0,m,6, numEq,3,numComp)];
+                                        const Scalar B_1_6 = B_p[INDEX4(k,1,m,6, numEq,3,numComp)];
+                                        const Scalar B_2_6 = B_p[INDEX4(k,2,m,6, numEq,3,numComp)];
+                                        const Scalar B_0_7 = B_p[INDEX4(k,0,m,7, numEq,3,numComp)];
+                                        const Scalar B_1_7 = B_p[INDEX4(k,1,m,7, numEq,3,numComp)];
+                                        const Scalar B_2_7 = B_p[INDEX4(k,2,m,7, numEq,3,numComp)];
+                                        const Scalar tmp0 = w38*(B_2_1 + B_2_2);
+                                        const Scalar tmp1 = w42*(B_1_3 + B_1_7);
+                                        const Scalar tmp2 = w41*(B_0_3 + B_0_7);
+                                        const Scalar tmp3 = w37*(B_1_1 + B_1_5);
+                                        const Scalar tmp4 = w39*(B_0_2 + B_0_6);
+                                        const Scalar tmp5 = w45*(B_2_5 + B_2_6);
+                                        const Scalar tmp6 = w36*(B_0_1 + B_0_5);
+                                        const Scalar tmp7 = w40*(B_1_2 + B_1_6);
+                                        const Scalar tmp8 = w33*(B_0_0 + B_0_4);
+                                        const Scalar tmp9 = w34*(B_1_0 + B_1_4);
+                                        const Scalar tmp10 = w38*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
+                                        const Scalar tmp11 = w42*(-B_1_6 - B_1_7);
+                                        const Scalar tmp12 = w41*(-B_0_5 - B_0_7);
+                                        const Scalar tmp13 = w37*(-B_1_4 - B_1_5);
+                                        const Scalar tmp14 = w39*(-B_0_4 - B_0_6);
+                                        const Scalar tmp15 = w45*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
+                                        const Scalar tmp16 = w36*(-B_0_1 - B_0_3);
+                                        const Scalar tmp17 = w40*(-B_1_2 - B_1_3);
+                                        const Scalar tmp18 = w33*(-B_0_0 - B_0_2);
+                                        const Scalar tmp19 = w34*(-B_1_0 - B_1_1);
+                                        const Scalar tmp20 = w38*(-B_2_5 - B_2_7);
+                                        const Scalar tmp21 = w35*(-B_2_4 - B_2_6);
+                                        const Scalar tmp22 = w41*(B_0_1 + B_0_3);
+                                        const Scalar tmp23 = w37*(-B_1_2 - B_1_7);
+                                        const Scalar tmp24 = w39*(B_0_0 + B_0_2);
+                                        const Scalar tmp25 = w45*(-B_2_0 - B_2_2);
+                                        const Scalar tmp26 = w36*(B_0_5 + B_0_7);
+                                        const Scalar tmp27 = w40*(-B_1_0 - B_1_5);
+                                        const Scalar tmp28 = w33*(B_0_4 + B_0_6);
+                                        const Scalar tmp29 = w46*(-B_2_1 - B_2_3);
+                                        const Scalar tmp30 = w38*(B_2_0 + B_2_2);
+                                        const Scalar tmp31 = w35*(B_2_1 + B_2_3);
+                                        const Scalar tmp32 = w41*(-B_0_4 - B_0_6);
+                                        const Scalar tmp33 = w37*(B_1_0 + B_1_5);
+                                        const Scalar tmp34 = w39*(-B_0_5 - B_0_7);
+                                        const Scalar tmp35 = w45*(B_2_5 + B_2_7);
+                                        const Scalar tmp36 = w36*(-B_0_0 - B_0_2);
+                                        const Scalar tmp37 = w40*(B_1_2 + B_1_7);
+                                        const Scalar tmp38 = w33*(-B_0_1 - B_0_3);
+                                        const Scalar tmp39 = w46*(B_2_4 + B_2_6);
+                                        const Scalar tmp40 = w38*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
+                                        const Scalar tmp41 = w42*(B_1_0 + B_1_1);
+                                        const Scalar tmp42 = w41*(B_0_0 + B_0_2);
+                                        const Scalar tmp43 = w37*(B_1_2 + B_1_3);
+                                        const Scalar tmp44 = w39*(B_0_1 + B_0_3);
+                                        const Scalar tmp45 = w45*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
+                                        const Scalar tmp46 = w36*(B_0_4 + B_0_6);
+                                        const Scalar tmp47 = w40*(B_1_4 + B_1_5);
+                                        const Scalar tmp48 = w33*(B_0_5 + B_0_7);
+                                        const Scalar tmp49 = w34*(B_1_6 + B_1_7);
+                                        const Scalar tmp50 = w38*(B_2_0 + B_2_1);
+                                        const Scalar tmp51 = w42*(-B_1_4 - B_1_5);
+                                        const Scalar tmp52 = w35*(B_2_2 + B_2_3);
+                                        const Scalar tmp53 = w37*(-B_1_6 - B_1_7);
+                                        const Scalar tmp54 = w39*(B_0_0 + B_0_6);
+                                        const Scalar tmp55 = w45*(B_2_6 + B_2_7);
+                                        const Scalar tmp56 = w36*(B_0_1 + B_0_7);
+                                        const Scalar tmp57 = w40*(-B_1_0 - B_1_1);
+                                        const Scalar tmp58 = w46*(B_2_4 + B_2_5);
+                                        const Scalar tmp59 = w34*(-B_1_2 - B_1_3);
+                                        const Scalar tmp60 = w38*(-B_2_4 - B_2_5 - B_2_6 - B_2_7);
+                                        const Scalar tmp61 = w37*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
+                                        const Scalar tmp62 = w39*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
+                                        const Scalar tmp63 = w45*(-B_2_0 - B_2_1 - B_2_2 - B_2_3);
+                                        const Scalar tmp64 = w36*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
+                                        const Scalar tmp65 = w40*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
+                                        const Scalar tmp66 = w41*(B_0_4 + B_0_6);
+                                        const Scalar tmp67 = w39*(B_0_5 + B_0_7);
+                                        const Scalar tmp68 = w36*(B_0_0 + B_0_2);
+                                        const Scalar tmp69 = w33*(B_0_1 + B_0_3);
+                                        const Scalar tmp70 = w38*(-B_2_4 - B_2_7);
+                                        const Scalar tmp71 = w42*(B_1_2 + B_1_6);
+                                        const Scalar tmp72 = w41*(-B_0_2 - B_0_6);
+                                        const Scalar tmp73 = w37*(B_1_0 + B_1_4);
+                                        const Scalar tmp74 = w39*(-B_0_3 - B_0_7);
+                                        const Scalar tmp75 = w45*(-B_2_0 - B_2_3);
+                                        const Scalar tmp76 = w36*(-B_0_0 - B_0_4);
+                                        const Scalar tmp77 = w40*(B_1_3 + B_1_7);
+                                        const Scalar tmp78 = w33*(-B_0_1 - B_0_5);
+                                        const Scalar tmp79 = w34*(B_1_1 + B_1_5);
+                                        const Scalar tmp80 = w39*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
+                                        const Scalar tmp81 = w36*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
+                                        const Scalar tmp82 = w38*(B_2_0 + B_2_3);
+                                        const Scalar tmp83 = w42*(-B_1_1 - B_1_5);
+                                        const Scalar tmp84 = w41*(B_0_1 + B_0_5);
+                                        const Scalar tmp85 = w37*(-B_1_3 - B_1_7);
+                                        const Scalar tmp86 = w39*(B_0_0 + B_0_4);
+                                        const Scalar tmp87 = w45*(B_2_4 + B_2_7);
+                                        const Scalar tmp88 = w36*(B_0_3 + B_0_7);
+                                        const Scalar tmp89 = w40*(-B_1_0 - B_1_4);
+                                        const Scalar tmp90 = w33*(B_0_2 + B_0_6);
+                                        const Scalar tmp91 = w34*(-B_1_2 - B_1_6);
+                                        const Scalar tmp92 = w38*(-B_2_5 - B_2_6);
+                                        const Scalar tmp93 = w45*(-B_2_1 - B_2_2);
+                                        const Scalar tmp94 = w37*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
+                                        const Scalar tmp95 = w40*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
+                                        const Scalar tmp96 = w42*(-B_1_2 - B_1_3);
+                                        const Scalar tmp97 = w41*(-B_0_1 - B_0_3);
+                                        const Scalar tmp98 = w37*(-B_1_0 - B_1_1);
+                                        const Scalar tmp99 = w39*(-B_0_0 - B_0_2);
+                                        const Scalar tmp100 = w36*(-B_0_5 - B_0_7);
+                                        const Scalar tmp101 = w40*(-B_1_6 - B_1_7);
+                                        const Scalar tmp102 = w33*(-B_0_4 - B_0_6);
+                                        const Scalar tmp103 = w34*(-B_1_4 - B_1_5);
+                                        const Scalar tmp104 = w38*(B_2_6 + B_2_7);
+                                        const Scalar tmp105 = w35*(B_2_4 + B_2_5);
+                                        const Scalar tmp106 = w41*(B_0_2 + B_0_6);
+                                        const Scalar tmp107 = w37*(B_1_2 + B_1_3 + B_1_6 + B_1_7);
+                                        const Scalar tmp108 = w39*(B_0_3 + B_0_7);
+                                        const Scalar tmp109 = w45*(B_2_0 + B_2_1);
+                                        const Scalar tmp110 = w36*(B_0_0 + B_0_4);
+                                        const Scalar tmp111 = w40*(B_1_0 + B_1_1 + B_1_4 + B_1_5);
+                                        const Scalar tmp112 = w33*(B_0_1 + B_0_5);
+                                        const Scalar tmp113 = w46*(B_2_2 + B_2_3);
+                                        const Scalar tmp114 = w42*(-B_1_0 - B_1_4);
+                                        const Scalar tmp115 = w41*(-B_0_0 - B_0_4);
+                                        const Scalar tmp116 = w37*(-B_1_2 - B_1_6);
+                                        const Scalar tmp117 = w39*(-B_0_1 - B_0_5);
+                                        const Scalar tmp118 = w36*(-B_0_2 - B_0_6);
+                                        const Scalar tmp119 = w40*(-B_1_1 - B_1_5);
+                                        const Scalar tmp120 = w33*(-B_0_3 - B_0_7);
+                                        const Scalar tmp121 = w34*(-B_1_3 - B_1_7);
+                                        const Scalar tmp122 = w38*(B_2_2 + B_2_3);
+                                        const Scalar tmp123 = w42*(B_1_6 + B_1_7);
+                                        const Scalar tmp124 = w35*(B_2_0 + B_2_1);
+                                        const Scalar tmp125 = w37*(B_1_4 + B_1_5);
+                                        const Scalar tmp126 = w39*(-B_0_3 - B_0_5);
+                                        const Scalar tmp127 = w45*(B_2_4 + B_2_5);
+                                        const Scalar tmp128 = w36*(-B_0_2 - B_0_4);
+                                        const Scalar tmp129 = w40*(B_1_2 + B_1_3);
+                                        const Scalar tmp130 = w46*(B_2_6 + B_2_7);
+                                        const Scalar tmp131 = w34*(B_1_0 + B_1_1);
+                                        const Scalar tmp132 = w38*(-B_2_1 - B_2_2);
+                                        const Scalar tmp133 = w37*(B_1_2 + B_1_7);
+                                        const Scalar tmp134 = w39*(B_0_1 + B_0_7);
+                                        const Scalar tmp135 = w36*(B_0_0 + B_0_6);
+                                        const Scalar tmp136 = w40*(B_1_0 + B_1_5);
+                                        const Scalar tmp137 = w45*(-B_2_5 - B_2_6);
+                                        const Scalar tmp138 = w38*(-B_2_4 - B_2_6);
+                                        const Scalar tmp139 = w35*(-B_2_5 - B_2_7);
+                                        const Scalar tmp140 = w41*(-B_0_0 - B_0_2);
+                                        const Scalar tmp141 = w37*(B_1_1 + B_1_4);
+                                        const Scalar tmp142 = w39*(-B_0_1 - B_0_3);
+                                        const Scalar tmp143 = w45*(-B_2_1 - B_2_3);
+                                        const Scalar tmp144 = w36*(-B_0_4 - B_0_6);
+                                        const Scalar tmp145 = w40*(B_1_3 + B_1_6);
+                                        const Scalar tmp146 = w33*(-B_0_5 - B_0_7);
+                                        const Scalar tmp147 = w46*(-B_2_0 - B_2_2);
+                                        const Scalar tmp148 = w39*(B_0_2 + B_0_4);
+                                        const Scalar tmp149 = w36*(B_0_3 + B_0_5);
+                                        const Scalar tmp150 = w38*(B_2_5 + B_2_6);
+                                        const Scalar tmp151 = w37*(-B_1_0 - B_1_5);
+                                        const Scalar tmp152 = w39*(-B_0_0 - B_0_6);
+                                        const Scalar tmp153 = w45*(B_2_1 + B_2_2);
+                                        const Scalar tmp154 = w36*(-B_0_1 - B_0_7);
+                                        const Scalar tmp155 = w40*(-B_1_2 - B_1_7);
+                                        const Scalar tmp156 = w41*(-B_0_3 - B_0_7);
+                                        const Scalar tmp157 = w39*(-B_0_2 - B_0_6);
+                                        const Scalar tmp158 = w36*(-B_0_1 - B_0_5);
+                                        const Scalar tmp159 = w33*(-B_0_0 - B_0_4);
+                                        const Scalar tmp160 = w38*(-B_2_2 - B_2_3);
+                                        const Scalar tmp161 = w35*(-B_2_0 - B_2_1);
+                                        const Scalar tmp162 = w45*(-B_2_4 - B_2_5);
+                                        const Scalar tmp163 = w46*(-B_2_6 - B_2_7);
+                                        const Scalar tmp164 = w38*(-B_2_0 - B_2_3);
+                                        const Scalar tmp165 = w37*(B_1_3 + B_1_6);
+                                        const Scalar tmp166 = w40*(B_1_1 + B_1_4);
+                                        const Scalar tmp167 = w45*(-B_2_4 - B_2_7);
+                                        const Scalar tmp168 = w39*(B_0_3 + B_0_5);
+                                        const Scalar tmp169 = w36*(B_0_2 + B_0_4);
+                                        const Scalar tmp170 = w38*(B_2_1 + B_2_3);
+                                        const Scalar tmp171 = w35*(B_2_0 + B_2_2);
+                                        const Scalar tmp172 = w41*(B_0_5 + B_0_7);
+                                        const Scalar tmp173 = w37*(-B_1_3 - B_1_6);
+                                        const Scalar tmp174 = w39*(B_0_4 + B_0_6);
+                                        const Scalar tmp175 = w45*(B_2_4 + B_2_6);
+                                        const Scalar tmp176 = w36*(B_0_1 + B_0_3);
+                                        const Scalar tmp177 = w40*(-B_1_1 - B_1_4);
+                                        const Scalar tmp178 = w33*(B_0_0 + B_0_2);
+                                        const Scalar tmp179 = w46*(B_2_5 + B_2_7);
+                                        const Scalar tmp180 = w38*(B_2_5 + B_2_7);
+                                        const Scalar tmp181 = w42*(-B_1_3 - B_1_7);
+                                        const Scalar tmp182 = w35*(B_2_4 + B_2_6);
+                                        const Scalar tmp183 = w37*(-B_1_1 - B_1_5);
+                                        const Scalar tmp184 = w39*(B_0_1 + B_0_3 + B_0_5 + B_0_7);
+                                        const Scalar tmp185 = w45*(B_2_0 + B_2_2);
+                                        const Scalar tmp186 = w36*(B_0_0 + B_0_2 + B_0_4 + B_0_6);
+                                        const Scalar tmp187 = w40*(-B_1_2 - B_1_6);
+                                        const Scalar tmp188 = w46*(B_2_1 + B_2_3);
+                                        const Scalar tmp189 = w34*(-B_1_0 - B_1_4);
+                                        const Scalar tmp190 = w38*(B_2_4 + B_2_5);
+                                        const Scalar tmp191 = w35*(B_2_6 + B_2_7);
+                                        const Scalar tmp192 = w41*(-B_0_1 - B_0_5);
+                                        const Scalar tmp193 = w37*(-B_1_0 - B_1_1 - B_1_4 - B_1_5);
+                                        const Scalar tmp194 = w39*(-B_0_0 - B_0_4);
+                                        const Scalar tmp195 = w45*(B_2_2 + B_2_3);
+                                        const Scalar tmp196 = w36*(-B_0_3 - B_0_7);
+                                        const Scalar tmp197 = w40*(-B_1_2 - B_1_3 - B_1_6 - B_1_7);
+                                        const Scalar tmp198 = w33*(-B_0_2 - B_0_6);
+                                        const Scalar tmp199 = w46*(B_2_0 + B_2_1);
+                                        const Scalar tmp200 = w38*(-B_2_6 - B_2_7);
+                                        const Scalar tmp201 = w42*(B_1_2 + B_1_3);
+                                        const Scalar tmp202 = w35*(-B_2_4 - B_2_5);
+                                        const Scalar tmp203 = w37*(B_1_0 + B_1_1);
+                                        const Scalar tmp204 = w45*(-B_2_0 - B_2_1);
+                                        const Scalar tmp205 = w40*(B_1_6 + B_1_7);
+                                        const Scalar tmp206 = w46*(-B_2_2 - B_2_3);
+                                        const Scalar tmp207 = w34*(B_1_4 + B_1_5);
+                                        const Scalar tmp208 = w37*(-B_1_1 - B_1_4);
+                                        const Scalar tmp209 = w39*(-B_0_2 - B_0_4);
+                                        const Scalar tmp210 = w36*(-B_0_3 - B_0_5);
+                                        const Scalar tmp211 = w40*(-B_1_3 - B_1_6);
+                                        const Scalar tmp212 = w38*(B_2_4 + B_2_7);
+                                        const Scalar tmp213 = w45*(B_2_0 + B_2_3);
+                                        const Scalar tmp214 = w41*(B_0_0 + B_0_4);
+                                        const Scalar tmp215 = w39*(B_0_1 + B_0_5);
+                                        const Scalar tmp216 = w36*(B_0_2 + B_0_6);
+                                        const Scalar tmp217 = w33*(B_0_3 + B_0_7);
+                                        const Scalar tmp218 = w42*(B_1_1 + B_1_5);
+                                        const Scalar tmp219 = w37*(B_1_3 + B_1_7);
+                                        const Scalar tmp220 = w40*(B_1_0 + B_1_4);
+                                        const Scalar tmp221 = w34*(B_1_2 + B_1_6);
+                                        const Scalar tmp222 = w39*(-B_0_1 - B_0_7);
+                                        const Scalar tmp223 = w36*(-B_0_0 - B_0_6);
+                                        const Scalar tmp224 = w38*(-B_2_0 - B_2_1);
+                                        const Scalar tmp225 = w35*(-B_2_2 - B_2_3);
+                                        const Scalar tmp226 = w45*(-B_2_6 - B_2_7);
+                                        const Scalar tmp227 = w46*(-B_2_4 - B_2_5);
+                                        const Scalar tmp228 = w38*(B_2_4 + B_2_6);
+                                        const Scalar tmp229 = w42*(B_1_0 + B_1_4);
+                                        const Scalar tmp230 = w35*(B_2_5 + B_2_7);
+                                        const Scalar tmp231 = w37*(B_1_2 + B_1_6);
+                                        const Scalar tmp232 = w39*(-B_0_0 - B_0_2 - B_0_4 - B_0_6);
+                                        const Scalar tmp233 = w45*(B_2_1 + B_2_3);
+                                        const Scalar tmp234 = w36*(-B_0_1 - B_0_3 - B_0_5 - B_0_7);
+                                        const Scalar tmp235 = w40*(B_1_1 + B_1_5);
+                                        const Scalar tmp236 = w46*(B_2_0 + B_2_2);
+                                        const Scalar tmp237 = w34*(B_1_3 + B_1_7);
+                                        const Scalar tmp238 = w42*(-B_1_2 - B_1_6);
+                                        const Scalar tmp239 = w37*(-B_1_0 - B_1_4);
+                                        const Scalar tmp240 = w40*(-B_1_3 - B_1_7);
+                                        const Scalar tmp241 = w34*(-B_1_1 - B_1_5);
+                                        const Scalar tmp242 = w38*(-B_2_4 - B_2_5);
+                                        const Scalar tmp243 = w42*(-B_1_0 - B_1_1);
+                                        const Scalar tmp244 = w35*(-B_2_6 - B_2_7);
+                                        const Scalar tmp245 = w37*(-B_1_2 - B_1_3);
+                                        const Scalar tmp246 = w45*(-B_2_2 - B_2_3);
+                                        const Scalar tmp247 = w40*(-B_1_4 - B_1_5);
+                                        const Scalar tmp248 = w46*(-B_2_0 - B_2_1);
+                                        const Scalar tmp249 = w34*(-B_1_6 - B_1_7);
+                                        const Scalar tmp250 = w42*(B_1_4 + B_1_5);
+                                        const Scalar tmp251 = w37*(B_1_6 + B_1_7);
+                                        const Scalar tmp252 = w40*(B_1_0 + B_1_1);
+                                        const Scalar tmp253 = w34*(B_1_2 + B_1_3);
+                                        const Scalar tmp254 = w38*(-B_2_1 - B_2_3);
+                                        const Scalar tmp255 = w35*(-B_2_0 - B_2_2);
+                                        const Scalar tmp256 = w45*(-B_2_4 - B_2_6);
+                                        const Scalar tmp257 = w46*(-B_2_5 - B_2_7);
+                                        const Scalar tmp258 = w38*(B_2_0 + B_2_1 + B_2_2 + B_2_3);
+                                        const Scalar tmp259 = w45*(B_2_4 + B_2_5 + B_2_6 + B_2_7);
+                                        const Scalar tmp260 = w38*(-B_2_0 - B_2_2);
+                                        const Scalar tmp261 = w35*(-B_2_1 - B_2_3);
+                                        const Scalar tmp262 = w45*(-B_2_5 - B_2_7);
+                                        const Scalar tmp263 = w46*(-B_2_4 - B_2_6);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-B_0_0*w50 - B_0_1*w41 - B_0_6*w33 - B_0_7*w49 + B_1_0*w47 - B_1_2*w42 - B_1_5*w34 + B_1_7*w48 - B_2_0*w43 - B_2_3*w35 - B_2_4*w46 - B_2_7*w44 + tmp132 + tmp137 + tmp208 + tmp209 + tmp210 + tmp211;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=-B_0_0*w41 - B_0_1*w50 - B_0_6*w49 - B_0_7*w33 + tmp126 + tmp128 + tmp242 + tmp243 + tmp244 + tmp245 + tmp246 + tmp247 + tmp248 + tmp249;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=-B_1_0*w42 + B_1_2*w47 + B_1_5*w48 - B_1_7*w34 + tmp138 + tmp139 + tmp140 + tmp142 + tmp143 + tmp144 + tmp146 + tmp147 + tmp173 + tmp177;
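[Note on the hunk above: every `const double` temporary in the element-matrix kernel becomes `const Scalar`, and the coefficient samples are now read through the new two-argument getSampleDataRO(e, zero). Presumably Scalar is an alias for either double or std::complex<double> and the `zero` argument selects the scalar type of the returned sample data; that is an assumption, not something stated in this diff. The following stand-alone sketch only illustrates the pattern, with hypothetical names (Scalar, weightedTemps); it is not the escript implementation.]

#include <complex>

// Hypothetical sketch of the double -> Scalar pattern seen above:
// one kernel, instantiable for Scalar = double or std::complex<double>.
// The quadrature weights (w37, w40, ...) stay real doubles; only the
// coefficient samples (B_...) and the accumulated temporaries change type.
template <typename Scalar>
Scalar weightedTemps(const Scalar* B, double w37, double w40)
{
    const Scalar tmp94 = w37*(B[0] + B[1] + B[4] + B[5]);   // cf. tmp94 above
    const Scalar tmp95 = w40*(B[2] + B[3] + B[6] + B[7]);   // cf. tmp95 above
    return tmp94 + tmp95;
}

// Explicit instantiations as a compile check for both scalar types.
template double weightedTemps<double>(const double*, double, double);
template std::complex<double>
weightedTemps<std::complex<double>>(const std::complex<double>*, double, double);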
@@ -4818,73 +4843,73 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wB0 = B_p[INDEX3(k,0,m,numEq,3)]*w55;
-                                        const double wB1 = B_p[INDEX3(k,1,m,numEq,3)]*w56;
-                                        const double wB2 = B_p[INDEX3(k,2,m,numEq,3)]*w54;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+= 4*wB0 + 4*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+= 4*wB0 + 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+= 2*wB0 + 4*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+= 2*wB0 + 2*wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+= 2*wB0 + 2*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+= 2*wB0 +   wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=   wB0 + 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=   wB0 +   wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=-4*wB0 + 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4*wB0 + 4*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+=-2*wB0 + 2*wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2*wB0 + 4*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+=-2*wB0 +   wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2*wB0 + 2*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=  -wB0 +   wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=  -wB0 + 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+= 2*wB0 - 4*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+= 2*wB0 - 2*wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+= 4*wB0 - 4*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+= 4*wB0 - 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=   wB0 - 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=   wB0 -   wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+= 2*wB0 - 2*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+= 2*wB0 -   wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+=-2*wB0 - 2*wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2*wB0 - 4*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+=-4*wB0 - 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4*wB0 - 4*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=  -wB0 -   wB1 +   wB2;
-                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=  -wB0 - 2*wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+=-2*wB0 -   wB1 + 2*wB2;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2*wB0 - 2*wB1 + 4*wB2;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+= 2*wB0 + 2*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+= 2*wB0 +   wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=   wB0 + 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=   wB0 +   wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+= 4*wB0 + 4*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+= 4*wB0 + 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+= 2*wB0 + 4*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+= 2*wB0 + 2*wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+=-2*wB0 +   wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2*wB0 + 2*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=  -wB0 +   wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=  -wB0 + 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+=-4*wB0 + 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4*wB0 + 4*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+=-2*wB0 + 2*wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2*wB0 + 4*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=   wB0 - 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=   wB0 -   wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+= 2*wB0 - 2*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+= 2*wB0 -   wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+= 2*wB0 - 4*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+= 2*wB0 - 2*wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+= 4*wB0 - 4*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+= 4*wB0 - 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=  -wB0 -   wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=  -wB0 - 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+=-2*wB0 -   wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2*wB0 - 2*wB1 - 4*wB2;
-                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+=-2*wB0 - 2*wB1 -   wB2;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2*wB0 - 4*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+=-4*wB0 - 2*wB1 - 2*wB2;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4*wB0 - 4*wB1 - 4*wB2;
+                                        const Scalar wB0 = B_p[INDEX3(k,0,m,numEq,3)]*w55;
+                                        const Scalar wB1 = B_p[INDEX3(k,1,m,numEq,3)]*w56;
+                                        const Scalar wB2 = B_p[INDEX3(k,2,m,numEq,3)]*w54;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+= 4.*wB0 + 4.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+= 4.*wB0 + 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+= 2.*wB0 + 4.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+= 2.*wB0 + 2.*wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+= 2.*wB0 + 2.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+= 2.*wB0 +    wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=    wB0 + 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=    wB0 +    wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=-4.*wB0 + 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4.*wB0 + 4.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+=-2.*wB0 + 2.*wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2.*wB0 + 4.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+=-2.*wB0 +    wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2.*wB0 + 2.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=   -wB0 +    wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=   -wB0 + 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+= 2.*wB0 - 4.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+= 2.*wB0 - 2.*wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+= 4.*wB0 - 4.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+= 4.*wB0 - 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=    wB0 - 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=    wB0 -    wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+= 2.*wB0 - 2.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+= 2.*wB0 -    wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+=-2.*wB0 - 2.*wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2.*wB0 - 4.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+=-4.*wB0 - 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4.*wB0 - 4.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=   -wB0 -    wB1 +    wB2;
+                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=   -wB0 - 2.*wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+=-2.*wB0 -    wB1 + 2.*wB2;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2.*wB0 - 2.*wB1 + 4.*wB2;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+= 2.*wB0 + 2.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+= 2.*wB0 +    wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=    wB0 + 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=    wB0 +    wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+= 4.*wB0 + 4.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+= 4.*wB0 + 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+= 2.*wB0 + 4.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+= 2.*wB0 + 2.*wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+=-2.*wB0 +    wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2.*wB0 + 2.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=   -wB0 +    wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=   -wB0 + 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+=-4.*wB0 + 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4.*wB0 + 4.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+=-2.*wB0 + 2.*wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2.*wB0 + 4.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=    wB0 - 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=    wB0 -    wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+= 2.*wB0 - 2.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+= 2.*wB0 -    wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+= 2.*wB0 - 4.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+= 2.*wB0 - 2.*wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+= 4.*wB0 - 4.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+= 4.*wB0 - 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=   -wB0 -    wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=   -wB0 - 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+=-2.*wB0 -    wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2.*wB0 - 2.*wB1 - 4.*wB2;
+                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+=-2.*wB0 - 2.*wB1 -    wB2;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2.*wB0 - 4.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+=-4.*wB0 - 2.*wB1 - 2.*wB2;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4.*wB0 - 4.*wB1 - 4.*wB2;
                                     }
                                 }
                             }
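[Note on the constant-data branch above: the integer factors (4*wB0) become floating-point literals (4.*wB0). Once Scalar can be std::complex<double>, an int factor no longer compiles, because the non-member operator* for std::complex is only defined between double and std::complex<double>; 4.*wB0 uses that overload. The sketch below demonstrates this with a hypothetical function name (weightedSum); it is not escript code.]

#include <complex>
#include <iostream>

// Hypothetical sketch: why 4*wB0 had to become 4.*wB0 in the templated kernel.
// With Scalar = std::complex<double>, "4*wB0" would fail to compile (no
// operator*(int, std::complex<double>)); "4.*wB0" multiplies double by complex.
template <typename Scalar>
Scalar weightedSum(Scalar wB0, Scalar wB1, Scalar wB2)
{
    return 4.*wB0 + 2.*wB1 + wB2;
}

int main()
{
    std::cout << weightedSum<double>(1.0, 2.0, 3.0) << "\n";
    std::cout << weightedSum<std::complex<double>>({1.0, 0.0},
                                                   {0.0, 2.0},
                                                   {3.0, 3.0}) << "\n";
    return 0;
}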
@@ -4893,298 +4918,298 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double* C_p=C.getSampleDataRO(e);
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
                             if (C.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,3)];
-                                        const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,3)];
-                                        const double C_2_0 = C_p[INDEX4(k,m,2, 0, numEq,numComp,3)];
-                                        const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,3)];
-                                        const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,3)];
-                                        const double C_2_1 = C_p[INDEX4(k,m,2, 1, numEq,numComp,3)];
-                                        const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,3)];
-                                        const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,3)];
-                                        const double C_2_2 = C_p[INDEX4(k,m,2, 2, numEq,numComp,3)];
-                                        const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,3)];
-                                        const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,3)];
-                                        const double C_2_3 = C_p[INDEX4(k,m,2, 3, numEq,numComp,3)];
-                                        const double C_0_4 = C_p[INDEX4(k,m,0, 4, numEq,numComp,3)];
-                                        const double C_1_4 = C_p[INDEX4(k,m,1, 4, numEq,numComp,3)];
-                                        const double C_2_4 = C_p[INDEX4(k,m,2, 4, numEq,numComp,3)];
-                                        const double C_0_5 = C_p[INDEX4(k,m,0, 5, numEq,numComp,3)];
-                                        const double C_1_5 = C_p[INDEX4(k,m,1, 5, numEq,numComp,3)];
-                                        const double C_2_5 = C_p[INDEX4(k,m,2, 5, numEq,numComp,3)];
-                                        const double C_0_6 = C_p[INDEX4(k,m,0, 6, numEq,numComp,3)];
-                                        const double C_1_6 = C_p[INDEX4(k,m,1, 6, numEq,numComp,3)];
-                                        const double C_2_6 = C_p[INDEX4(k,m,2, 6, numEq,numComp,3)];
-                                        const double C_0_7 = C_p[INDEX4(k,m,0, 7, numEq,numComp,3)];
-                                        const double C_1_7 = C_p[INDEX4(k,m,1, 7, numEq,numComp,3)];
-                                        const double C_2_7 = C_p[INDEX4(k,m,2, 7, numEq,numComp,3)];
-                                        const double tmp0 = w38*(-C_2_5 - C_2_6);
-                                        const double tmp1 = w42*(C_1_3 + C_1_7);
-                                        const double tmp2 = w41*(C_0_3 + C_0_7);
-                                        const double tmp3 = w37*(C_1_1 + C_1_5);
-                                        const double tmp4 = w39*(C_0_2 + C_0_6);
-                                        const double tmp5 = w45*(-C_2_1 - C_2_2);
-                                        const double tmp6 = w36*(C_0_1 + C_0_5);
-                                        const double tmp7 = w40*(C_1_2 + C_1_6);
-                                        const double tmp8 = w33*(C_0_0 + C_0_4);
-                                        const double tmp9 = w34*(C_1_0 + C_1_4);
-                                        const double tmp10 = w38*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
-                                        const double tmp11 = w42*(C_1_4 + C_1_5);
-                                        const double tmp12 = w41*(C_0_4 + C_0_6);
-                                        const double tmp13 = w37*(C_1_6 + C_1_7);
-                                        const double tmp14 = w39*(C_0_5 + C_0_7);
-                                        const double tmp15 = w45*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
-                                        const double tmp16 = w36*(C_0_0 + C_0_2);
-                                        const double tmp17 = w40*(C_1_0 + C_1_1);
-                                        const double tmp18 = w33*(C_0_1 + C_0_3);
-                                        const double tmp19 = w34*(C_1_2 + C_1_3);
-                                        const double tmp20 = w38*(-C_2_5 - C_2_7);
-                                        const double tmp21 = w35*(-C_2_4 - C_2_6);
-                                        const double tmp22 = w41*(C_0_1 + C_0_3);
-                                        const double tmp23 = w37*(C_1_0 + C_1_5);
-                                        const double tmp24 = w39*(C_0_0 + C_0_2);
-                                        const double tmp25 = w45*(-C_2_0 - C_2_2);
-                                        const double tmp26 = w36*(C_0_5 + C_0_7);
-                                        const double tmp27 = w40*(C_1_2 + C_1_7);
-                                        const double tmp28 = w33*(C_0_4 + C_0_6);
-                                        const double tmp29 = w46*(-C_2_1 - C_2_3);
-                                        const double tmp30 = w38*(C_2_0 + C_2_2);
-                                        const double tmp31 = w35*(C_2_1 + C_2_3);
-                                        const double tmp32 = w41*(-C_0_4 - C_0_6);
-                                        const double tmp33 = w37*(-C_1_2 - C_1_7);
-                                        const double tmp34 = w39*(-C_0_5 - C_0_7);
-                                        const double tmp35 = w45*(C_2_5 + C_2_7);
-                                        const double tmp36 = w36*(-C_0_0 - C_0_2);
-                                        const double tmp37 = w40*(-C_1_0 - C_1_5);
-                                        const double tmp38 = w33*(-C_0_1 - C_0_3);
-                                        const double tmp39 = w46*(C_2_4 + C_2_6);
-                                        const double tmp40 = w38*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
-                                        const double tmp41 = w42*(-C_1_2 - C_1_3);
-                                        const double tmp42 = w41*(-C_0_1 - C_0_3);
-                                        const double tmp43 = w37*(-C_1_0 - C_1_1);
-                                        const double tmp44 = w39*(-C_0_0 - C_0_2);
-                                        const double tmp45 = w45*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
-                                        const double tmp46 = w36*(-C_0_5 - C_0_7);
-                                        const double tmp47 = w40*(-C_1_6 - C_1_7);
-                                        const double tmp48 = w33*(-C_0_4 - C_0_6);
-                                        const double tmp49 = w34*(-C_1_4 - C_1_5);
-                                        const double tmp50 = w38*(C_2_0 + C_2_1);
-                                        const double tmp51 = w42*(-C_1_4 - C_1_5);
-                                        const double tmp52 = w35*(C_2_2 + C_2_3);
-                                        const double tmp53 = w37*(-C_1_6 - C_1_7);
-                                        const double tmp54 = w39*(-C_0_1 - C_0_7);
-                                        const double tmp55 = w45*(C_2_6 + C_2_7);
-                                        const double tmp56 = w36*(-C_0_0 - C_0_6);
-                                        const double tmp57 = w40*(-C_1_0 - C_1_1);
-                                        const double tmp58 = w46*(C_2_4 + C_2_5);
-                                        const double tmp59 = w34*(-C_1_2 - C_1_3);
-                                        const double tmp60 = w38*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
-                                        const double tmp61 = w37*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
-                                        const double tmp62 = w39*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
-                                        const double tmp63 = w45*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
-                                        const double tmp64 = w36*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
-                                        const double tmp65 = w40*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
-                                        const double tmp66 = w41*(-C_0_5 - C_0_7);
-                                        const double tmp67 = w39*(-C_0_4 - C_0_6);
-                                        const double tmp68 = w36*(-C_0_1 - C_0_3);
-                                        const double tmp69 = w33*(-C_0_0 - C_0_2);
-                                        const double tmp70 = w38*(C_2_0 + C_2_3);
-                                        const double tmp71 = w42*(C_1_2 + C_1_6);
-                                        const double tmp72 = w41*(-C_0_2 - C_0_6);
-                                        const double tmp73 = w37*(C_1_0 + C_1_4);
-                                        const double tmp74 = w39*(-C_0_3 - C_0_7);
-                                        const double tmp75 = w45*(C_2_4 + C_2_7);
-                                        const double tmp76 = w36*(-C_0_0 - C_0_4);
-                                        const double tmp77 = w40*(C_1_3 + C_1_7);
-                                        const double tmp78 = w33*(-C_0_1 - C_0_5);
-                                        const double tmp79 = w34*(C_1_1 + C_1_5);
-                                        const double tmp80 = w39*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
-                                        const double tmp81 = w36*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
-                                        const double tmp82 = w38*(-C_2_4 - C_2_7);
-                                        const double tmp83 = w42*(-C_1_1 - C_1_5);
-                                        const double tmp84 = w41*(C_0_1 + C_0_5);
-                                        const double tmp85 = w37*(-C_1_3 - C_1_7);
-                                        const double tmp86 = w39*(C_0_0 + C_0_4);
-                                        const double tmp87 = w45*(-C_2_0 - C_2_3);
-                                        const double tmp88 = w36*(C_0_3 + C_0_7);
-                                        const double tmp89 = w40*(-C_1_0 - C_1_4);
-                                        const double tmp90 = w33*(C_0_2 + C_0_6);
-                                        const double tmp91 = w34*(-C_1_2 - C_1_6);
-                                        const double tmp92 = w38*(C_2_1 + C_2_2);
-                                        const double tmp93 = w45*(C_2_5 + C_2_6);
-                                        const double tmp94 = w37*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
-                                        const double tmp95 = w40*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
-                                        const double tmp96 = w42*(C_1_0 + C_1_1);
-                                        const double tmp97 = w41*(C_0_0 + C_0_2);
-                                        const double tmp98 = w37*(C_1_2 + C_1_3);
-                                        const double tmp99 = w39*(C_0_1 + C_0_3);
-                                        const double tmp100 = w36*(C_0_4 + C_0_6);
-                                        const double tmp101 = w40*(C_1_4 + C_1_5);
-                                        const double tmp102 = w33*(C_0_5 + C_0_7);
-                                        const double tmp103 = w34*(C_1_6 + C_1_7);
-                                        const double tmp104 = w38*(-C_2_2 - C_2_3);
-                                        const double tmp105 = w35*(-C_2_0 - C_2_1);
-                                        const double tmp106 = w41*(-C_0_3 - C_0_7);
-                                        const double tmp107 = w37*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
-                                        const double tmp108 = w39*(-C_0_2 - C_0_6);
-                                        const double tmp109 = w45*(-C_2_4 - C_2_5);
-                                        const double tmp110 = w36*(-C_0_1 - C_0_5);
-                                        const double tmp111 = w40*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
-                                        const double tmp112 = w33*(-C_0_0 - C_0_4);
-                                        const double tmp113 = w46*(-C_2_6 - C_2_7);
-                                        const double tmp114 = w42*(-C_1_0 - C_1_4);
-                                        const double tmp115 = w41*(-C_0_0 - C_0_4);
-                                        const double tmp116 = w37*(-C_1_2 - C_1_6);
-                                        const double tmp117 = w39*(-C_0_1 - C_0_5);
-                                        const double tmp118 = w36*(-C_0_2 - C_0_6);
-                                        const double tmp119 = w40*(-C_1_1 - C_1_5);
-                                        const double tmp120 = w33*(-C_0_3 - C_0_7);
-                                        const double tmp121 = w34*(-C_1_3 - C_1_7);
-                                        const double tmp122 = w38*(C_2_2 + C_2_3);
-                                        const double tmp123 = w42*(C_1_6 + C_1_7);
-                                        const double tmp124 = w35*(C_2_0 + C_2_1);
-                                        const double tmp125 = w37*(C_1_4 + C_1_5);
-                                        const double tmp126 = w39*(C_0_2 + C_0_4);
-                                        const double tmp127 = w45*(C_2_4 + C_2_5);
-                                        const double tmp128 = w36*(C_0_3 + C_0_5);
-                                        const double tmp129 = w40*(C_1_2 + C_1_3);
-                                        const double tmp130 = w46*(C_2_6 + C_2_7);
-                                        const double tmp131 = w34*(C_1_0 + C_1_1);
-                                        const double tmp132 = w38*(-C_2_1 - C_2_2);
-                                        const double tmp133 = w37*(C_1_2 + C_1_7);
-                                        const double tmp134 = w39*(C_0_1 + C_0_7);
-                                        const double tmp135 = w36*(C_0_0 + C_0_6);
-                                        const double tmp136 = w40*(C_1_0 + C_1_5);
-                                        const double tmp137 = w45*(-C_2_5 - C_2_6);
-                                        const double tmp138 = w38*(-C_2_4 - C_2_6);
-                                        const double tmp139 = w35*(-C_2_5 - C_2_7);
-                                        const double tmp140 = w41*(-C_0_0 - C_0_2);
-                                        const double tmp141 = w37*(-C_1_3 - C_1_6);
-                                        const double tmp142 = w39*(-C_0_1 - C_0_3);
-                                        const double tmp143 = w45*(-C_2_1 - C_2_3);
-                                        const double tmp144 = w36*(-C_0_4 - C_0_6);
-                                        const double tmp145 = w40*(-C_1_1 - C_1_4);
-                                        const double tmp146 = w33*(-C_0_5 - C_0_7);
-                                        const double tmp147 = w46*(-C_2_0 - C_2_2);
-                                        const double tmp148 = w39*(-C_0_3 - C_0_5);
-                                        const double tmp149 = w36*(-C_0_2 - C_0_4);
-                                        const double tmp150 = w38*(C_2_5 + C_2_6);
-                                        const double tmp151 = w37*(-C_1_0 - C_1_5);
-                                        const double tmp152 = w39*(-C_0_0 - C_0_6);
-                                        const double tmp153 = w45*(C_2_1 + C_2_2);
-                                        const double tmp154 = w36*(-C_0_1 - C_0_7);
-                                        const double tmp155 = w40*(-C_1_2 - C_1_7);
-                                        const double tmp156 = w41*(C_0_2 + C_0_6);
-                                        const double tmp157 = w39*(C_0_3 + C_0_7);
-                                        const double tmp158 = w36*(C_0_0 + C_0_4);
-                                        const double tmp159 = w33*(C_0_1 + C_0_5);
-                                        const double tmp160 = w38*(C_2_6 + C_2_7);
-                                        const double tmp161 = w35*(C_2_4 + C_2_5);
-                                        const double tmp162 = w45*(C_2_0 + C_2_1);
-                                        const double tmp163 = w46*(C_2_2 + C_2_3);
-                                        const double tmp164 = w38*(-C_2_0 - C_2_3);
-                                        const double tmp165 = w37*(C_1_3 + C_1_6);
-                                        const double tmp166 = w40*(C_1_1 + C_1_4);
-                                        const double tmp167 = w45*(-C_2_4 - C_2_7);
-                                        const double tmp168 = w39*(C_0_3 + C_0_5);
-                                        const double tmp169 = w36*(C_0_2 + C_0_4);
-                                        const double tmp170 = w38*(C_2_1 + C_2_3);
-                                        const double tmp171 = w35*(C_2_0 + C_2_2);
-                                        const double tmp172 = w41*(C_0_5 + C_0_7);
-                                        const double tmp173 = w37*(C_1_1 + C_1_4);
-                                        const double tmp174 = w39*(C_0_4 + C_0_6);
-                                        const double tmp175 = w45*(C_2_4 + C_2_6);
-                                        const double tmp176 = w36*(C_0_1 + C_0_3);
-                                        const double tmp177 = w40*(C_1_3 + C_1_6);
-                                        const double tmp178 = w33*(C_0_0 + C_0_2);
-                                        const double tmp179 = w46*(C_2_5 + C_2_7);
-                                        const double tmp180 = w38*(-C_2_1 - C_2_3);
-                                        const double tmp181 = w42*(C_1_1 + C_1_5);
-                                        const double tmp182 = w35*(-C_2_0 - C_2_2);
-                                        const double tmp183 = w37*(C_1_3 + C_1_7);
-                                        const double tmp184 = w39*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
-                                        const double tmp185 = w45*(-C_2_4 - C_2_6);
-                                        const double tmp186 = w36*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
-                                        const double tmp187 = w40*(C_1_0 + C_1_4);
-                                        const double tmp188 = w46*(-C_2_5 - C_2_7);
-                                        const double tmp189 = w34*(C_1_2 + C_1_6);
-                                        const double tmp190 = w38*(-C_2_0 - C_2_1);
-                                        const double tmp191 = w35*(-C_2_2 - C_2_3);
-                                        const double tmp192 = w41*(C_0_0 + C_0_4);
-                                        const double tmp193 = w37*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
-                                        const double tmp194 = w39*(C_0_1 + C_0_5);
-                                        const double tmp195 = w45*(-C_2_6 - C_2_7);
-                                        const double tmp196 = w36*(C_0_2 + C_0_6);
-                                        const double tmp197 = w40*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
-                                        const double tmp198 = w33*(C_0_3 + C_0_7);
-                                        const double tmp199 = w46*(-C_2_4 - C_2_5);
-                                        const double tmp200 = w38*(-C_2_6 - C_2_7);
-                                        const double tmp201 = w42*(C_1_2 + C_1_3);
-                                        const double tmp202 = w35*(-C_2_4 - C_2_5);
-                                        const double tmp203 = w37*(C_1_0 + C_1_1);
-                                        const double tmp204 = w45*(-C_2_0 - C_2_1);
-                                        const double tmp205 = w40*(C_1_6 + C_1_7);
-                                        const double tmp206 = w46*(-C_2_2 - C_2_3);
-                                        const double tmp207 = w34*(C_1_4 + C_1_5);
-                                        const double tmp208 = w37*(-C_1_1 - C_1_4);
-                                        const double tmp209 = w39*(-C_0_2 - C_0_4);
-                                        const double tmp210 = w36*(-C_0_3 - C_0_5);
-                                        const double tmp211 = w40*(-C_1_3 - C_1_6);
-                                        const double tmp212 = w38*(C_2_4 + C_2_7);
-                                        const double tmp213 = w45*(C_2_0 + C_2_3);
-                                        const double tmp214 = w41*(-C_0_1 - C_0_5);
-                                        const double tmp215 = w39*(-C_0_0 - C_0_4);
-                                        const double tmp216 = w36*(-C_0_3 - C_0_7);
-                                        const double tmp217 = w33*(-C_0_2 - C_0_6);
-                                        const double tmp218 = w42*(-C_1_3 - C_1_7);
-                                        const double tmp219 = w37*(-C_1_1 - C_1_5);
-                                        const double tmp220 = w40*(-C_1_2 - C_1_6);
-                                        const double tmp221 = w34*(-C_1_0 - C_1_4);
-                                        const double tmp222 = w39*(C_0_0 + C_0_6);
-                                        const double tmp223 = w36*(C_0_1 + C_0_7);
-                                        const double tmp224 = w38*(C_2_4 + C_2_5);
-                                        const double tmp225 = w35*(C_2_6 + C_2_7);
-                                        const double tmp226 = w45*(C_2_2 + C_2_3);
-                                        const double tmp227 = w46*(C_2_0 + C_2_1);
-                                        const double tmp228 = w38*(-C_2_0 - C_2_2);
-                                        const double tmp229 = w42*(-C_1_2 - C_1_6);
-                                        const double tmp230 = w35*(-C_2_1 - C_2_3);
-                                        const double tmp231 = w37*(-C_1_0 - C_1_4);
-                                        const double tmp232 = w39*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
-                                        const double tmp233 = w45*(-C_2_5 - C_2_7);
-                                        const double tmp234 = w36*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
-                                        const double tmp235 = w40*(-C_1_3 - C_1_7);
-                                        const double tmp236 = w46*(-C_2_4 - C_2_6);
-                                        const double tmp237 = w34*(-C_1_1 - C_1_5);
-                                        const double tmp238 = w42*(C_1_0 + C_1_4);
-                                        const double tmp239 = w37*(C_1_2 + C_1_6);
-                                        const double tmp240 = w40*(C_1_1 + C_1_5);
-                                        const double tmp241 = w34*(C_1_3 + C_1_7);
-                                        const double tmp242 = w38*(-C_2_4 - C_2_5);
-                                        const double tmp243 = w42*(-C_1_0 - C_1_1);
-                                        const double tmp244 = w35*(-C_2_6 - C_2_7);
-                                        const double tmp245 = w37*(-C_1_2 - C_1_3);
-                                        const double tmp246 = w45*(-C_2_2 - C_2_3);
-                                        const double tmp247 = w40*(-C_1_4 - C_1_5);
-                                        const double tmp248 = w46*(-C_2_0 - C_2_1);
-                                        const double tmp249 = w34*(-C_1_6 - C_1_7);
-                                        const double tmp250 = w42*(-C_1_6 - C_1_7);
-                                        const double tmp251 = w37*(-C_1_4 - C_1_5);
-                                        const double tmp252 = w40*(-C_1_2 - C_1_3);
-                                        const double tmp253 = w34*(-C_1_0 - C_1_1);
-                                        const double tmp254 = w38*(C_2_5 + C_2_7);
-                                        const double tmp255 = w35*(C_2_4 + C_2_6);
-                                        const double tmp256 = w45*(C_2_0 + C_2_2);
-                                        const double tmp257 = w46*(C_2_1 + C_2_3);
-                                        const double tmp258 = w38*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
-                                        const double tmp259 = w45*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
-                                        const double tmp260 = w38*(C_2_4 + C_2_6);
-                                        const double tmp261 = w35*(C_2_5 + C_2_7);
-                                        const double tmp262 = w45*(C_2_1 + C_2_3);
-                                        const double tmp263 = w46*(C_2_0 + C_2_2);
+                                        const Scalar C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,3)];
+                                        const Scalar C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,3)];
+                                        const Scalar C_2_0 = C_p[INDEX4(k,m,2, 0, numEq,numComp,3)];
+                                        const Scalar C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,3)];
+                                        const Scalar C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,3)];
+                                        const Scalar C_2_1 = C_p[INDEX4(k,m,2, 1, numEq,numComp,3)];
+                                        const Scalar C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,3)];
+                                        const Scalar C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,3)];
+                                        const Scalar C_2_2 = C_p[INDEX4(k,m,2, 2, numEq,numComp,3)];
+                                        const Scalar C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,3)];
+                                        const Scalar C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,3)];
+                                        const Scalar C_2_3 = C_p[INDEX4(k,m,2, 3, numEq,numComp,3)];
+                                        const Scalar C_0_4 = C_p[INDEX4(k,m,0, 4, numEq,numComp,3)];
+                                        const Scalar C_1_4 = C_p[INDEX4(k,m,1, 4, numEq,numComp,3)];
+                                        const Scalar C_2_4 = C_p[INDEX4(k,m,2, 4, numEq,numComp,3)];
+                                        const Scalar C_0_5 = C_p[INDEX4(k,m,0, 5, numEq,numComp,3)];
+                                        const Scalar C_1_5 = C_p[INDEX4(k,m,1, 5, numEq,numComp,3)];
+                                        const Scalar C_2_5 = C_p[INDEX4(k,m,2, 5, numEq,numComp,3)];
+                                        const Scalar C_0_6 = C_p[INDEX4(k,m,0, 6, numEq,numComp,3)];
+                                        const Scalar C_1_6 = C_p[INDEX4(k,m,1, 6, numEq,numComp,3)];
+                                        const Scalar C_2_6 = C_p[INDEX4(k,m,2, 6, numEq,numComp,3)];
+                                        const Scalar C_0_7 = C_p[INDEX4(k,m,0, 7, numEq,numComp,3)];
+                                        const Scalar C_1_7 = C_p[INDEX4(k,m,1, 7, numEq,numComp,3)];
+                                        const Scalar C_2_7 = C_p[INDEX4(k,m,2, 7, numEq,numComp,3)];
+                                        const Scalar tmp0 = w38*(-C_2_5 - C_2_6);
+                                        const Scalar tmp1 = w42*(C_1_3 + C_1_7);
+                                        const Scalar tmp2 = w41*(C_0_3 + C_0_7);
+                                        const Scalar tmp3 = w37*(C_1_1 + C_1_5);
+                                        const Scalar tmp4 = w39*(C_0_2 + C_0_6);
+                                        const Scalar tmp5 = w45*(-C_2_1 - C_2_2);
+                                        const Scalar tmp6 = w36*(C_0_1 + C_0_5);
+                                        const Scalar tmp7 = w40*(C_1_2 + C_1_6);
+                                        const Scalar tmp8 = w33*(C_0_0 + C_0_4);
+                                        const Scalar tmp9 = w34*(C_1_0 + C_1_4);
+                                        const Scalar tmp10 = w38*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
+                                        const Scalar tmp11 = w42*(C_1_4 + C_1_5);
+                                        const Scalar tmp12 = w41*(C_0_4 + C_0_6);
+                                        const Scalar tmp13 = w37*(C_1_6 + C_1_7);
+                                        const Scalar tmp14 = w39*(C_0_5 + C_0_7);
+                                        const Scalar tmp15 = w45*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
+                                        const Scalar tmp16 = w36*(C_0_0 + C_0_2);
+                                        const Scalar tmp17 = w40*(C_1_0 + C_1_1);
+                                        const Scalar tmp18 = w33*(C_0_1 + C_0_3);
+                                        const Scalar tmp19 = w34*(C_1_2 + C_1_3);
+                                        const Scalar tmp20 = w38*(-C_2_5 - C_2_7);
+                                        const Scalar tmp21 = w35*(-C_2_4 - C_2_6);
+                                        const Scalar tmp22 = w41*(C_0_1 + C_0_3);
+                                        const Scalar tmp23 = w37*(C_1_0 + C_1_5);
+                                        const Scalar tmp24 = w39*(C_0_0 + C_0_2);
+                                        const Scalar tmp25 = w45*(-C_2_0 - C_2_2);
+                                        const Scalar tmp26 = w36*(C_0_5 + C_0_7);
+                                        const Scalar tmp27 = w40*(C_1_2 + C_1_7);
+                                        const Scalar tmp28 = w33*(C_0_4 + C_0_6);
+                                        const Scalar tmp29 = w46*(-C_2_1 - C_2_3);
+                                        const Scalar tmp30 = w38*(C_2_0 + C_2_2);
+                                        const Scalar tmp31 = w35*(C_2_1 + C_2_3);
+                                        const Scalar tmp32 = w41*(-C_0_4 - C_0_6);
+                                        const Scalar tmp33 = w37*(-C_1_2 - C_1_7);
+                                        const Scalar tmp34 = w39*(-C_0_5 - C_0_7);
+                                        const Scalar tmp35 = w45*(C_2_5 + C_2_7);
+                                        const Scalar tmp36 = w36*(-C_0_0 - C_0_2);
+                                        const Scalar tmp37 = w40*(-C_1_0 - C_1_5);
+                                        const Scalar tmp38 = w33*(-C_0_1 - C_0_3);
+                                        const Scalar tmp39 = w46*(C_2_4 + C_2_6);
+                                        const Scalar tmp40 = w38*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
+                                        const Scalar tmp41 = w42*(-C_1_2 - C_1_3);
+                                        const Scalar tmp42 = w41*(-C_0_1 - C_0_3);
+                                        const Scalar tmp43 = w37*(-C_1_0 - C_1_1);
+                                        const Scalar tmp44 = w39*(-C_0_0 - C_0_2);
+                                        const Scalar tmp45 = w45*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
+                                        const Scalar tmp46 = w36*(-C_0_5 - C_0_7);
+                                        const Scalar tmp47 = w40*(-C_1_6 - C_1_7);
+                                        const Scalar tmp48 = w33*(-C_0_4 - C_0_6);
+                                        const Scalar tmp49 = w34*(-C_1_4 - C_1_5);
+                                        const Scalar tmp50 = w38*(C_2_0 + C_2_1);
+                                        const Scalar tmp51 = w42*(-C_1_4 - C_1_5);
+                                        const Scalar tmp52 = w35*(C_2_2 + C_2_3);
+                                        const Scalar tmp53 = w37*(-C_1_6 - C_1_7);
+                                        const Scalar tmp54 = w39*(-C_0_1 - C_0_7);
+                                        const Scalar tmp55 = w45*(C_2_6 + C_2_7);
+                                        const Scalar tmp56 = w36*(-C_0_0 - C_0_6);
+                                        const Scalar tmp57 = w40*(-C_1_0 - C_1_1);
+                                        const Scalar tmp58 = w46*(C_2_4 + C_2_5);
+                                        const Scalar tmp59 = w34*(-C_1_2 - C_1_3);
+                                        const Scalar tmp60 = w38*(C_2_0 + C_2_1 + C_2_2 + C_2_3);
+                                        const Scalar tmp61 = w37*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
+                                        const Scalar tmp62 = w39*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
+                                        const Scalar tmp63 = w45*(C_2_4 + C_2_5 + C_2_6 + C_2_7);
+                                        const Scalar tmp64 = w36*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
+                                        const Scalar tmp65 = w40*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
+                                        const Scalar tmp66 = w41*(-C_0_5 - C_0_7);
+                                        const Scalar tmp67 = w39*(-C_0_4 - C_0_6);
+                                        const Scalar tmp68 = w36*(-C_0_1 - C_0_3);
+                                        const Scalar tmp69 = w33*(-C_0_0 - C_0_2);
+                                        const Scalar tmp70 = w38*(C_2_0 + C_2_3);
+                                        const Scalar tmp71 = w42*(C_1_2 + C_1_6);
+                                        const Scalar tmp72 = w41*(-C_0_2 - C_0_6);
+                                        const Scalar tmp73 = w37*(C_1_0 + C_1_4);
+                                        const Scalar tmp74 = w39*(-C_0_3 - C_0_7);
+                                        const Scalar tmp75 = w45*(C_2_4 + C_2_7);
+                                        const Scalar tmp76 = w36*(-C_0_0 - C_0_4);
+                                        const Scalar tmp77 = w40*(C_1_3 + C_1_7);
+                                        const Scalar tmp78 = w33*(-C_0_1 - C_0_5);
+                                        const Scalar tmp79 = w34*(C_1_1 + C_1_5);
+                                        const Scalar tmp80 = w39*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
+                                        const Scalar tmp81 = w36*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
+                                        const Scalar tmp82 = w38*(-C_2_4 - C_2_7);
+                                        const Scalar tmp83 = w42*(-C_1_1 - C_1_5);
+                                        const Scalar tmp84 = w41*(C_0_1 + C_0_5);
+                                        const Scalar tmp85 = w37*(-C_1_3 - C_1_7);
+                                        const Scalar tmp86 = w39*(C_0_0 + C_0_4);
+                                        const Scalar tmp87 = w45*(-C_2_0 - C_2_3);
+                                        const Scalar tmp88 = w36*(C_0_3 + C_0_7);
+                                        const Scalar tmp89 = w40*(-C_1_0 - C_1_4);
+                                        const Scalar tmp90 = w33*(C_0_2 + C_0_6);
+                                        const Scalar tmp91 = w34*(-C_1_2 - C_1_6);
+                                        const Scalar tmp92 = w38*(C_2_1 + C_2_2);
+                                        const Scalar tmp93 = w45*(C_2_5 + C_2_6);
+                                        const Scalar tmp94 = w37*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
+                                        const Scalar tmp95 = w40*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
+                                        const Scalar tmp96 = w42*(C_1_0 + C_1_1);
+                                        const Scalar tmp97 = w41*(C_0_0 + C_0_2);
+                                        const Scalar tmp98 = w37*(C_1_2 + C_1_3);
+                                        const Scalar tmp99 = w39*(C_0_1 + C_0_3);
+                                        const Scalar tmp100 = w36*(C_0_4 + C_0_6);
+                                        const Scalar tmp101 = w40*(C_1_4 + C_1_5);
+                                        const Scalar tmp102 = w33*(C_0_5 + C_0_7);
+                                        const Scalar tmp103 = w34*(C_1_6 + C_1_7);
+                                        const Scalar tmp104 = w38*(-C_2_2 - C_2_3);
+                                        const Scalar tmp105 = w35*(-C_2_0 - C_2_1);
+                                        const Scalar tmp106 = w41*(-C_0_3 - C_0_7);
+                                        const Scalar tmp107 = w37*(C_1_2 + C_1_3 + C_1_6 + C_1_7);
+                                        const Scalar tmp108 = w39*(-C_0_2 - C_0_6);
+                                        const Scalar tmp109 = w45*(-C_2_4 - C_2_5);
+                                        const Scalar tmp110 = w36*(-C_0_1 - C_0_5);
+                                        const Scalar tmp111 = w40*(C_1_0 + C_1_1 + C_1_4 + C_1_5);
+                                        const Scalar tmp112 = w33*(-C_0_0 - C_0_4);
+                                        const Scalar tmp113 = w46*(-C_2_6 - C_2_7);
+                                        const Scalar tmp114 = w42*(-C_1_0 - C_1_4);
+                                        const Scalar tmp115 = w41*(-C_0_0 - C_0_4);
+                                        const Scalar tmp116 = w37*(-C_1_2 - C_1_6);
+                                        const Scalar tmp117 = w39*(-C_0_1 - C_0_5);
+                                        const Scalar tmp118 = w36*(-C_0_2 - C_0_6);
+                                        const Scalar tmp119 = w40*(-C_1_1 - C_1_5);
+                                        const Scalar tmp120 = w33*(-C_0_3 - C_0_7);
+                                        const Scalar tmp121 = w34*(-C_1_3 - C_1_7);
+                                        const Scalar tmp122 = w38*(C_2_2 + C_2_3);
+                                        const Scalar tmp123 = w42*(C_1_6 + C_1_7);
+                                        const Scalar tmp124 = w35*(C_2_0 + C_2_1);
+                                        const Scalar tmp125 = w37*(C_1_4 + C_1_5);
+                                        const Scalar tmp126 = w39*(C_0_2 + C_0_4);
+                                        const Scalar tmp127 = w45*(C_2_4 + C_2_5);
+                                        const Scalar tmp128 = w36*(C_0_3 + C_0_5);
+                                        const Scalar tmp129 = w40*(C_1_2 + C_1_3);
+                                        const Scalar tmp130 = w46*(C_2_6 + C_2_7);
+                                        const Scalar tmp131 = w34*(C_1_0 + C_1_1);
+                                        const Scalar tmp132 = w38*(-C_2_1 - C_2_2);
+                                        const Scalar tmp133 = w37*(C_1_2 + C_1_7);
+                                        const Scalar tmp134 = w39*(C_0_1 + C_0_7);
+                                        const Scalar tmp135 = w36*(C_0_0 + C_0_6);
+                                        const Scalar tmp136 = w40*(C_1_0 + C_1_5);
+                                        const Scalar tmp137 = w45*(-C_2_5 - C_2_6);
+                                        const Scalar tmp138 = w38*(-C_2_4 - C_2_6);
+                                        const Scalar tmp139 = w35*(-C_2_5 - C_2_7);
+                                        const Scalar tmp140 = w41*(-C_0_0 - C_0_2);
+                                        const Scalar tmp141 = w37*(-C_1_3 - C_1_6);
+                                        const Scalar tmp142 = w39*(-C_0_1 - C_0_3);
+                                        const Scalar tmp143 = w45*(-C_2_1 - C_2_3);
+                                        const Scalar tmp144 = w36*(-C_0_4 - C_0_6);
+                                        const Scalar tmp145 = w40*(-C_1_1 - C_1_4);
+                                        const Scalar tmp146 = w33*(-C_0_5 - C_0_7);
+                                        const Scalar tmp147 = w46*(-C_2_0 - C_2_2);
+                                        const Scalar tmp148 = w39*(-C_0_3 - C_0_5);
+                                        const Scalar tmp149 = w36*(-C_0_2 - C_0_4);
+                                        const Scalar tmp150 = w38*(C_2_5 + C_2_6);
+                                        const Scalar tmp151 = w37*(-C_1_0 - C_1_5);
+                                        const Scalar tmp152 = w39*(-C_0_0 - C_0_6);
+                                        const Scalar tmp153 = w45*(C_2_1 + C_2_2);
+                                        const Scalar tmp154 = w36*(-C_0_1 - C_0_7);
+                                        const Scalar tmp155 = w40*(-C_1_2 - C_1_7);
+                                        const Scalar tmp156 = w41*(C_0_2 + C_0_6);
+                                        const Scalar tmp157 = w39*(C_0_3 + C_0_7);
+                                        const Scalar tmp158 = w36*(C_0_0 + C_0_4);
+                                        const Scalar tmp159 = w33*(C_0_1 + C_0_5);
+                                        const Scalar tmp160 = w38*(C_2_6 + C_2_7);
+                                        const Scalar tmp161 = w35*(C_2_4 + C_2_5);
+                                        const Scalar tmp162 = w45*(C_2_0 + C_2_1);
+                                        const Scalar tmp163 = w46*(C_2_2 + C_2_3);
+                                        const Scalar tmp164 = w38*(-C_2_0 - C_2_3);
+                                        const Scalar tmp165 = w37*(C_1_3 + C_1_6);
+                                        const Scalar tmp166 = w40*(C_1_1 + C_1_4);
+                                        const Scalar tmp167 = w45*(-C_2_4 - C_2_7);
+                                        const Scalar tmp168 = w39*(C_0_3 + C_0_5);
+                                        const Scalar tmp169 = w36*(C_0_2 + C_0_4);
+                                        const Scalar tmp170 = w38*(C_2_1 + C_2_3);
+                                        const Scalar tmp171 = w35*(C_2_0 + C_2_2);
+                                        const Scalar tmp172 = w41*(C_0_5 + C_0_7);
+                                        const Scalar tmp173 = w37*(C_1_1 + C_1_4);
+                                        const Scalar tmp174 = w39*(C_0_4 + C_0_6);
+                                        const Scalar tmp175 = w45*(C_2_4 + C_2_6);
+                                        const Scalar tmp176 = w36*(C_0_1 + C_0_3);
+                                        const Scalar tmp177 = w40*(C_1_3 + C_1_6);
+                                        const Scalar tmp178 = w33*(C_0_0 + C_0_2);
+                                        const Scalar tmp179 = w46*(C_2_5 + C_2_7);
+                                        const Scalar tmp180 = w38*(-C_2_1 - C_2_3);
+                                        const Scalar tmp181 = w42*(C_1_1 + C_1_5);
+                                        const Scalar tmp182 = w35*(-C_2_0 - C_2_2);
+                                        const Scalar tmp183 = w37*(C_1_3 + C_1_7);
+                                        const Scalar tmp184 = w39*(C_0_1 + C_0_3 + C_0_5 + C_0_7);
+                                        const Scalar tmp185 = w45*(-C_2_4 - C_2_6);
+                                        const Scalar tmp186 = w36*(C_0_0 + C_0_2 + C_0_4 + C_0_6);
+                                        const Scalar tmp187 = w40*(C_1_0 + C_1_4);
+                                        const Scalar tmp188 = w46*(-C_2_5 - C_2_7);
+                                        const Scalar tmp189 = w34*(C_1_2 + C_1_6);
+                                        const Scalar tmp190 = w38*(-C_2_0 - C_2_1);
+                                        const Scalar tmp191 = w35*(-C_2_2 - C_2_3);
+                                        const Scalar tmp192 = w41*(C_0_0 + C_0_4);
+                                        const Scalar tmp193 = w37*(-C_1_0 - C_1_1 - C_1_4 - C_1_5);
+                                        const Scalar tmp194 = w39*(C_0_1 + C_0_5);
+                                        const Scalar tmp195 = w45*(-C_2_6 - C_2_7);
+                                        const Scalar tmp196 = w36*(C_0_2 + C_0_6);
+                                        const Scalar tmp197 = w40*(-C_1_2 - C_1_3 - C_1_6 - C_1_7);
+                                        const Scalar tmp198 = w33*(C_0_3 + C_0_7);
+                                        const Scalar tmp199 = w46*(-C_2_4 - C_2_5);
+                                        const Scalar tmp200 = w38*(-C_2_6 - C_2_7);
+                                        const Scalar tmp201 = w42*(C_1_2 + C_1_3);
+                                        const Scalar tmp202 = w35*(-C_2_4 - C_2_5);
+                                        const Scalar tmp203 = w37*(C_1_0 + C_1_1);
+                                        const Scalar tmp204 = w45*(-C_2_0 - C_2_1);
+                                        const Scalar tmp205 = w40*(C_1_6 + C_1_7);
+                                        const Scalar tmp206 = w46*(-C_2_2 - C_2_3);
+                                        const Scalar tmp207 = w34*(C_1_4 + C_1_5);
+                                        const Scalar tmp208 = w37*(-C_1_1 - C_1_4);
+                                        const Scalar tmp209 = w39*(-C_0_2 - C_0_4);
+                                        const Scalar tmp210 = w36*(-C_0_3 - C_0_5);
+                                        const Scalar tmp211 = w40*(-C_1_3 - C_1_6);
+                                        const Scalar tmp212 = w38*(C_2_4 + C_2_7);
+                                        const Scalar tmp213 = w45*(C_2_0 + C_2_3);
+                                        const Scalar tmp214 = w41*(-C_0_1 - C_0_5);
+                                        const Scalar tmp215 = w39*(-C_0_0 - C_0_4);
+                                        const Scalar tmp216 = w36*(-C_0_3 - C_0_7);
+                                        const Scalar tmp217 = w33*(-C_0_2 - C_0_6);
+                                        const Scalar tmp218 = w42*(-C_1_3 - C_1_7);
+                                        const Scalar tmp219 = w37*(-C_1_1 - C_1_5);
+                                        const Scalar tmp220 = w40*(-C_1_2 - C_1_6);
+                                        const Scalar tmp221 = w34*(-C_1_0 - C_1_4);
+                                        const Scalar tmp222 = w39*(C_0_0 + C_0_6);
+                                        const Scalar tmp223 = w36*(C_0_1 + C_0_7);
+                                        const Scalar tmp224 = w38*(C_2_4 + C_2_5);
+                                        const Scalar tmp225 = w35*(C_2_6 + C_2_7);
+                                        const Scalar tmp226 = w45*(C_2_2 + C_2_3);
+                                        const Scalar tmp227 = w46*(C_2_0 + C_2_1);
+                                        const Scalar tmp228 = w38*(-C_2_0 - C_2_2);
+                                        const Scalar tmp229 = w42*(-C_1_2 - C_1_6);
+                                        const Scalar tmp230 = w35*(-C_2_1 - C_2_3);
+                                        const Scalar tmp231 = w37*(-C_1_0 - C_1_4);
+                                        const Scalar tmp232 = w39*(-C_0_0 - C_0_2 - C_0_4 - C_0_6);
+                                        const Scalar tmp233 = w45*(-C_2_5 - C_2_7);
+                                        const Scalar tmp234 = w36*(-C_0_1 - C_0_3 - C_0_5 - C_0_7);
+                                        const Scalar tmp235 = w40*(-C_1_3 - C_1_7);
+                                        const Scalar tmp236 = w46*(-C_2_4 - C_2_6);
+                                        const Scalar tmp237 = w34*(-C_1_1 - C_1_5);
+                                        const Scalar tmp238 = w42*(C_1_0 + C_1_4);
+                                        const Scalar tmp239 = w37*(C_1_2 + C_1_6);
+                                        const Scalar tmp240 = w40*(C_1_1 + C_1_5);
+                                        const Scalar tmp241 = w34*(C_1_3 + C_1_7);
+                                        const Scalar tmp242 = w38*(-C_2_4 - C_2_5);
+                                        const Scalar tmp243 = w42*(-C_1_0 - C_1_1);
+                                        const Scalar tmp244 = w35*(-C_2_6 - C_2_7);
+                                        const Scalar tmp245 = w37*(-C_1_2 - C_1_3);
+                                        const Scalar tmp246 = w45*(-C_2_2 - C_2_3);
+                                        const Scalar tmp247 = w40*(-C_1_4 - C_1_5);
+                                        const Scalar tmp248 = w46*(-C_2_0 - C_2_1);
+                                        const Scalar tmp249 = w34*(-C_1_6 - C_1_7);
+                                        const Scalar tmp250 = w42*(-C_1_6 - C_1_7);
+                                        const Scalar tmp251 = w37*(-C_1_4 - C_1_5);
+                                        const Scalar tmp252 = w40*(-C_1_2 - C_1_3);
+                                        const Scalar tmp253 = w34*(-C_1_0 - C_1_1);
+                                        const Scalar tmp254 = w38*(C_2_5 + C_2_7);
+                                        const Scalar tmp255 = w35*(C_2_4 + C_2_6);
+                                        const Scalar tmp256 = w45*(C_2_0 + C_2_2);
+                                        const Scalar tmp257 = w46*(C_2_1 + C_2_3);
+                                        const Scalar tmp258 = w38*(-C_2_4 - C_2_5 - C_2_6 - C_2_7);
+                                        const Scalar tmp259 = w45*(-C_2_0 - C_2_1 - C_2_2 - C_2_3);
+                                        const Scalar tmp260 = w38*(C_2_4 + C_2_6);
+                                        const Scalar tmp261 = w35*(C_2_5 + C_2_7);
+                                        const Scalar tmp262 = w45*(C_2_1 + C_2_3);
+                                        const Scalar tmp263 = w46*(C_2_0 + C_2_2);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-C_0_0*w50 - C_0_1*w41 - C_0_6*w33 - C_0_7*w49 + C_1_0*w47 - C_1_2*w42 - C_1_5*w34 + C_1_7*w48 - C_2_0*w43 - C_2_3*w35 - C_2_4*w46 - C_2_7*w44 + tmp132 + tmp137 + tmp208 + tmp209 + tmp210 + tmp211;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=C_0_0*w50 + C_0_1*w41 + C_0_6*w33 + C_0_7*w49 + tmp126 + tmp128 + tmp242 + tmp243 + tmp244 + tmp245 + tmp246 + tmp247 + tmp248 + tmp249;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=-C_1_0*w47 + C_1_2*w42 + C_1_5*w34 - C_1_7*w48 + tmp138 + tmp139 + tmp140 + tmp142 + tmp143 + tmp144 + tmp146 + tmp147 + tmp173 + tmp177;
@@ -5254,73 +5279,73 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w55;
-                                        const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w56;
-                                        const double wC2 = C_p[INDEX3(k,m,2,numEq,numComp)]*w54;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+= 4*wC0 + 4*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=-4*wC0 + 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+= 2*wC0 - 4*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+=-2*wC0 - 2*wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+= 2*wC0 + 2*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+=-2*wC0 +   wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=   wC0 - 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=  -wC0 -   wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+= 4*wC0 + 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4*wC0 + 4*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+= 2*wC0 - 2*wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2*wC0 - 4*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+= 2*wC0 +   wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2*wC0 + 2*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=   wC0 -   wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=  -wC0 - 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+= 2*wC0 + 4*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+=-2*wC0 + 2*wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+= 4*wC0 - 4*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+=-4*wC0 - 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=   wC0 + 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=  -wC0 +   wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+= 2*wC0 - 2*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+=-2*wC0 -   wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+= 2*wC0 + 2*wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2*wC0 + 4*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+= 4*wC0 - 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4*wC0 - 4*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=   wC0 +   wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=  -wC0 + 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+= 2*wC0 -   wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2*wC0 - 2*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+= 2*wC0 + 2*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+=-2*wC0 +   wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=   wC0 - 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=  -wC0 -   wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+= 4*wC0 + 4*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+=-4*wC0 + 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+= 2*wC0 - 4*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+=-2*wC0 - 2*wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+= 2*wC0 +   wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2*wC0 + 2*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=   wC0 -   wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=  -wC0 - 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+= 4*wC0 + 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4*wC0 + 4*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+= 2*wC0 - 2*wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2*wC0 - 4*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=   wC0 + 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=  -wC0 +   wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+= 2*wC0 - 2*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+=-2*wC0 -   wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+= 2*wC0 + 4*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+=-2*wC0 + 2*wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+= 4*wC0 - 4*wC1 - 4*wC2;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+=-4*wC0 - 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=   wC0 +   wC1 +   wC2;
-                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=  -wC0 + 2*wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+= 2*wC0 -   wC1 + 2*wC2;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2*wC0 - 2*wC1 + 4*wC2;
-                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+= 2*wC0 + 2*wC1 -   wC2;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2*wC0 + 4*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+= 4*wC0 - 2*wC1 - 2*wC2;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4*wC0 - 4*wC1 - 4*wC2;
+                                        const Scalar wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w55;
+                                        const Scalar wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w56;
+                                        const Scalar wC2 = C_p[INDEX3(k,m,2,numEq,numComp)]*w54;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+= 4.*wC0 + 4.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=-4.*wC0 + 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+= 2.*wC0 - 4.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+=-2.*wC0 - 2.*wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+= 2.*wC0 + 2.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+=-2.*wC0 +    wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=    wC0 - 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=   -wC0 -    wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+= 4.*wC0 + 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=-4.*wC0 + 4.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+= 2.*wC0 - 2.*wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=-2.*wC0 - 4.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+= 2.*wC0 +    wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=-2.*wC0 + 2.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=    wC0 -    wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=   -wC0 - 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+= 2.*wC0 + 4.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+=-2.*wC0 + 2.*wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+= 4.*wC0 - 4.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+=-4.*wC0 - 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=    wC0 + 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=   -wC0 +    wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+= 2.*wC0 - 2.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+=-2.*wC0 -    wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+= 2.*wC0 + 2.*wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=-2.*wC0 + 4.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+= 4.*wC0 - 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=-4.*wC0 - 4.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=    wC0 +    wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=   -wC0 + 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+= 2.*wC0 -    wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=-2.*wC0 - 2.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+= 2.*wC0 + 2.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+=-2.*wC0 +    wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=    wC0 - 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=   -wC0 -    wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+= 4.*wC0 + 4.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+=-4.*wC0 + 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+= 2.*wC0 - 4.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+=-2.*wC0 - 2.*wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+= 2.*wC0 +    wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=-2.*wC0 + 2.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=    wC0 -    wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=   -wC0 - 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+= 4.*wC0 + 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=-4.*wC0 + 4.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+= 2.*wC0 - 2.*wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=-2.*wC0 - 4.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=    wC0 + 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=   -wC0 +    wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+= 2.*wC0 - 2.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+=-2.*wC0 -    wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+= 2.*wC0 + 4.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+=-2.*wC0 + 2.*wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+= 4.*wC0 - 4.*wC1 - 4.*wC2;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+=-4.*wC0 - 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=    wC0 +    wC1 +    wC2;
+                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=   -wC0 + 2.*wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+= 2.*wC0 -    wC1 + 2.*wC2;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=-2.*wC0 - 2.*wC1 + 4.*wC2;
+                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+= 2.*wC0 + 2.*wC1 -    wC2;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=-2.*wC0 + 4.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+= 4.*wC0 - 2.*wC1 - 2.*wC2;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=-4.*wC0 - 4.*wC1 - 4.*wC2;
                                     }
                                 }
                             }
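
(Editorial sketch, not part of the patch.) The hunks above only switch the element-matrix accumulation EM_S[INDEX4(...)] += ... from double to the templated Scalar type; the coefficients themselves are untouched. For readers tracing the flattened indexing, here is a minimal sketch of how one 8x8 block per (k,m) equation/component pair is addressed. The INDEX2/INDEX3/INDEX4 definitions below are assumed ripley-style row-major macros and do not appear in this diff.

    // Minimal sketch only: assumed ripley-style row-major index macros,
    // not the definitions shipped in the upstream headers.
    #include <vector>

    #define INDEX2(i, j, n1)               ((i) + (n1)*(j))
    #define INDEX3(i, j, k, n1, n2)        ((i) + (n1)*INDEX2(j, k, n2))
    #define INDEX4(i, j, k, l, n1, n2, n3) ((i) + (n1)*INDEX3(j, k, l, n2, n3))

    // EM_S stores an 8x8 element matrix for every (k,m) equation/component
    // pair, flattened into one contiguous vector of the templated Scalar.
    template<class Scalar>
    Scalar& emEntry(std::vector<Scalar>& EM_S, int k, int m, int i, int j,
                    int numEq, int numComp)
    {
        return EM_S[INDEX4(k, m, i, j, numEq, numComp, 8)];
    }
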
@@ -5329,77 +5354,77 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double* D_p=D.getSampleDataRO(e);
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
                             if (D.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double D_4 = D_p[INDEX3(k,m,4,numEq,numComp)];
-                                        const double D_5 = D_p[INDEX3(k,m,5,numEq,numComp)];
-                                        const double D_6 = D_p[INDEX3(k,m,6,numEq,numComp)];
-                                        const double D_7 = D_p[INDEX3(k,m,7,numEq,numComp)];
-                                        const double tmp0 = w59*(D_3 + D_7);
-                                        const double tmp1 = w57*(D_0 + D_4);
-                                        const double tmp2 = w58*(D_1 + D_2 + D_5 + D_6);
-                                        const double tmp3 = w60*(D_0 + D_1 + D_2 + D_3);
-                                        const double tmp4 = w61*(D_4 + D_5 + D_6 + D_7);
-                                        const double tmp5 = w59*(D_1 + D_3);
-                                        const double tmp6 = w57*(D_4 + D_6);
-                                        const double tmp7 = w58*(D_0 + D_2 + D_5 + D_7);
-                                        const double tmp8 = w59*(D_4 + D_6);
-                                        const double tmp9 = w57*(D_1 + D_3);
-                                        const double tmp10 = w60*(D_4 + D_5 + D_6 + D_7);
-                                        const double tmp11 = w61*(D_0 + D_1 + D_2 + D_3);
-                                        const double tmp12 = w59*(D_4 + D_5);
-                                        const double tmp13 = w57*(D_2 + D_3);
-                                        const double tmp14 = w58*(D_0 + D_1 + D_6 + D_7);
-                                        const double tmp15 = w58*(D_0 + D_1 + D_2 + D_3 + D_4 + D_5 + D_6 + D_7);
-                                        const double tmp16 = w59*(D_2 + D_6);
-                                        const double tmp17 = w57*(D_1 + D_5);
-                                        const double tmp18 = w58*(D_0 + D_3 + D_4 + D_7);
-                                        const double tmp19 = w59*(D_1 + D_5);
-                                        const double tmp20 = w57*(D_2 + D_6);
-                                        const double tmp21 = w60*(D_0 + D_1 + D_4 + D_5);
-                                        const double tmp22 = w61*(D_2 + D_3 + D_6 + D_7);
-                                        const double tmp23 = w59*(D_0 + D_4);
-                                        const double tmp24 = w57*(D_3 + D_7);
-                                        const double tmp25 = w59*(D_6 + D_7);
-                                        const double tmp26 = w57*(D_0 + D_1);
-                                        const double tmp27 = w58*(D_2 + D_3 + D_4 + D_5);
-                                        const double tmp28 = w60*(D_0 + D_5 + D_6);
-                                        const double tmp29 = w61*(D_1 + D_2 + D_7);
-                                        const double tmp30 = w59*(D_0 + D_2);
-                                        const double tmp31 = w57*(D_5 + D_7);
-                                        const double tmp32 = w58*(D_1 + D_3 + D_4 + D_6);
-                                        const double tmp33 = w60*(D_1 + D_2 + D_7);
-                                        const double tmp34 = w61*(D_0 + D_5 + D_6);
-                                        const double tmp35 = w60*(D_1 + D_4 + D_7);
-                                        const double tmp36 = w61*(D_0 + D_3 + D_6);
-                                        const double tmp37 = w60*(D_1 + D_2 + D_4);
-                                        const double tmp38 = w61*(D_3 + D_5 + D_6);
-                                        const double tmp39 = w59*(D_5 + D_7);
-                                        const double tmp40 = w57*(D_0 + D_2);
-                                        const double tmp41 = w60*(D_0 + D_2 + D_4 + D_6);
-                                        const double tmp42 = w61*(D_1 + D_3 + D_5 + D_7);
-                                        const double tmp43 = w60*(D_2 + D_3 + D_6 + D_7);
-                                        const double tmp44 = w61*(D_0 + D_1 + D_4 + D_5);
-                                        const double tmp45 = w60*(D_2 + D_4 + D_7);
-                                        const double tmp46 = w61*(D_0 + D_3 + D_5);
-                                        const double tmp47 = w59*(D_2 + D_3);
-                                        const double tmp48 = w57*(D_4 + D_5);
-                                        const double tmp49 = w60*(D_3 + D_5 + D_6);
-                                        const double tmp50 = w61*(D_1 + D_2 + D_4);
-                                        const double tmp51 = w60*(D_0 + D_3 + D_5);
-                                        const double tmp52 = w61*(D_2 + D_4 + D_7);
-                                        const double tmp53 = w60*(D_0 + D_3 + D_6);
-                                        const double tmp54 = w61*(D_1 + D_4 + D_7);
-                                        const double tmp55 = w60*(D_1 + D_3 + D_5 + D_7);
-                                        const double tmp56 = w61*(D_0 + D_2 + D_4 + D_6);
-                                        const double tmp57 = w59*(D_0 + D_1);
-                                        const double tmp58 = w57*(D_6 + D_7);
+                                        const Scalar D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar D_4 = D_p[INDEX3(k,m,4,numEq,numComp)];
+                                        const Scalar D_5 = D_p[INDEX3(k,m,5,numEq,numComp)];
+                                        const Scalar D_6 = D_p[INDEX3(k,m,6,numEq,numComp)];
+                                        const Scalar D_7 = D_p[INDEX3(k,m,7,numEq,numComp)];
+                                        const Scalar tmp0 = w59*(D_3 + D_7);
+                                        const Scalar tmp1 = w57*(D_0 + D_4);
+                                        const Scalar tmp2 = w58*(D_1 + D_2 + D_5 + D_6);
+                                        const Scalar tmp3 = w60*(D_0 + D_1 + D_2 + D_3);
+                                        const Scalar tmp4 = w61*(D_4 + D_5 + D_6 + D_7);
+                                        const Scalar tmp5 = w59*(D_1 + D_3);
+                                        const Scalar tmp6 = w57*(D_4 + D_6);
+                                        const Scalar tmp7 = w58*(D_0 + D_2 + D_5 + D_7);
+                                        const Scalar tmp8 = w59*(D_4 + D_6);
+                                        const Scalar tmp9 = w57*(D_1 + D_3);
+                                        const Scalar tmp10 = w60*(D_4 + D_5 + D_6 + D_7);
+                                        const Scalar tmp11 = w61*(D_0 + D_1 + D_2 + D_3);
+                                        const Scalar tmp12 = w59*(D_4 + D_5);
+                                        const Scalar tmp13 = w57*(D_2 + D_3);
+                                        const Scalar tmp14 = w58*(D_0 + D_1 + D_6 + D_7);
+                                        const Scalar tmp15 = w58*(D_0 + D_1 + D_2 + D_3 + D_4 + D_5 + D_6 + D_7);
+                                        const Scalar tmp16 = w59*(D_2 + D_6);
+                                        const Scalar tmp17 = w57*(D_1 + D_5);
+                                        const Scalar tmp18 = w58*(D_0 + D_3 + D_4 + D_7);
+                                        const Scalar tmp19 = w59*(D_1 + D_5);
+                                        const Scalar tmp20 = w57*(D_2 + D_6);
+                                        const Scalar tmp21 = w60*(D_0 + D_1 + D_4 + D_5);
+                                        const Scalar tmp22 = w61*(D_2 + D_3 + D_6 + D_7);
+                                        const Scalar tmp23 = w59*(D_0 + D_4);
+                                        const Scalar tmp24 = w57*(D_3 + D_7);
+                                        const Scalar tmp25 = w59*(D_6 + D_7);
+                                        const Scalar tmp26 = w57*(D_0 + D_1);
+                                        const Scalar tmp27 = w58*(D_2 + D_3 + D_4 + D_5);
+                                        const Scalar tmp28 = w60*(D_0 + D_5 + D_6);
+                                        const Scalar tmp29 = w61*(D_1 + D_2 + D_7);
+                                        const Scalar tmp30 = w59*(D_0 + D_2);
+                                        const Scalar tmp31 = w57*(D_5 + D_7);
+                                        const Scalar tmp32 = w58*(D_1 + D_3 + D_4 + D_6);
+                                        const Scalar tmp33 = w60*(D_1 + D_2 + D_7);
+                                        const Scalar tmp34 = w61*(D_0 + D_5 + D_6);
+                                        const Scalar tmp35 = w60*(D_1 + D_4 + D_7);
+                                        const Scalar tmp36 = w61*(D_0 + D_3 + D_6);
+                                        const Scalar tmp37 = w60*(D_1 + D_2 + D_4);
+                                        const Scalar tmp38 = w61*(D_3 + D_5 + D_6);
+                                        const Scalar tmp39 = w59*(D_5 + D_7);
+                                        const Scalar tmp40 = w57*(D_0 + D_2);
+                                        const Scalar tmp41 = w60*(D_0 + D_2 + D_4 + D_6);
+                                        const Scalar tmp42 = w61*(D_1 + D_3 + D_5 + D_7);
+                                        const Scalar tmp43 = w60*(D_2 + D_3 + D_6 + D_7);
+                                        const Scalar tmp44 = w61*(D_0 + D_1 + D_4 + D_5);
+                                        const Scalar tmp45 = w60*(D_2 + D_4 + D_7);
+                                        const Scalar tmp46 = w61*(D_0 + D_3 + D_5);
+                                        const Scalar tmp47 = w59*(D_2 + D_3);
+                                        const Scalar tmp48 = w57*(D_4 + D_5);
+                                        const Scalar tmp49 = w60*(D_3 + D_5 + D_6);
+                                        const Scalar tmp50 = w61*(D_1 + D_2 + D_4);
+                                        const Scalar tmp51 = w60*(D_0 + D_3 + D_5);
+                                        const Scalar tmp52 = w61*(D_2 + D_4 + D_7);
+                                        const Scalar tmp53 = w60*(D_0 + D_3 + D_6);
+                                        const Scalar tmp54 = w61*(D_1 + D_4 + D_7);
+                                        const Scalar tmp55 = w60*(D_1 + D_3 + D_5 + D_7);
+                                        const Scalar tmp56 = w61*(D_0 + D_2 + D_4 + D_6);
+                                        const Scalar tmp57 = w59*(D_0 + D_1);
+                                        const Scalar tmp58 = w57*(D_6 + D_7);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=D_0*w62 + D_7*w63 + tmp49 + tmp50;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=tmp27 + tmp57 + tmp58;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=tmp30 + tmp31 + tmp32;
@@ -5469,71 +5494,71 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wD0 = 8*D_p[INDEX2(k, m, numEq)]*w58;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+=8*wD0;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=  wD0;
-                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+=2*wD0;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+=4*wD0;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=8*wD0;
+                                        const Scalar wD0 = 8.*D_p[INDEX2(k, m, numEq)]*w58;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,0,7,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,1,6,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,2,5,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,2,7,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,3,4,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,3,6,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,4,3,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,5,2,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,6,1,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,6,3,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)]+=8.*wD0;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,7,0,numEq,numComp,8)]+=   wD0;
+                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,7,2,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)]+=2.*wD0;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)]+=4.*wD0;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)]+=8.*wD0;
                                     }
                                 }
                             }
@@ -5542,93 +5567,93 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double* X_p=X.getSampleDataRO(e);
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
                             if (X.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double X_0_0 = X_p[INDEX3(k,0,0,numEq,3)];
-                                    const double X_1_0 = X_p[INDEX3(k,1,0,numEq,3)];
-                                    const double X_2_0 = X_p[INDEX3(k,2,0,numEq,3)];
-                                    const double X_0_1 = X_p[INDEX3(k,0,1,numEq,3)];
-                                    const double X_1_1 = X_p[INDEX3(k,1,1,numEq,3)];
-                                    const double X_2_1 = X_p[INDEX3(k,2,1,numEq,3)];
-                                    const double X_0_2 = X_p[INDEX3(k,0,2,numEq,3)];
-                                    const double X_1_2 = X_p[INDEX3(k,1,2,numEq,3)];
-                                    const double X_2_2 = X_p[INDEX3(k,2,2,numEq,3)];
-                                    const double X_0_3 = X_p[INDEX3(k,0,3,numEq,3)];
-                                    const double X_1_3 = X_p[INDEX3(k,1,3,numEq,3)];
-                                    const double X_2_3 = X_p[INDEX3(k,2,3,numEq,3)];
-                                    const double X_0_4 = X_p[INDEX3(k,0,4,numEq,3)];
-                                    const double X_1_4 = X_p[INDEX3(k,1,4,numEq,3)];
-                                    const double X_2_4 = X_p[INDEX3(k,2,4,numEq,3)];
-                                    const double X_0_5 = X_p[INDEX3(k,0,5,numEq,3)];
-                                    const double X_1_5 = X_p[INDEX3(k,1,5,numEq,3)];
-                                    const double X_2_5 = X_p[INDEX3(k,2,5,numEq,3)];
-                                    const double X_0_6 = X_p[INDEX3(k,0,6,numEq,3)];
-                                    const double X_1_6 = X_p[INDEX3(k,1,6,numEq,3)];
-                                    const double X_2_6 = X_p[INDEX3(k,2,6,numEq,3)];
-                                    const double X_0_7 = X_p[INDEX3(k,0,7,numEq,3)];
-                                    const double X_1_7 = X_p[INDEX3(k,1,7,numEq,3)];
-                                    const double X_2_7 = X_p[INDEX3(k,2,7,numEq,3)];
-                                    const double tmp0 = w72*(X_0_6 + X_0_7);
-                                    const double tmp1 = w66*(X_2_0 + X_2_4);
-                                    const double tmp2 = w64*(X_0_0 + X_0_1);
-                                    const double tmp3 = w68*(X_2_1 + X_2_2 + X_2_5 + X_2_6);
-                                    const double tmp4 = w65*(X_1_0 + X_1_2);
-                                    const double tmp5 = w70*(X_2_3 + X_2_7);
-                                    const double tmp6 = w67*(X_1_1 + X_1_3 + X_1_4 + X_1_6);
-                                    const double tmp7 = w71*(X_1_5 + X_1_7);
-                                    const double tmp8 = w69*(X_0_2 + X_0_3 + X_0_4 + X_0_5);
-                                    const double tmp9 = w72*(-X_0_6 - X_0_7);
-                                    const double tmp10 = w66*(X_2_1 + X_2_5);
-                                    const double tmp11 = w64*(-X_0_0 - X_0_1);
-                                    const double tmp12 = w68*(X_2_0 + X_2_3 + X_2_4 + X_2_7);
-                                    const double tmp13 = w65*(X_1_1 + X_1_3);
-                                    const double tmp14 = w70*(X_2_2 + X_2_6);
-                                    const double tmp15 = w67*(X_1_0 + X_1_2 + X_1_5 + X_1_7);
-                                    const double tmp16 = w71*(X_1_4 + X_1_6);
-                                    const double tmp17 = w69*(-X_0_2 - X_0_3 - X_0_4 - X_0_5);
-                                    const double tmp18 = w72*(X_0_4 + X_0_5);
-                                    const double tmp19 = w66*(X_2_2 + X_2_6);
-                                    const double tmp20 = w64*(X_0_2 + X_0_3);
-                                    const double tmp21 = w65*(-X_1_0 - X_1_2);
-                                    const double tmp22 = w70*(X_2_1 + X_2_5);
-                                    const double tmp23 = w67*(-X_1_1 - X_1_3 - X_1_4 - X_1_6);
-                                    const double tmp24 = w71*(-X_1_5 - X_1_7);
-                                    const double tmp25 = w69*(X_0_0 + X_0_1 + X_0_6 + X_0_7);
-                                    const double tmp26 = w72*(-X_0_4 - X_0_5);
-                                    const double tmp27 = w66*(X_2_3 + X_2_7);
-                                    const double tmp28 = w64*(-X_0_2 - X_0_3);
-                                    const double tmp29 = w65*(-X_1_1 - X_1_3);
-                                    const double tmp30 = w70*(X_2_0 + X_2_4);
-                                    const double tmp31 = w67*(-X_1_0 - X_1_2 - X_1_5 - X_1_7);
-                                    const double tmp32 = w71*(-X_1_4 - X_1_6);
-                                    const double tmp33 = w69*(-X_0_0 - X_0_1 - X_0_6 - X_0_7);
-                                    const double tmp34 = w72*(X_0_2 + X_0_3);
-                                    const double tmp35 = w66*(-X_2_0 - X_2_4);
-                                    const double tmp36 = w64*(X_0_4 + X_0_5);
-                                    const double tmp37 = w68*(-X_2_1 - X_2_2 - X_2_5 - X_2_6);
-                                    const double tmp38 = w65*(X_1_4 + X_1_6);
-                                    const double tmp39 = w70*(-X_2_3 - X_2_7);
-                                    const double tmp40 = w71*(X_1_1 + X_1_3);
-                                    const double tmp41 = w72*(-X_0_2 - X_0_3);
-                                    const double tmp42 = w66*(-X_2_1 - X_2_5);
-                                    const double tmp43 = w64*(-X_0_4 - X_0_5);
-                                    const double tmp44 = w68*(-X_2_0 - X_2_3 - X_2_4 - X_2_7);
-                                    const double tmp45 = w65*(X_1_5 + X_1_7);
-                                    const double tmp46 = w70*(-X_2_2 - X_2_6);
-                                    const double tmp47 = w71*(X_1_0 + X_1_2);
-                                    const double tmp48 = w72*(X_0_0 + X_0_1);
-                                    const double tmp49 = w66*(-X_2_2 - X_2_6);
-                                    const double tmp50 = w64*(X_0_6 + X_0_7);
-                                    const double tmp51 = w65*(-X_1_4 - X_1_6);
-                                    const double tmp52 = w70*(-X_2_1 - X_2_5);
-                                    const double tmp53 = w71*(-X_1_1 - X_1_3);
-                                    const double tmp54 = w72*(-X_0_0 - X_0_1);
-                                    const double tmp55 = w66*(-X_2_3 - X_2_7);
-                                    const double tmp56 = w64*(-X_0_6 - X_0_7);
-                                    const double tmp57 = w65*(-X_1_5 - X_1_7);
-                                    const double tmp58 = w70*(-X_2_0 - X_2_4);
-                                    const double tmp59 = w71*(-X_1_0 - X_1_2);
+                                    const Scalar X_0_0 = X_p[INDEX3(k,0,0,numEq,3)];
+                                    const Scalar X_1_0 = X_p[INDEX3(k,1,0,numEq,3)];
+                                    const Scalar X_2_0 = X_p[INDEX3(k,2,0,numEq,3)];
+                                    const Scalar X_0_1 = X_p[INDEX3(k,0,1,numEq,3)];
+                                    const Scalar X_1_1 = X_p[INDEX3(k,1,1,numEq,3)];
+                                    const Scalar X_2_1 = X_p[INDEX3(k,2,1,numEq,3)];
+                                    const Scalar X_0_2 = X_p[INDEX3(k,0,2,numEq,3)];
+                                    const Scalar X_1_2 = X_p[INDEX3(k,1,2,numEq,3)];
+                                    const Scalar X_2_2 = X_p[INDEX3(k,2,2,numEq,3)];
+                                    const Scalar X_0_3 = X_p[INDEX3(k,0,3,numEq,3)];
+                                    const Scalar X_1_3 = X_p[INDEX3(k,1,3,numEq,3)];
+                                    const Scalar X_2_3 = X_p[INDEX3(k,2,3,numEq,3)];
+                                    const Scalar X_0_4 = X_p[INDEX3(k,0,4,numEq,3)];
+                                    const Scalar X_1_4 = X_p[INDEX3(k,1,4,numEq,3)];
+                                    const Scalar X_2_4 = X_p[INDEX3(k,2,4,numEq,3)];
+                                    const Scalar X_0_5 = X_p[INDEX3(k,0,5,numEq,3)];
+                                    const Scalar X_1_5 = X_p[INDEX3(k,1,5,numEq,3)];
+                                    const Scalar X_2_5 = X_p[INDEX3(k,2,5,numEq,3)];
+                                    const Scalar X_0_6 = X_p[INDEX3(k,0,6,numEq,3)];
+                                    const Scalar X_1_6 = X_p[INDEX3(k,1,6,numEq,3)];
+                                    const Scalar X_2_6 = X_p[INDEX3(k,2,6,numEq,3)];
+                                    const Scalar X_0_7 = X_p[INDEX3(k,0,7,numEq,3)];
+                                    const Scalar X_1_7 = X_p[INDEX3(k,1,7,numEq,3)];
+                                    const Scalar X_2_7 = X_p[INDEX3(k,2,7,numEq,3)];
+                                    const Scalar tmp0 = w72*(X_0_6 + X_0_7);
+                                    const Scalar tmp1 = w66*(X_2_0 + X_2_4);
+                                    const Scalar tmp2 = w64*(X_0_0 + X_0_1);
+                                    const Scalar tmp3 = w68*(X_2_1 + X_2_2 + X_2_5 + X_2_6);
+                                    const Scalar tmp4 = w65*(X_1_0 + X_1_2);
+                                    const Scalar tmp5 = w70*(X_2_3 + X_2_7);
+                                    const Scalar tmp6 = w67*(X_1_1 + X_1_3 + X_1_4 + X_1_6);
+                                    const Scalar tmp7 = w71*(X_1_5 + X_1_7);
+                                    const Scalar tmp8 = w69*(X_0_2 + X_0_3 + X_0_4 + X_0_5);
+                                    const Scalar tmp9 = w72*(-X_0_6 - X_0_7);
+                                    const Scalar tmp10 = w66*(X_2_1 + X_2_5);
+                                    const Scalar tmp11 = w64*(-X_0_0 - X_0_1);
+                                    const Scalar tmp12 = w68*(X_2_0 + X_2_3 + X_2_4 + X_2_7);
+                                    const Scalar tmp13 = w65*(X_1_1 + X_1_3);
+                                    const Scalar tmp14 = w70*(X_2_2 + X_2_6);
+                                    const Scalar tmp15 = w67*(X_1_0 + X_1_2 + X_1_5 + X_1_7);
+                                    const Scalar tmp16 = w71*(X_1_4 + X_1_6);
+                                    const Scalar tmp17 = w69*(-X_0_2 - X_0_3 - X_0_4 - X_0_5);
+                                    const Scalar tmp18 = w72*(X_0_4 + X_0_5);
+                                    const Scalar tmp19 = w66*(X_2_2 + X_2_6);
+                                    const Scalar tmp20 = w64*(X_0_2 + X_0_3);
+                                    const Scalar tmp21 = w65*(-X_1_0 - X_1_2);
+                                    const Scalar tmp22 = w70*(X_2_1 + X_2_5);
+                                    const Scalar tmp23 = w67*(-X_1_1 - X_1_3 - X_1_4 - X_1_6);
+                                    const Scalar tmp24 = w71*(-X_1_5 - X_1_7);
+                                    const Scalar tmp25 = w69*(X_0_0 + X_0_1 + X_0_6 + X_0_7);
+                                    const Scalar tmp26 = w72*(-X_0_4 - X_0_5);
+                                    const Scalar tmp27 = w66*(X_2_3 + X_2_7);
+                                    const Scalar tmp28 = w64*(-X_0_2 - X_0_3);
+                                    const Scalar tmp29 = w65*(-X_1_1 - X_1_3);
+                                    const Scalar tmp30 = w70*(X_2_0 + X_2_4);
+                                    const Scalar tmp31 = w67*(-X_1_0 - X_1_2 - X_1_5 - X_1_7);
+                                    const Scalar tmp32 = w71*(-X_1_4 - X_1_6);
+                                    const Scalar tmp33 = w69*(-X_0_0 - X_0_1 - X_0_6 - X_0_7);
+                                    const Scalar tmp34 = w72*(X_0_2 + X_0_3);
+                                    const Scalar tmp35 = w66*(-X_2_0 - X_2_4);
+                                    const Scalar tmp36 = w64*(X_0_4 + X_0_5);
+                                    const Scalar tmp37 = w68*(-X_2_1 - X_2_2 - X_2_5 - X_2_6);
+                                    const Scalar tmp38 = w65*(X_1_4 + X_1_6);
+                                    const Scalar tmp39 = w70*(-X_2_3 - X_2_7);
+                                    const Scalar tmp40 = w71*(X_1_1 + X_1_3);
+                                    const Scalar tmp41 = w72*(-X_0_2 - X_0_3);
+                                    const Scalar tmp42 = w66*(-X_2_1 - X_2_5);
+                                    const Scalar tmp43 = w64*(-X_0_4 - X_0_5);
+                                    const Scalar tmp44 = w68*(-X_2_0 - X_2_3 - X_2_4 - X_2_7);
+                                    const Scalar tmp45 = w65*(X_1_5 + X_1_7);
+                                    const Scalar tmp46 = w70*(-X_2_2 - X_2_6);
+                                    const Scalar tmp47 = w71*(X_1_0 + X_1_2);
+                                    const Scalar tmp48 = w72*(X_0_0 + X_0_1);
+                                    const Scalar tmp49 = w66*(-X_2_2 - X_2_6);
+                                    const Scalar tmp50 = w64*(X_0_6 + X_0_7);
+                                    const Scalar tmp51 = w65*(-X_1_4 - X_1_6);
+                                    const Scalar tmp52 = w70*(-X_2_1 - X_2_5);
+                                    const Scalar tmp53 = w71*(-X_1_1 - X_1_3);
+                                    const Scalar tmp54 = w72*(-X_0_0 - X_0_1);
+                                    const Scalar tmp55 = w66*(-X_2_3 - X_2_7);
+                                    const Scalar tmp56 = w64*(-X_0_6 - X_0_7);
+                                    const Scalar tmp57 = w65*(-X_1_5 - X_1_7);
+                                    const Scalar tmp58 = w70*(-X_2_0 - X_2_4);
+                                    const Scalar tmp59 = w71*(-X_1_0 - X_1_2);
                                     EM_F[INDEX2(k,0,numEq)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 + tmp7 + tmp8;
                                     EM_F[INDEX2(k,1,numEq)]+=tmp10 + tmp11 + tmp12 + tmp13 + tmp14 + tmp15 + tmp16 + tmp17 + tmp9;
                                     EM_F[INDEX2(k,2,numEq)]+=tmp12 + tmp18 + tmp19 + tmp20 + tmp21 + tmp22 + tmp23 + tmp24 + tmp25;
@@ -5640,9 +5665,9 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double wX0 = 18*X_p[INDEX2(k, 0, numEq)]*w55;
-                                    const double wX1 = 18*X_p[INDEX2(k, 1, numEq)]*w56;
-                                    const double wX2 = 18*X_p[INDEX2(k, 2, numEq)]*w54;
+                                    const Scalar wX0 = 18.*X_p[INDEX2(k, 0, numEq)]*w55;
+                                    const Scalar wX1 = 18.*X_p[INDEX2(k, 1, numEq)]*w56;
+                                    const Scalar wX2 = 18.*X_p[INDEX2(k, 2, numEq)]*w54;
                                     EM_F[INDEX2(k,0,numEq)]+= wX0 + wX1 + wX2;
                                     EM_F[INDEX2(k,1,numEq)]+=-wX0 + wX1 + wX2;
                                     EM_F[INDEX2(k,2,numEq)]+= wX0 - wX1 + wX2;
@@ -5658,33 +5683,33 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double* Y_p=Y.getSampleDataRO(e);
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                             if (Y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
-                                    const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
-                                    const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
-                                    const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
-                                    const double Y_4 = Y_p[INDEX2(k, 4, numEq)];
-                                    const double Y_5 = Y_p[INDEX2(k, 5, numEq)];
-                                    const double Y_6 = Y_p[INDEX2(k, 6, numEq)];
-                                    const double Y_7 = Y_p[INDEX2(k, 7, numEq)];
-                                    const double tmp0 = w76*(Y_3 + Y_5 + Y_6);
-                                    const double tmp1 = w75*(Y_1 + Y_2 + Y_4);
-                                    const double tmp2 = w76*(Y_2 + Y_4 + Y_7);
-                                    const double tmp3 = w75*(Y_0 + Y_3 + Y_5);
-                                    const double tmp4 = w76*(Y_1 + Y_4 + Y_7);
-                                    const double tmp5 = w75*(Y_0 + Y_3 + Y_6);
-                                    const double tmp6 = w76*(Y_0 + Y_5 + Y_6);
-                                    const double tmp7 = w75*(Y_1 + Y_2 + Y_7);
-                                    const double tmp8 = w76*(Y_1 + Y_2 + Y_7);
-                                    const double tmp9 = w75*(Y_0 + Y_5 + Y_6);
-                                    const double tmp10 = w76*(Y_0 + Y_3 + Y_6);
-                                    const double tmp11 = w75*(Y_1 + Y_4 + Y_7);
-                                    const double tmp12 = w76*(Y_0 + Y_3 + Y_5);
-                                    const double tmp13 = w75*(Y_2 + Y_4 + Y_7);
-                                    const double tmp14 = w76*(Y_1 + Y_2 + Y_4);
-                                    const double tmp15 = w75*(Y_3 + Y_5 + Y_6);
+                                    const Scalar Y_0 = Y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar Y_1 = Y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar Y_2 = Y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar Y_3 = Y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar Y_4 = Y_p[INDEX2(k, 4, numEq)];
+                                    const Scalar Y_5 = Y_p[INDEX2(k, 5, numEq)];
+                                    const Scalar Y_6 = Y_p[INDEX2(k, 6, numEq)];
+                                    const Scalar Y_7 = Y_p[INDEX2(k, 7, numEq)];
+                                    const Scalar tmp0 = w76*(Y_3 + Y_5 + Y_6);
+                                    const Scalar tmp1 = w75*(Y_1 + Y_2 + Y_4);
+                                    const Scalar tmp2 = w76*(Y_2 + Y_4 + Y_7);
+                                    const Scalar tmp3 = w75*(Y_0 + Y_3 + Y_5);
+                                    const Scalar tmp4 = w76*(Y_1 + Y_4 + Y_7);
+                                    const Scalar tmp5 = w75*(Y_0 + Y_3 + Y_6);
+                                    const Scalar tmp6 = w76*(Y_0 + Y_5 + Y_6);
+                                    const Scalar tmp7 = w75*(Y_1 + Y_2 + Y_7);
+                                    const Scalar tmp8 = w76*(Y_1 + Y_2 + Y_7);
+                                    const Scalar tmp9 = w75*(Y_0 + Y_5 + Y_6);
+                                    const Scalar tmp10 = w76*(Y_0 + Y_3 + Y_6);
+                                    const Scalar tmp11 = w75*(Y_1 + Y_4 + Y_7);
+                                    const Scalar tmp12 = w76*(Y_0 + Y_3 + Y_5);
+                                    const Scalar tmp13 = w75*(Y_2 + Y_4 + Y_7);
+                                    const Scalar tmp14 = w76*(Y_1 + Y_2 + Y_4);
+                                    const Scalar tmp15 = w75*(Y_3 + Y_5 + Y_6);
                                     EM_F[INDEX2(k,0,numEq)]+=Y_0*w74 + Y_7*w77 + tmp0 + tmp1;
                                     EM_F[INDEX2(k,1,numEq)]+=Y_1*w74 + Y_6*w77 + tmp2 + tmp3;
                                     EM_F[INDEX2(k,2,numEq)]+=Y_2*w74 + Y_5*w77 + tmp4 + tmp5;
@@ -5696,14 +5721,14 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,0,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,1,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,2,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,3,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,4,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,5,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,6,numEq)]+=216*Y_p[k]*w58;
-                                    EM_F[INDEX2(k,7,numEq)]+=216*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,0,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,1,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,2,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,3,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,4,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,5,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,6,numEq)]+=216.*Y_p[k]*w58;
+                                    EM_F[INDEX2(k,7,numEq)]+=216.*Y_p[k]*w58;
                                 }
                             }
                         }
@@ -5723,8 +5748,10 @@ void DefaultAssembler3D::assemblePDESystem(AbstractSystemMatrix* mat, Data& rhs,
 // PDE SYSTEM BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
-                               Data& rhs, const Data& d, const Data& y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySystem(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& d, const Data& y) const
 {
     dim_t numEq, numComp;
     if (!mat)
@@ -5749,23 +5776,24 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
     const double w1 = w2*(SQRT3 + 2);
     const double w3 = w2*(-4*SQRT3 + 7);
     const double w4 = w2*(4*SQRT3 + 7);
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = !d.isEmpty();
     const bool add_EM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8*numEq*numComp, 0);
-        vector<double> EM_F(8*numEq, 0);
+        vector<Scalar> EM_S(8*8*numEq*numComp, zero);
+        vector<Scalar> EM_F(8*numEq, zero);
 
         if (domain->m_faceOffset[0] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -5776,25 +5804,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w0*(d_0 + d_1);
-                                        const double tmp1 = w1*(d_2 + d_3);
-                                        const double tmp2 = w0*(d_0 + d_2);
-                                        const double tmp3 = w1*(d_1 + d_3);
-                                        const double tmp4 = w0*(d_1 + d_3);
-                                        const double tmp5 = w1*(d_0 + d_2);
-                                        const double tmp6 = w0*(d_2 + d_3);
-                                        const double tmp7 = w1*(d_0 + d_1);
-                                        const double tmp8 = w2*(d_0 + d_3);
-                                        const double tmp9 = w2*(d_1 + d_2);
-                                        const double tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w0*(d_0 + d_1);
+                                        const Scalar tmp1 = w1*(d_2 + d_3);
+                                        const Scalar tmp2 = w0*(d_0 + d_2);
+                                        const Scalar tmp3 = w1*(d_1 + d_3);
+                                        const Scalar tmp4 = w0*(d_1 + d_3);
+                                        const Scalar tmp5 = w1*(d_0 + d_2);
+                                        const Scalar tmp6 = w0*(d_2 + d_3);
+                                        const Scalar tmp7 = w1*(d_0 + d_1);
+                                        const Scalar tmp8 = w2*(d_0 + d_3);
+                                        const Scalar tmp9 = w2*(d_1 + d_2);
+                                        const Scalar tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = d_0*w4 + d_3*w3 + tmp9;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = tmp6 + tmp7;
                                         EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = tmp4 + tmp5;
@@ -5816,23 +5844,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w2;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w2;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,6,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,2,4,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,2,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,0,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -5841,26 +5869,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w2*(y_1 + y_2);
-                                    const double tmp1 = 6*w2*(y_0 + y_3);
-                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6*w0*y_3 + 6*w1*y_0;
-                                    EM_F[INDEX2(k,2,numEq)] = tmp1 + 6*w0*y_2 + 6*w1*y_1;
-                                    EM_F[INDEX2(k,4,numEq)] = tmp1 + 6*w0*y_1 + 6*w1*y_2;
-                                    EM_F[INDEX2(k,6,numEq)] = tmp0 + 6*w0*y_0 + 6*w1*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w2*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w2*(y_0 + y_3);
+                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6.*w0*y_3 + 6.*w1*y_0;
+                                    EM_F[INDEX2(k,2,numEq)] = tmp1 + 6.*w0*y_2 + 6.*w1*y_1;
+                                    EM_F[INDEX2(k,4,numEq)] = tmp1 + 6.*w0*y_1 + 6.*w1*y_2;
+                                    EM_F[INDEX2(k,6,numEq)] = tmp0 + 6.*w0*y_0 + 6.*w1*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,0,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,2,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,4,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,6,numEq)] = 36*w2*y_p[k];
+                                    EM_F[INDEX2(k,0,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,2,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,4,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,6,numEq)] = 36.*w2*y_p[k];
                                 }
                             }
                         }
@@ -5874,9 +5902,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[1] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -5887,25 +5915,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w0*(d_0 + d_2);
-                                        const double tmp1 = w1*(d_1 + d_3);
-                                        const double tmp2 = w0*(d_2 + d_3);
-                                        const double tmp3 = w1*(d_0 + d_1);
-                                        const double tmp4 = w0*(d_1 + d_3);
-                                        const double tmp5 = w1*(d_0 + d_2);
-                                        const double tmp6 = w2*(d_0 + d_3);
-                                        const double tmp7 = w2*(d_1 + d_2);
-                                        const double tmp8 = w0*(d_0 + d_1);
-                                        const double tmp9 = w1*(d_2 + d_3);
-                                        const double tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w0*(d_0 + d_2);
+                                        const Scalar tmp1 = w1*(d_1 + d_3);
+                                        const Scalar tmp2 = w0*(d_2 + d_3);
+                                        const Scalar tmp3 = w1*(d_0 + d_1);
+                                        const Scalar tmp4 = w0*(d_1 + d_3);
+                                        const Scalar tmp5 = w1*(d_0 + d_2);
+                                        const Scalar tmp6 = w2*(d_0 + d_3);
+                                        const Scalar tmp7 = w2*(d_1 + d_2);
+                                        const Scalar tmp8 = w0*(d_0 + d_1);
+                                        const Scalar tmp9 = w1*(d_2 + d_3);
+                                        const Scalar tmp10 = w2*(d_0 + d_1 + d_2 + d_3);
                                         EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = d_0*w4 + d_3*w3 + tmp7;
                                         EM_S[INDEX4(k,m,1,3,numEq,numComp,8)] = tmp2 + tmp3;
                                         EM_S[INDEX4(k,m,1,5,numEq,numComp,8)] = tmp4 + tmp5;
@@ -5927,23 +5955,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w2;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w2;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,1,7,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,3,5,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,3,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,1,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -5952,26 +5980,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w2*(y_1 + y_2);
-                                    const double tmp1 = 6*w2*(y_0 + y_3);
-                                    EM_F[INDEX2(k,1,numEq)] = tmp0 + 6*w0*y_3 + 6*w1*y_0;
-                                    EM_F[INDEX2(k,3,numEq)] = tmp1 + 6*w0*y_2 + 6*w1*y_1;
-                                    EM_F[INDEX2(k,5,numEq)] = tmp1 + 6*w0*y_1 + 6*w1*y_2;
-                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6*w0*y_0 + 6*w1*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w2*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w2*(y_0 + y_3);
+                                    EM_F[INDEX2(k,1,numEq)] = tmp0 + 6.*w0*y_3 + 6.*w1*y_0;
+                                    EM_F[INDEX2(k,3,numEq)] = tmp1 + 6.*w0*y_2 + 6.*w1*y_1;
+                                    EM_F[INDEX2(k,5,numEq)] = tmp1 + 6.*w0*y_1 + 6.*w1*y_2;
+                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6.*w0*y_0 + 6.*w1*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,1,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,3,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,5,numEq)] = 36*w2*y_p[k];
-                                    EM_F[INDEX2(k,7,numEq)] = 36*w2*y_p[k];
+                                    EM_F[INDEX2(k,1,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,3,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,5,numEq)] = 36.*w2*y_p[k];
+                                    EM_F[INDEX2(k,7,numEq)] = 36.*w2*y_p[k];
                                 }
                             }
                         }
@@ -5985,9 +6013,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[2] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -5998,25 +6026,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w5*(d_0 + d_1);
-                                        const double tmp1 = w6*(d_2 + d_3);
-                                        const double tmp2 = w5*(d_0 + d_2);
-                                        const double tmp3 = w6*(d_1 + d_3);
-                                        const double tmp4 = w5*(d_1 + d_3);
-                                        const double tmp5 = w6*(d_0 + d_2);
-                                        const double tmp6 = w7*(d_0 + d_3);
-                                        const double tmp7 = w7*(d_0 + d_1 + d_2 + d_3);
-                                        const double tmp8 = w7*(d_1 + d_2);
-                                        const double tmp9 = w5*(d_2 + d_3);
-                                        const double tmp10 = w6*(d_0 + d_1);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w5*(d_0 + d_1);
+                                        const Scalar tmp1 = w6*(d_2 + d_3);
+                                        const Scalar tmp2 = w5*(d_0 + d_2);
+                                        const Scalar tmp3 = w6*(d_1 + d_3);
+                                        const Scalar tmp4 = w5*(d_1 + d_3);
+                                        const Scalar tmp5 = w6*(d_0 + d_2);
+                                        const Scalar tmp6 = w7*(d_0 + d_3);
+                                        const Scalar tmp7 = w7*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar tmp8 = w7*(d_1 + d_2);
+                                        const Scalar tmp9 = w5*(d_2 + d_3);
+                                        const Scalar tmp10 = w6*(d_0 + d_1);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = d_0*w9 + d_3*w8 + tmp8;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = tmp10 + tmp9;
                                         EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = tmp4 + tmp5;
@@ -6038,23 +6066,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w7;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w7;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,5,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,1,4,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,1,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,1,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,0,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,5,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -6063,26 +6091,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w7*(y_1 + y_2);
-                                    const double tmp1 = 6*w7*(y_0 + y_3);
-                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6*w5*y_3 + 6*w6*y_0;
-                                    EM_F[INDEX2(k,1,numEq)] = tmp1 + 6*w5*y_2 + 6*w6*y_1;
-                                    EM_F[INDEX2(k,4,numEq)] = tmp1 + 6*w5*y_1 + 6*w6*y_2;
-                                    EM_F[INDEX2(k,5,numEq)] = tmp0 + 6*w5*y_0 + 6*w6*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w7*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w7*(y_0 + y_3);
+                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6.*w5*y_3 + 6.*w6*y_0;
+                                    EM_F[INDEX2(k,1,numEq)] = tmp1 + 6.*w5*y_2 + 6.*w6*y_1;
+                                    EM_F[INDEX2(k,4,numEq)] = tmp1 + 6.*w5*y_1 + 6.*w6*y_2;
+                                    EM_F[INDEX2(k,5,numEq)] = tmp0 + 6.*w5*y_0 + 6.*w6*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,0,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,1,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,4,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,5,numEq)] = 36*w7*y_p[k];
+                                    EM_F[INDEX2(k,0,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,1,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,4,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,5,numEq)] = 36.*w7*y_p[k];
                                 }
                             }
                         }
@@ -6096,9 +6124,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[3] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -6109,25 +6137,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w5*(d_0 + d_2);
-                                        const double tmp1 = w6*(d_1 + d_3);
-                                        const double tmp2 = w5*(d_1 + d_3);
-                                        const double tmp3 = w6*(d_0 + d_2);
-                                        const double tmp4 = w7*(d_0 + d_1 + d_2 + d_3);
-                                        const double tmp5 = w5*(d_0 + d_1);
-                                        const double tmp6 = w6*(d_2 + d_3);
-                                        const double tmp7 = w7*(d_0 + d_3);
-                                        const double tmp8 = w7*(d_1 + d_2);
-                                        const double tmp9 = w5*(d_2 + d_3);
-                                        const double tmp10 = w6*(d_0 + d_1);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w5*(d_0 + d_2);
+                                        const Scalar tmp1 = w6*(d_1 + d_3);
+                                        const Scalar tmp2 = w5*(d_1 + d_3);
+                                        const Scalar tmp3 = w6*(d_0 + d_2);
+                                        const Scalar tmp4 = w7*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar tmp5 = w5*(d_0 + d_1);
+                                        const Scalar tmp6 = w6*(d_2 + d_3);
+                                        const Scalar tmp7 = w7*(d_0 + d_3);
+                                        const Scalar tmp8 = w7*(d_1 + d_2);
+                                        const Scalar tmp9 = w5*(d_2 + d_3);
+                                        const Scalar tmp10 = w6*(d_0 + d_1);
                                         EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = d_0*w9 + d_3*w8 + tmp8;
                                         EM_S[INDEX4(k,m,2,3,numEq,numComp,8)] = tmp10 + tmp9;
                                         EM_S[INDEX4(k,m,2,6,numEq,numComp,8)] = tmp2 + tmp3;
@@ -6149,23 +6177,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w7;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)] = 2*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w7;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,2,6,numEq,numComp,8)] = 2.*wd0;
                                         EM_S[INDEX4(k,m,2,7,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4*wd0;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4.*wd0;
                                         EM_S[INDEX4(k,m,3,6,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)] = 2*wd0;
+                                        EM_S[INDEX4(k,m,3,7,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,2,numEq,numComp,8)] = 2.*wd0;
                                         EM_S[INDEX4(k,m,6,3,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)] = 2*wd0;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)] = 2.*wd0;
                                         EM_S[INDEX4(k,m,7,2,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4*wd0;
+                                        EM_S[INDEX4(k,m,7,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -6174,26 +6202,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w7*(y_1 + y_2);
-                                    const double tmp1 = 6*w7*(y_0 + y_3);
-                                    EM_F[INDEX2(k,2,numEq)] = tmp0 + 6*w5*y_3 + 6*w6*y_0;
-                                    EM_F[INDEX2(k,3,numEq)] = tmp1 + 6*w5*y_2 + 6*w6*y_1;
-                                    EM_F[INDEX2(k,6,numEq)] = tmp1 + 6*w5*y_1 + 6*w6*y_2;
-                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6*w5*y_0 + 6*w6*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w7*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w7*(y_0 + y_3);
+                                    EM_F[INDEX2(k,2,numEq)] = tmp0 + 6.*w5*y_3 + 6.*w6*y_0;
+                                    EM_F[INDEX2(k,3,numEq)] = tmp1 + 6.*w5*y_2 + 6.*w6*y_1;
+                                    EM_F[INDEX2(k,6,numEq)] = tmp1 + 6.*w5*y_1 + 6.*w6*y_2;
+                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6.*w5*y_0 + 6.*w6*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,2,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,3,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,6,numEq)] = 36*w7*y_p[k];
-                                    EM_F[INDEX2(k,7,numEq)] = 36*w7*y_p[k];
+                                    EM_F[INDEX2(k,2,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,3,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,6,numEq)] = 36.*w7*y_p[k];
+                                    EM_F[INDEX2(k,7,numEq)] = 36.*w7*y_p[k];
                                 }
                             }
                         }
@@ -6207,9 +6235,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[4] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -6220,25 +6248,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w10*(d_0 + d_2);
-                                        const double tmp1 = w11*(d_1 + d_3);
-                                        const double tmp2 = w12*(d_0 + d_1 + d_2 + d_3);
-                                        const double tmp3 = w12*(d_1 + d_2);
-                                        const double tmp4 = w10*(d_1 + d_3);
-                                        const double tmp5 = w11*(d_0 + d_2);
-                                        const double tmp6 = w12*(d_0 + d_3);
-                                        const double tmp7 = w10*(d_0 + d_1);
-                                        const double tmp8 = w11*(d_2 + d_3);
-                                        const double tmp9 = w10*(d_2 + d_3);
-                                        const double tmp10 = w11*(d_0 + d_1);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w10*(d_0 + d_2);
+                                        const Scalar tmp1 = w11*(d_1 + d_3);
+                                        const Scalar tmp2 = w12*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar tmp3 = w12*(d_1 + d_2);
+                                        const Scalar tmp4 = w10*(d_1 + d_3);
+                                        const Scalar tmp5 = w11*(d_0 + d_2);
+                                        const Scalar tmp6 = w12*(d_0 + d_3);
+                                        const Scalar tmp7 = w10*(d_0 + d_1);
+                                        const Scalar tmp8 = w11*(d_2 + d_3);
+                                        const Scalar tmp9 = w10*(d_2 + d_3);
+                                        const Scalar tmp10 = w11*(d_0 + d_1);
                                         EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = d_0*w14 + d_3*w13 + tmp3;
                                         EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = tmp10 + tmp9;
                                         EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = tmp4 + tmp5;
@@ -6260,23 +6288,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w12;
-                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w12;
+                                        EM_S[INDEX4(k,m,0,0,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,0,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,2,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,0,3,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,1,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,1,1,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,1,2,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,1,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,2,0,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,2,1,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,2,2,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,2,3,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,3,0,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,3,1,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,3,2,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,3,3,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -6285,26 +6313,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w12*(y_1 + y_2);
-                                    const double tmp1 = 6*w12*(y_0 + y_3);
-                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6*w10*y_3 + 6*w11*y_0;
-                                    EM_F[INDEX2(k,1,numEq)] = tmp1 + 6*w10*y_2 + 6*w11*y_1;
-                                    EM_F[INDEX2(k,2,numEq)] = tmp1 + 6*w10*y_1 + 6*w11*y_2;
-                                    EM_F[INDEX2(k,3,numEq)] = tmp0 + 6*w10*y_0 + 6*w11*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w12*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w12*(y_0 + y_3);
+                                    EM_F[INDEX2(k,0,numEq)] = tmp0 + 6.*w10*y_3 + 6.*w11*y_0;
+                                    EM_F[INDEX2(k,1,numEq)] = tmp1 + 6.*w10*y_2 + 6.*w11*y_1;
+                                    EM_F[INDEX2(k,2,numEq)] = tmp1 + 6.*w10*y_1 + 6.*w11*y_2;
+                                    EM_F[INDEX2(k,3,numEq)] = tmp0 + 6.*w10*y_0 + 6.*w11*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,0,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,1,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,2,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,3,numEq)] = 36*w12*y_p[k];
+                                    EM_F[INDEX2(k,0,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,1,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,2,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,3,numEq)] = 36.*w12*y_p[k];
                                 }
                             }
                         }
@@ -6318,9 +6346,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 
         if (domain->m_faceOffset[5] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -6331,25 +6359,25 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             if (d.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
-                                        const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
-                                        const double d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
-                                        const double d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
-                                        const double tmp0 = w12*(d_0 + d_1 + d_2 + d_3);
-                                        const double tmp1 = w10*(d_1 + d_3);
-                                        const double tmp2 = w11*(d_0 + d_2);
-                                        const double tmp3 = w10*(d_2 + d_3);
-                                        const double tmp4 = w11*(d_0 + d_1);
-                                        const double tmp5 = w10*(d_0 + d_1);
-                                        const double tmp6 = w11*(d_2 + d_3);
-                                        const double tmp7 = w12*(d_1 + d_2);
-                                        const double tmp8 = w10*(d_0 + d_2);
-                                        const double tmp9 = w11*(d_1 + d_3);
-                                        const double tmp10 = w12*(d_0 + d_3);
+                                        const Scalar d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
+                                        const Scalar d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
+                                        const Scalar d_2 = d_p[INDEX3(k,m,2,numEq,numComp)];
+                                        const Scalar d_3 = d_p[INDEX3(k,m,3,numEq,numComp)];
+                                        const Scalar tmp0 = w12*(d_0 + d_1 + d_2 + d_3);
+                                        const Scalar tmp1 = w10*(d_1 + d_3);
+                                        const Scalar tmp2 = w11*(d_0 + d_2);
+                                        const Scalar tmp3 = w10*(d_2 + d_3);
+                                        const Scalar tmp4 = w11*(d_0 + d_1);
+                                        const Scalar tmp5 = w10*(d_0 + d_1);
+                                        const Scalar tmp6 = w11*(d_2 + d_3);
+                                        const Scalar tmp7 = w12*(d_1 + d_2);
+                                        const Scalar tmp8 = w10*(d_0 + d_2);
+                                        const Scalar tmp9 = w11*(d_1 + d_3);
+                                        const Scalar tmp10 = w12*(d_0 + d_3);
                                         EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = d_0*w14 + d_3*w13 + tmp7;
                                         EM_S[INDEX4(k,m,5,4,numEq,numComp,8)] = tmp3 + tmp4;
                                         EM_S[INDEX4(k,m,6,4,numEq,numComp,8)] = tmp1 + tmp2;
@@ -6371,23 +6399,23 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
                                     for (index_t m=0; m<numComp; m++) {
-                                        const double wd0 = 4*d_p[INDEX2(k, m, numEq)]*w12;
-                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4*wd0;
-                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)] =   wd0;
-                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)] = 2*wd0;
-                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4*wd0;
+                                        const Scalar wd0 = 4.*d_p[INDEX2(k, m, numEq)]*w12;
+                                        EM_S[INDEX4(k,m,4,4,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,5,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,4,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,4,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,4,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,5,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,6,5,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,7,5,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,6,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,5,6,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,6,6,numEq,numComp,8)] = 4.*wd0;
+                                        EM_S[INDEX4(k,m,7,6,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,4,7,numEq,numComp,8)] =    wd0;
+                                        EM_S[INDEX4(k,m,5,7,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,6,7,numEq,numComp,8)] = 2.*wd0;
+                                        EM_S[INDEX4(k,m,7,7,numEq,numComp,8)] = 4.*wd0;
                                     }
                                 }
                             }
@@ -6396,26 +6424,26 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             if (y.actsExpanded()) {
                                 for (index_t k=0; k<numEq; k++) {
-                                    const double y_0 = y_p[INDEX2(k, 0, numEq)];
-                                    const double y_1 = y_p[INDEX2(k, 1, numEq)];
-                                    const double y_2 = y_p[INDEX2(k, 2, numEq)];
-                                    const double y_3 = y_p[INDEX2(k, 3, numEq)];
-                                    const double tmp0 = 6*w12*(y_1 + y_2);
-                                    const double tmp1 = 6*w12*(y_0 + y_3);
-                                    EM_F[INDEX2(k,4,numEq)] = tmp0 + 6*w10*y_3 + 6*w11*y_0;
-                                    EM_F[INDEX2(k,5,numEq)] = tmp1 + 6*w10*y_2 + 6*w11*y_1;
-                                    EM_F[INDEX2(k,6,numEq)] = tmp1 + 6*w10*y_1 + 6*w11*y_2;
-                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6*w10*y_0 + 6*w11*y_3;
+                                    const Scalar y_0 = y_p[INDEX2(k, 0, numEq)];
+                                    const Scalar y_1 = y_p[INDEX2(k, 1, numEq)];
+                                    const Scalar y_2 = y_p[INDEX2(k, 2, numEq)];
+                                    const Scalar y_3 = y_p[INDEX2(k, 3, numEq)];
+                                    const Scalar tmp0 = 6.*w12*(y_1 + y_2);
+                                    const Scalar tmp1 = 6.*w12*(y_0 + y_3);
+                                    EM_F[INDEX2(k,4,numEq)] = tmp0 + 6.*w10*y_3 + 6.*w11*y_0;
+                                    EM_F[INDEX2(k,5,numEq)] = tmp1 + 6.*w10*y_2 + 6.*w11*y_1;
+                                    EM_F[INDEX2(k,6,numEq)] = tmp1 + 6.*w10*y_1 + 6.*w11*y_2;
+                                    EM_F[INDEX2(k,7,numEq)] = tmp0 + 6.*w10*y_0 + 6.*w11*y_3;
                                 }
                             } else { // constant data
                                 for (index_t k=0; k<numEq; k++) {
-                                    EM_F[INDEX2(k,4,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,5,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,6,numEq)] = 36*w12*y_p[k];
-                                    EM_F[INDEX2(k,7,numEq)] = 36*w12*y_p[k];
+                                    EM_F[INDEX2(k,4,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,5,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,6,numEq)] = 36.*w12*y_p[k];
+                                    EM_F[INDEX2(k,7,numEq)] = 36.*w12*y_p[k];
                                 }
                             }
                         }
@@ -6433,10 +6461,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
 // PDE SYSTEM REDUCED
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
-                                     Data& rhs, const Data& A, const Data& B,
-                                     const Data& C, const Data& D,
-                                     const Data& X, const Data& Y) const
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDESystemReduced(
+                                        AbstractSystemMatrix* mat, Data& rhs,
+                                        const Data& A, const Data& B,
+                                        const Data& C, const Data& D,
+                                        const Data& X, const Data& Y) const
 {
     dim_t numEq, numComp;
     if (!mat)
@@ -6456,17 +6486,18 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
     const double w7 = m_dx[0]*m_dx[2]/(16*m_dx[1]);
     const double w8 = m_dx[1]*m_dx[2]/(16*m_dx[0]);
     const double w9 = m_dx[0]*m_dx[1]*m_dx[2]/64;
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = (!A.isEmpty() || !B.isEmpty() || !C.isEmpty() || !D.isEmpty());
     const bool add_EM_F = (!X.isEmpty() || !Y.isEmpty());
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8*numEq*numComp, 0);
-        vector<double> EM_F(8*numEq, 0);
+        vector<Scalar> EM_S(8*8*numEq*numComp, zero);
+        vector<Scalar> EM_F(8*numEq, zero);
 
         for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
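
The "colouring" loop visible at the end of this hunk (k2_0 running over 0 and 1 with the OpenMP worksharing loop inside) is the usual even/odd element colouring: elements whose k2 indices differ by 2 never touch the same node plane, so each coloured sweep can scatter its element contributions into the shared right-hand side and matrix without races or atomics. A stand-alone sketch of the idea (my own toy example, not ripley code):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const int NE2 = 8, nodesPerPlane = 4;
        std::vector<double> rhs((NE2+1)*nodesPerPlane, 0.0);

        for (int colour = 0; colour < 2; ++colour) {      // two colours
    #pragma omp parallel for
            for (int k2 = colour; k2 < NE2; k2 += 2) {    // elements of one colour
                // element k2 writes to node planes k2 and k2+1; within one
                // colour these ranges never overlap between iterations
                for (int n = 0; n < nodesPerPlane; ++n) {
                    rhs[k2*nodesPerPlane + n]     += 1.0;
                    rhs[(k2+1)*nodesPerPlane + n] += 1.0;
                }
            }
        }
        std::printf("rhs[0]=%g rhs[4]=%g\n", rhs[0], rhs[4]);
        return 0;
    }
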
@@ -6475,26 +6506,26 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                     for (index_t k0=0; k0<NE0; ++k0)  {
                         const index_t e = k0 + NE0*k1 + NE0*NE1*k2;
                         if (add_EM_S)
-                            fill(EM_S.begin(), EM_S.end(), 0);
+                            fill(EM_S.begin(), EM_S.end(), zero);
                         if (add_EM_F)
-                            fill(EM_F.begin(), EM_F.end(), 0);
+                            fill(EM_F.begin(), EM_F.end(), zero);
 
                         ///////////////
                         // process A //
                         ///////////////
                         if (!A.isEmpty()) {
-                            const double* A_p=A.getSampleDataRO(e);
+                            const Scalar* A_p = A.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double Aw00 = A_p[INDEX4(k,0,m,0,numEq,3,numComp)]*w8;
-                                    const double Aw10 = A_p[INDEX4(k,1,m,0,numEq,3,numComp)]*w2;
-                                    const double Aw20 = A_p[INDEX4(k,2,m,0,numEq,3,numComp)]*w1;
-                                    const double Aw01 = A_p[INDEX4(k,0,m,1,numEq,3,numComp)]*w2;
-                                    const double Aw11 = A_p[INDEX4(k,1,m,1,numEq,3,numComp)]*w7;
-                                    const double Aw21 = A_p[INDEX4(k,2,m,1,numEq,3,numComp)]*w0;
-                                    const double Aw02 = A_p[INDEX4(k,0,m,2,numEq,3,numComp)]*w1;
-                                    const double Aw12 = A_p[INDEX4(k,1,m,2,numEq,3,numComp)]*w0;
-                                    const double Aw22 = A_p[INDEX4(k,2,m,2,numEq,3,numComp)]*w6;
+                                    const Scalar Aw00 = A_p[INDEX4(k,0,m,0,numEq,3,numComp)]*w8;
+                                    const Scalar Aw10 = A_p[INDEX4(k,1,m,0,numEq,3,numComp)]*w2;
+                                    const Scalar Aw20 = A_p[INDEX4(k,2,m,0,numEq,3,numComp)]*w1;
+                                    const Scalar Aw01 = A_p[INDEX4(k,0,m,1,numEq,3,numComp)]*w2;
+                                    const Scalar Aw11 = A_p[INDEX4(k,1,m,1,numEq,3,numComp)]*w7;
+                                    const Scalar Aw21 = A_p[INDEX4(k,2,m,1,numEq,3,numComp)]*w0;
+                                    const Scalar Aw02 = A_p[INDEX4(k,0,m,2,numEq,3,numComp)]*w1;
+                                    const Scalar Aw12 = A_p[INDEX4(k,1,m,2,numEq,3,numComp)]*w0;
+                                    const Scalar Aw22 = A_p[INDEX4(k,2,m,2,numEq,3,numComp)]*w6;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+= Aw00 + Aw01 + Aw02 + Aw10 + Aw11 + Aw12 + Aw20 + Aw21 + Aw22;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=-Aw00 - Aw01 - Aw02 + Aw10 + Aw11 + Aw12 + Aw20 + Aw21 + Aw22;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+= Aw00 + Aw01 + Aw02 - Aw10 - Aw11 - Aw12 + Aw20 + Aw21 + Aw22;
@@ -6566,12 +6597,12 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                         // process B //
                         ///////////////
                         if (!B.isEmpty()) {
-                            const double* B_p=B.getSampleDataRO(e);
+                            const Scalar* B_p = B.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double wB0 = B_p[INDEX3(k,0,m, numEq, 3)]*w5;
-                                    const double wB1 = B_p[INDEX3(k,1,m, numEq, 3)]*w4;
-                                    const double wB2 = B_p[INDEX3(k,2,m, numEq, 3)]*w3;
+                                    const Scalar wB0 = B_p[INDEX3(k,0,m, numEq, 3)]*w5;
+                                    const Scalar wB1 = B_p[INDEX3(k,1,m, numEq, 3)]*w4;
+                                    const Scalar wB2 = B_p[INDEX3(k,2,m, numEq, 3)]*w3;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-wB0 - wB1 - wB2;
                                     EM_S[INDEX4(k,m,0,1,numEq,numComp,8)]+=-wB0 - wB1 - wB2;
                                     EM_S[INDEX4(k,m,0,2,numEq,numComp,8)]+=-wB0 - wB1 - wB2;
@@ -6643,12 +6674,12 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                         // process C //
                         ///////////////
                         if (!C.isEmpty()) {
-                            const double* C_p=C.getSampleDataRO(e);
+                            const Scalar* C_p = C.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w5;
-                                    const double wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w4;
-                                    const double wC2 = C_p[INDEX3(k, m, 2, numEq, numComp)]*w3;
+                                    const Scalar wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w5;
+                                    const Scalar wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w4;
+                                    const Scalar wC2 = C_p[INDEX3(k, m, 2, numEq, numComp)]*w3;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=-wC0 - wC1 - wC2;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=-wC0 - wC1 - wC2;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=-wC0 - wC1 - wC2;
@@ -6720,10 +6751,10 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                         // process D //
                         ///////////////
                         if (!D.isEmpty()) {
-                            const double* D_p=D.getSampleDataRO(e);
+                            const Scalar* D_p = D.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double wD = D_p[INDEX2(k, m, numEq)]*w9;
+                                    const Scalar wD = D_p[INDEX2(k, m, numEq)]*w9;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]+=wD;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]+=wD;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]+=wD;
@@ -6795,11 +6826,11 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                         // process X //
                         ///////////////
                         if (!X.isEmpty()) {
-                            const double* X_p=X.getSampleDataRO(e);
+                            const Scalar* X_p = X.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                const double wX0 = 8*X_p[INDEX2(k, 0, numEq)]*w5;
-                                const double wX1 = 8*X_p[INDEX2(k, 1, numEq)]*w4;
-                                const double wX2 = 8*X_p[INDEX2(k, 2, numEq)]*w3;
+                                const Scalar wX0 = 8.*X_p[INDEX2(k, 0, numEq)]*w5;
+                                const Scalar wX1 = 8.*X_p[INDEX2(k, 1, numEq)]*w4;
+                                const Scalar wX2 = 8.*X_p[INDEX2(k, 2, numEq)]*w3;
                                 EM_F[INDEX2(k,0,numEq)]+=-wX0 - wX1 - wX2;
                                 EM_F[INDEX2(k,1,numEq)]+= wX0 - wX1 - wX2;
                                 EM_F[INDEX2(k,2,numEq)]+=-wX0 + wX1 - wX2;
@@ -6814,16 +6845,16 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
                         // process Y //
                         ///////////////
                         if (!Y.isEmpty()) {
-                            const double* Y_p=Y.getSampleDataRO(e);
+                            const Scalar* Y_p = Y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,0,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,1,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,2,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,3,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,4,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,5,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,6,numEq)]+=8*Y_p[k]*w9;
-                                EM_F[INDEX2(k,7,numEq)]+=8*Y_p[k]*w9;
+                                EM_F[INDEX2(k,0,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,1,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,2,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,3,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,4,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,5,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,6,numEq)]+=8.*Y_p[k]*w9;
+                                EM_F[INDEX2(k,7,numEq)]+=8.*Y_p[k]*w9;
                             }
                         }
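
A note on the new second argument in calls such as Y.getSampleDataRO(e, zero): the value appears to be unused except that its type (real or complex Scalar) selects the matching overload of the Data accessor. I have not reproduced the escript API here; the snippet below is an invented miniature with made-up names (DataDemo, realBuf, cplxBuf) that only illustrates this type-based dispatch pattern, assuming that is indeed the intent.

    #include <complex>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct DataDemo {
        std::vector<double> realBuf;
        std::vector<std::complex<double> > cplxBuf;

        const double* getSampleDataRO(std::size_t e, double) const
        { return &realBuf[e]; }
        const std::complex<double>* getSampleDataRO(std::size_t e,
                                                    std::complex<double>) const
        { return &cplxBuf[e]; }
    };

    template<class Scalar>
    Scalar firstValue(const DataDemo& d)
    {
        const Scalar zero = static_cast<Scalar>(0);
        const Scalar* p = d.getSampleDataRO(0, zero);  // overload picked by type
        return *p;
    }

    int main()
    {
        DataDemo d;
        d.realBuf.assign(1, 1.5);
        d.cplxBuf.assign(1, std::complex<double>(0.0, 2.0));
        std::printf("real sample: %g\n", firstValue<double>(d));
        std::printf("imag part of complex sample: %g\n",
                    firstValue<std::complex<double> >(d).imag());
        return 0;
    }
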
 
@@ -6842,7 +6873,8 @@ void DefaultAssembler3D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
 // PDE SYSTEM REDUCED BOUNDARY
 /****************************************************************************/
 
-void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
+template<class Scalar>
+void DefaultAssembler3D<Scalar>::assemblePDEBoundarySystemReduced(
                                         AbstractSystemMatrix* mat, Data& rhs,
                                         const Data& d, const Data& y) const
 {
@@ -6856,23 +6888,24 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
     const double w0 = m_dx[0]*m_dx[1]/16.;
     const double w1 = m_dx[0]*m_dx[2]/16.;
     const double w2 = m_dx[1]*m_dx[2]/16.;
-    const int NE0 = m_NE[0];
-    const int NE1 = m_NE[1];
-    const int NE2 = m_NE[2];
+    const dim_t NE0 = m_NE[0];
+    const dim_t NE1 = m_NE[1];
+    const dim_t NE2 = m_NE[2];
     const bool add_EM_S = !d.isEmpty();
     const bool add_EM_F = !y.isEmpty();
+    const Scalar zero = static_cast<Scalar>(0);
     rhs.requireWrite();
 
 #pragma omp parallel
     {
-        vector<double> EM_S(8*8*numEq*numComp, 0);
-        vector<double> EM_F(8*numEq, 0);
+        vector<Scalar> EM_S(8*8*numEq*numComp, zero);
+        vector<Scalar> EM_F(8*numEq, zero);
 
         if (domain->m_faceOffset[0] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -6883,10 +6916,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w2;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w2;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]=tmp0;
@@ -6910,12 +6943,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,0,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,2,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,4,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,6,numEq)] = 4*w2*y_p[k];
+                                EM_F[INDEX2(k,0,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,2,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,4,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,6,numEq)] = 4.*w2*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*k1;
@@ -6928,9 +6961,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[1] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -6941,10 +6974,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w2;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w2;
                                     EM_S[INDEX4(k,m,1,1,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,3,1,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,5,1,numEq,numComp,8)]=tmp0;
@@ -6968,12 +7001,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,1,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,3,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,5,numEq)] = 4*w2*y_p[k];
-                                EM_F[INDEX2(k,7,numEq)] = 4*w2*y_p[k];
+                                EM_F[INDEX2(k,1,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,3,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,5,numEq)] = 4.*w2*y_p[k];
+                                EM_F[INDEX2(k,7,numEq)] = 4.*w2*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(k1+1)-2;
@@ -6986,9 +7019,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[2] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -6999,10 +7032,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,4,0,numEq,numComp,8)]=tmp0;
@@ -7026,12 +7059,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,0,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,1,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,4,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,5,numEq)] = 4*w1*y_p[k];
+                                EM_F[INDEX2(k,0,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,1,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,4,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,5,numEq)] = 4.*w1*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+k0;
@@ -7044,9 +7077,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[3] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k2_0=0; k2_0<2; k2_0++) { // colouring
 #pragma omp for
@@ -7057,10 +7090,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
                                     EM_S[INDEX4(k,m,2,2,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,3,2,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,6,2,numEq,numComp,8)]=tmp0;
@@ -7084,12 +7117,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,2,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,3,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,6,numEq)] = 4*w1*y_p[k];
-                                EM_F[INDEX2(k,7,numEq)] = 4*w1*y_p[k];
+                                EM_F[INDEX2(k,2,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,3,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,6,numEq)] = 4.*w1*y_p[k];
+                                EM_F[INDEX2(k,7,numEq)] = 4.*w1*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*k2+m_NN[0]*(m_NN[1]-2)+k0;
@@ -7102,9 +7135,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[4] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -7115,10 +7148,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
                                     EM_S[INDEX4(k,m,0,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,1,0,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,2,0,numEq,numComp,8)]=tmp0;
@@ -7142,12 +7175,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,0,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,1,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,2,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,3,numEq)] = 4*w0*y_p[k];
+                                EM_F[INDEX2(k,0,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,1,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,2,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,3,numEq)] = 4.*w0*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*k1+k0;
@@ -7160,9 +7193,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
 
         if (domain->m_faceOffset[5] > -1) {
             if (add_EM_S)
-                fill(EM_S.begin(), EM_S.end(), 0);
+                fill(EM_S.begin(), EM_S.end(), zero);
             if (add_EM_F)
-                fill(EM_F.begin(), EM_F.end(), 0);
+                fill(EM_F.begin(), EM_F.end(), zero);
 
             for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
 #pragma omp for
@@ -7173,10 +7206,10 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process d //
                         ///////////////
                         if (add_EM_S) {
-                            const double* d_p=d.getSampleDataRO(e);
+                            const Scalar* d_p = d.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
                                 for (index_t m=0; m<numComp; m++) {
-                                    const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
+                                    const Scalar tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
                                     EM_S[INDEX4(k,m,4,4,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,5,4,numEq,numComp,8)]=tmp0;
                                     EM_S[INDEX4(k,m,6,4,numEq,numComp,8)]=tmp0;
@@ -7200,12 +7233,12 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
                         // process y //
                         ///////////////
                         if (add_EM_F) {
-                            const double* y_p=y.getSampleDataRO(e);
+                            const Scalar* y_p = y.getSampleDataRO(e, zero);
                             for (index_t k=0; k<numEq; k++) {
-                                EM_F[INDEX2(k,4,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,5,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,6,numEq)] = 4*w0*y_p[k];
-                                EM_F[INDEX2(k,7,numEq)] = 4*w0*y_p[k];
+                                EM_F[INDEX2(k,4,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,5,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,6,numEq)] = 4.*w0*y_p[k];
+                                EM_F[INDEX2(k,7,numEq)] = 4.*w0*y_p[k];
                             }
                         }
                         const index_t firstNode=m_NN[0]*m_NN[1]*(m_NN[2]-2)+m_NN[0]*k1+k0;
@@ -7218,5 +7251,9 @@ void DefaultAssembler3D::assemblePDEBoundarySystemReduced(
     } // end of parallel region
 }
 
+// instantiate our two supported versions
+template class DefaultAssembler3D<escript::DataTypes::real_t>;
+template class DefaultAssembler3D<escript::DataTypes::cplx_t>;
+
 } // namespace ripley
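
The two "template class DefaultAssembler3D<...>" lines added above are explicit instantiations: the member definitions stay in this .cpp file, and only the real and complex scalar variants are emitted for other translation units to link against. A minimal self-contained sketch of the pattern (collapsed into one file here, so the linker restriction does not actually bite in this toy; names are my own):

    #include <complex>
    #include <iostream>

    template<class Scalar>
    struct Assembler {
        Scalar scale(Scalar v) const;     // declared here, defined out of line
    };

    template<class Scalar>
    Scalar Assembler<Scalar>::scale(Scalar v) const
    {
        return static_cast<Scalar>(2)*v;
    }

    // instantiate the two supported versions; in a split .h/.cpp build these
    // are the only specialisations other translation units could link against
    template struct Assembler<double>;
    template struct Assembler<std::complex<double> >;

    int main()
    {
        Assembler<std::complex<double> > a;
        std::cout << a.scale(std::complex<double>(1.0, 1.0)) << "\n";
        return 0;
    }
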
 
diff --git a/ripley/src/DefaultAssembler3D.h b/ripley/src/DefaultAssembler3D.h
index a175e84..4ed3b51 100644
--- a/ripley/src/DefaultAssembler3D.h
+++ b/ripley/src/DefaultAssembler3D.h
@@ -20,18 +20,18 @@
 
 namespace ripley {
 
-
+template<class Scalar = double>
 class DefaultAssembler3D : public AbstractAssembler
 {
 public:
-    DefaultAssembler3D(escript::const_Domain_ptr dom, const double *dx,
-                       const dim_t *NE, const dim_t *NN)
+    DefaultAssembler3D<Scalar>(escript::const_Domain_ptr dom, const double* dx,
+                               const dim_t* NE, const dim_t* NN)
         : AbstractAssembler(),
         m_dx(dx),
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Brick>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Brick>(dom);
     }
 
     ~DefaultAssembler3D() {}
@@ -119,7 +119,7 @@ public:
                                    const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Brick> domain;
+    POINTER_WRAPPER_CLASS(const Brick) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
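
The REFCOUNTNS and POINTER_WRAPPER_CLASS names in this header are, as far as I can tell, escript macros that abstract over which reference-counted pointer implementation is in use (boost::shared_ptr versus std::shared_ptr). I have not checked their actual definitions, so the snippet below uses invented _DEMO names purely to illustrate that kind of indirection.

    #include <memory>

    // invented stand-ins for REFCOUNTNS / POINTER_WRAPPER_CLASS
    #define REFCOUNTNS_DEMO std
    #define POINTER_WRAPPER_CLASS_DEMO(T) REFCOUNTNS_DEMO::shared_ptr<T>

    struct Domain { virtual ~Domain() {} };
    struct Brick : Domain {};

    int main()
    {
        POINTER_WRAPPER_CLASS_DEMO(const Domain) dom(new Brick);
        POINTER_WRAPPER_CLASS_DEMO(const Brick) brick =
            REFCOUNTNS_DEMO::static_pointer_cast<const Brick>(dom);
        return brick ? 0 : 1;
    }
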
diff --git a/ripley/src/LameAssembler2D.cpp b/ripley/src/LameAssembler2D.cpp
index 1387450..e876ac9 100644
--- a/ripley/src/LameAssembler2D.cpp
+++ b/ripley/src/LameAssembler2D.cpp
@@ -14,12 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <ripley/LameAssembler2D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/index.h>
+
 using namespace std;
 using escript::AbstractSystemMatrix;
 using escript::Data;
diff --git a/ripley/src/LameAssembler2D.h b/ripley/src/LameAssembler2D.h
index a3fda4c..c48afd3 100644
--- a/ripley/src/LameAssembler2D.h
+++ b/ripley/src/LameAssembler2D.h
@@ -31,7 +31,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Rectangle>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
     }
     ~LameAssembler2D() {}
     
@@ -60,7 +60,7 @@ public:
                                    const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Rectangle> domain;
+    POINTER_WRAPPER_CLASS(const Rectangle) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/ripley/src/LameAssembler3D.cpp b/ripley/src/LameAssembler3D.cpp
index 8ba9891..8bd2052 100644
--- a/ripley/src/LameAssembler3D.cpp
+++ b/ripley/src/LameAssembler3D.cpp
@@ -14,12 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <ripley/LameAssembler3D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/index.h>
+
 using namespace std;
 
 using escript::AbstractSystemMatrix;
diff --git a/ripley/src/LameAssembler3D.h b/ripley/src/LameAssembler3D.h
index 19ad41a..a145285 100644
--- a/ripley/src/LameAssembler3D.h
+++ b/ripley/src/LameAssembler3D.h
@@ -31,7 +31,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Brick>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Brick>(dom);
     }
     ~LameAssembler3D(){};
     
@@ -60,7 +60,7 @@ public:
                                    const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Brick> domain;
+    POINTER_WRAPPER_CLASS(const Brick) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/ripley/src/MultiBrick.cpp b/ripley/src/MultiBrick.cpp
index 661e1f2..384daa2 100644
--- a/ripley/src/MultiBrick.cpp
+++ b/ripley/src/MultiBrick.cpp
@@ -14,19 +14,19 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-
 #include <ripley/MultiBrick.h>
 #include <ripley/blocktools.h>
 #include <ripley/domainhelpers.h>
+
 #include <escript/DataFactory.h>
 #include <escript/FunctionSpaceFactory.h>
+#include <escript/index.h>
 
 #define FIRST_QUAD 0.21132486540518711775
 #define SECOND_QUAD 0.78867513459481288225
 
 #include <iomanip>
+#include <iostream>
 #include <limits>
 
 using std::vector;
@@ -586,7 +586,6 @@ void MultiBrick::readBinaryGrid(escript::Data& out, string filename,
     Brick::readBinaryGrid(out, filename, params);
 }
 
-#ifdef USE_BOOSTIO
 void MultiBrick::readBinaryGridFromZipped(escript::Data& out, string filename,
                                const ReaderParameters& params) const
 {
@@ -594,7 +593,6 @@ void MultiBrick::readBinaryGridFromZipped(escript::Data& out, string filename,
         throw RipleyException("Non-parent MultiBricks cannot read datafiles");
     Brick::readBinaryGridFromZipped(out, filename, params);
 }
-#endif
 
 void MultiBrick::writeBinaryGrid(const escript::Data& in, string filename,
                                 int byteOrder, int dataType) const
@@ -1018,14 +1016,32 @@ void MultiBrick::populateSampleIds()
     // helps when writing out data rank after rank.
 
     // build node distribution vector first.
-    // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
-    // constant for all ranks in this implementation
+    // rank i owns m_nodeDistribution[i+1]-m_nodeDistribution[i] nodes.
+    // Unlike regular ripley domains this is NOT constant across ranks, so
+    // we do an Allgather (each rank could also compute it locally, but that
+    // is a bit more involved)
     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
-    const dim_t numDOF=getNumDOF();
-    for (dim_t k=1; k<m_mpiInfo->size; k++) {
-        m_nodeDistribution[k]=k*numDOF;
+    dim_t numDOF=getNumDOF();
+    if (m_mpiInfo->size > 1) {
+#if ESYS_MPI
+        MPI_Allgather(&numDOF, 1, MPI_DIM_T, &m_nodeDistribution[0], 1,
+                      MPI_DIM_T, m_mpiInfo->comm);
+
+        // accumulate
+        dim_t accu = 0;
+        for (int rank=0; rank<m_mpiInfo->size; rank++) {
+            const dim_t n = m_nodeDistribution[rank];
+            m_nodeDistribution[rank] = accu;
+            accu += n;
+        }
+        ESYS_ASSERT(accu == getNumDataPointsGlobal(),
+                "something went wrong computing the DOF distribution!");
+
+        m_nodeDistribution[m_mpiInfo->size] = accu;
+#endif
+    } else {
+        m_nodeDistribution[m_mpiInfo->size] = numDOF;
     }
-    m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
 
     try {
         m_nodeId.resize(getNumNodes());
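
The MPI_Allgather plus running-sum pattern added in the hunk above converts each rank's local DOF count into the cumulative m_nodeDistribution vector, where entry r is the first global index owned by rank r and the final entry is the global total. A stand-alone sketch of the same idea (my own example using plain MPI types rather than the MPI_DIM_T alias used by ripley):

    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        long numDOF = 100 + 10*rank;              // pretend local DOF count
        std::vector<long> dist(size + 1, 0);
        MPI_Allgather(&numDOF, 1, MPI_LONG, &dist[0], 1, MPI_LONG,
                      MPI_COMM_WORLD);

        long accu = 0;                            // turn counts into offsets
        for (int r = 0; r < size; ++r) {
            const long n = dist[r];
            dist[r] = accu;
            accu += n;
        }
        dist[size] = accu;

        if (rank == 0)
            std::printf("global DOFs: %ld\n", dist[size]);
        MPI_Finalize();
        return 0;
    }
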
@@ -1258,7 +1274,7 @@ void MultiBrick::populateSampleIds()
 }
 
 //private
-vector<IndexVector> MultiBrick::getConnections() const
+vector<IndexVector> MultiBrick::getConnections(bool includeShared) const
 {
     // returns a vector v of size numDOF where v[i] is a vector with indices
     // of DOFs connected to i (up to 27 in 3D)
@@ -1289,193 +1305,6 @@ vector<IndexVector> MultiBrick::getConnections() const
     return indices;
 }
 
-//private
-void MultiBrick::populateDofMap()
-{
-    const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
-    const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
-    const dim_t nDOF2 = (m_gNE[2]+1)/m_NX[2];
-    const index_t left = (m_offset[0]==0 ? 0 : 1);
-    const index_t bottom = (m_offset[1]==0 ? 0 : 1);
-    const index_t front = (m_offset[2]==0 ? 0 : 1);
-
-    // populate node->DOF mapping with own degrees of freedom.
-    // The rest is assigned in the loop further down
-    m_dofMap.assign(getNumNodes(), 0);
-#pragma omp parallel for
-    for (index_t i=front; i<front+nDOF2; i++) {
-        for (index_t j=bottom; j<bottom+nDOF1; j++) {
-            for (index_t k=left; k<left+nDOF0; k++) {
-                m_dofMap[i*m_NN[0]*m_NN[1]+j*m_NN[0]+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
-            }
-        }
-    }
-
-    const dim_t numDOF=nDOF0*nDOF1*nDOF2;
-    RankVector neighbour;
-    IndexVector offsetInShared(1,0);
-    IndexVector sendShared, recvShared;
-    dim_t numShared=0;
-    const int x=m_mpiInfo->rank%m_NX[0];
-    const int y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
-    const int z=m_mpiInfo->rank/(m_NX[0]*m_NX[1]);
-
-    // build list of shared components and neighbours by looping through
-    // all potential neighbouring ranks and checking if positions are
-    // within bounds
-    for (int i2=-1; i2<2; i2++) {
-        for (int i1=-1; i1<2; i1++) {
-            for (int i0=-1; i0<2; i0++) {
-                // skip this rank
-                if (i0==0 && i1==0 && i2==0)
-                    continue;
-                // location of neighbour rank
-                const int nx=x+i0;
-                const int ny=y+i1;
-                const int nz=z+i2;
-                if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX[0] && ny<m_NX[1] && nz<m_NX[2]) {
-                    neighbour.push_back(nz*m_NX[0]*m_NX[1]+ny*m_NX[0]+nx);
-                    if (i0==0 && i1==0) {
-                        // sharing front or back plane
-                        offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
-                        for (dim_t i=0; i<nDOF1; i++) {
-                            const dim_t firstDOF=(i2==-1 ? i*nDOF0
-                                    : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
-                            const dim_t firstNode=(i2==-1 ? left+(i+bottom)*m_NN[0]
-                                    : left+(i+bottom)*m_NN[0]+m_NN[0]*m_NN[1]*(m_NN[2]-1));
-                            for (dim_t j=0; j<nDOF0; j++, numShared++) {
-                                sendShared.push_back(firstDOF+j);
-                                recvShared.push_back(numDOF+numShared);
-                                m_dofMap[firstNode+j]=numDOF+numShared;
-                            }
-                        }
-                    } else if (i0==0 && i2==0) {
-                        // sharing top or bottom plane
-                        offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
-                        for (dim_t i=0; i<nDOF2; i++) {
-                            const dim_t firstDOF=(i1==-1 ? i*nDOF0*nDOF1
-                                    : nDOF0*((i+1)*nDOF1-1));
-                            const dim_t firstNode=(i1==-1 ?
-                                    left+(i+front)*m_NN[0]*m_NN[1]
-                                    : left+m_NN[0]*((i+1+front)*m_NN[1]-1));
-                            for (dim_t j=0; j<nDOF0; j++, numShared++) {
-                                sendShared.push_back(firstDOF+j);
-                                recvShared.push_back(numDOF+numShared);
-                                m_dofMap[firstNode+j]=numDOF+numShared;
-                            }
-                        }
-                    } else if (i1==0 && i2==0) {
-                        // sharing left or right plane
-                        offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
-                        for (dim_t i=0; i<nDOF2; i++) {
-                            const dim_t firstDOF=(i0==-1 ? i*nDOF0*nDOF1
-                                    : nDOF0*(1+i*nDOF1)-1);
-                            const dim_t firstNode=(i0==-1 ?
-                                    (bottom+(i+front)*m_NN[1])*m_NN[0]
-                                    : (bottom+1+(i+front)*m_NN[1])*m_NN[0]-1);
-                            for (dim_t j=0; j<nDOF1; j++, numShared++) {
-                                sendShared.push_back(firstDOF+j*nDOF0);
-                                recvShared.push_back(numDOF+numShared);
-                                m_dofMap[firstNode+j*m_NN[0]]=numDOF+numShared;
-                            }
-                        }
-                    } else if (i0==0) {
-                        // sharing an edge in x direction
-                        offsetInShared.push_back(offsetInShared.back()+nDOF0);
-                        const dim_t firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
-                                           +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
-                        const dim_t firstNode=left+(i1+1)/2*m_NN[0]*(m_NN[1]-1)
-                                            +(i2+1)/2*m_NN[0]*m_NN[1]*(m_NN[2]-1);
-                        for (dim_t i=0; i<nDOF0; i++, numShared++) {
-                            sendShared.push_back(firstDOF+i);
-                            recvShared.push_back(numDOF+numShared);
-                            m_dofMap[firstNode+i]=numDOF+numShared;
-                        }
-                    } else if (i1==0) {
-                        // sharing an edge in y direction
-                        offsetInShared.push_back(offsetInShared.back()+nDOF1);
-                        const dim_t firstDOF=(i0+1)/2*(nDOF0-1)
-                                           +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
-                        const dim_t firstNode=bottom*m_NN[0]
-                                            +(i0+1)/2*(m_NN[0]-1)
-                                            +(i2+1)/2*m_NN[0]*m_NN[1]*(m_NN[2]-1);
-                        for (dim_t i=0; i<nDOF1; i++, numShared++) {
-                            sendShared.push_back(firstDOF+i*nDOF0);
-                            recvShared.push_back(numDOF+numShared);
-                            m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
-                        }
-                    } else if (i2==0) {
-                        // sharing an edge in z direction
-                        offsetInShared.push_back(offsetInShared.back()+nDOF2);
-                        const dim_t firstDOF=(i0+1)/2*(nDOF0-1)
-                                           +(i1+1)/2*nDOF0*(nDOF1-1);
-                        const dim_t firstNode=front*m_NN[0]*m_NN[1]
-                                            +(i0+1)/2*(m_NN[0]-1)
-                                            +(i1+1)/2*m_NN[0]*(m_NN[1]-1);
-                        for (dim_t i=0; i<nDOF2; i++, numShared++) {
-                            sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
-                            recvShared.push_back(numDOF+numShared);
-                            m_dofMap[firstNode+i*m_NN[0]*m_NN[1]]=numDOF+numShared;
-                        }
-                    } else {
-                        // sharing a node
-                        const dim_t dof = (i0+1)/2*(nDOF0-1)
-                                       +(i1+1)/2*nDOF0*(nDOF1-1)
-                                       +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
-                        const dim_t node = (i0+1)/2*(m_NN[0]-1)
-                                        +(i1+1)/2*m_NN[0]*(m_NN[1]-1)
-                                        +(i2+1)/2*m_NN[0]*m_NN[1]*(m_NN[2]-1);
-                        offsetInShared.push_back(offsetInShared.back()+1);
-                        sendShared.push_back(dof);
-                        recvShared.push_back(numDOF+numShared);
-                        m_dofMap[node] = numDOF+numShared;
-                        ++numShared;
-                    }
-                }
-            }
-        }
-    }
-
-    // TODO: paso::SharedComponents should take vectors to avoid this
-    Esys_MPI_rank* neighPtr = NULL;
-    index_t* sendPtr = NULL;
-    index_t* recvPtr = NULL;
-    if (neighbour.size() > 0) {
-        neighPtr = &neighbour[0];
-        sendPtr = &sendShared[0];
-        recvPtr = &recvShared[0];
-    }
-    // create connector
-    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, sendPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, recvPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
-
-    // useful debug output
-    /*
-    std::cout << "--- rcv_shcomp ---" << std::endl;
-    std::cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << std::endl;
-    for (size_t i=0; i<neighbour.size(); i++) {
-        std::cout << "neighbor[" << i << "]=" << neighbour[i]
-            << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << std::endl;
-    }
-    for (size_t i=0; i<recvShared.size(); i++) {
-        std::cout << "shared[" << i << "]=" << recvShared[i] << std::endl;
-    }
-    std::cout << "--- snd_shcomp ---" << std::endl;
-    for (size_t i=0; i<sendShared.size(); i++) {
-        std::cout << "shared[" << i << "]=" << sendShared[i] << std::endl;
-    }
-    std::cout << "--- dofMap ---" << std::endl;
-    for (size_t i=0; i<m_dofMap.size(); i++) {
-        std::cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << std::endl;
-    }
-    */
-}
-
 RankVector MultiBrick::getOwnerVector(int fsType) const
 {
     if (m_subdivisions != 1)
diff --git a/ripley/src/MultiBrick.h b/ripley/src/MultiBrick.h
index d222efe..2b7cb5c 100644
--- a/ripley/src/MultiBrick.h
+++ b/ripley/src/MultiBrick.h
@@ -27,7 +27,7 @@ namespace ripley {
 */
 class RIPLEY_DLL_API MultiBrick: public Brick
 {
-    friend class DefaultAssembler3D;
+    template<class Scalar> friend class DefaultAssembler3D;
     friend class WaveAssembler3D;
     friend class LameAssembler3D;
 public:
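
Now that DefaultAssembler3D carries a template parameter, the previous "friend class DefaultAssembler3D;" line would no longer name a class, so friendship is granted to every specialisation via a template friend declaration, as in the hunk above. A tiny self-contained example of that construct (my own names, unrelated to ripley):

    template<class Scalar> class Assembler;          // forward declaration

    class Grid {
        template<class Scalar> friend class Assembler;  // every Assembler<T> is a friend
        int secret;
    public:
        Grid() : secret(7) {}
    };

    template<class Scalar>
    class Assembler {
    public:
        int peek(const Grid& g) const { return g.secret; }
    };

    int main()
    {
        Grid g;
        return Assembler<double>().peek(g) == 7 ? 0 : 1;
    }
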
@@ -98,10 +98,8 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
 
-#ifdef USE_BOOSTIO
     virtual void readBinaryGridFromZipped(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#endif
 
     /**
     */
@@ -186,9 +184,8 @@ protected:
     virtual void interpolateReducedToElementsFiner(const escript::Data& source, escript::Data& target, const MultiBrick& other) const;
     virtual void interpolateReducedToReducedFiner(const escript::Data& source, escript::Data& target, const MultiBrick& other) const;
 
-    void populateSampleIds();
-    void populateDofMap();
-    std::vector<IndexVector> getConnections() const;
+    virtual void populateSampleIds();
+    virtual std::vector<IndexVector> getConnections(bool includeShared) const;
 
     dim_t findNode(const double *coords) const;
 
diff --git a/ripley/src/MultiRectangle.cpp b/ripley/src/MultiRectangle.cpp
index 93b2f0e..f0e7550 100644
--- a/ripley/src/MultiRectangle.cpp
+++ b/ripley/src/MultiRectangle.cpp
@@ -14,20 +14,20 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-
 #include <ripley/MultiRectangle.h>
 #include <ripley/blocktools.h>
 #include <ripley/domainhelpers.h>
+
 #include <escript/DataFactory.h>
 #include <escript/FunctionSpaceFactory.h>
+#include <escript/index.h>
 
 #define FIRST_QUAD 0.21132486540518711775
 #define SECOND_QUAD 0.78867513459481288225
 
 #include <algorithm>
 #include <iomanip>
+#include <iostream>
 #include <limits>
 
 using std::vector;
@@ -446,7 +446,6 @@ void MultiRectangle::readBinaryGrid(escript::Data& out, string filename,
     Rectangle::readBinaryGrid(out, filename, params);
 }
 
-#ifdef USE_BOOSTIO
 void MultiRectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
                                const ReaderParameters& params) const
 {
@@ -454,7 +453,6 @@ void MultiRectangle::readBinaryGridFromZipped(escript::Data& out, string filenam
         throw RipleyException("Non-parent MultiRectangles cannot read datafiles");
     Rectangle::readBinaryGridFromZipped(out, filename, params);
 }
-#endif
 
 void MultiRectangle::writeBinaryGrid(const escript::Data& in, string filename,
                                 int byteOrder, int dataType) const
@@ -473,66 +471,100 @@ void MultiRectangle::dump(const string& fileName) const
 
 void MultiRectangle::populateDofMap()
 {
+    // build node distribution vector first.
+    // rank i owns m_nodeDistribution[i+1]-m_nodeDistribution[i] nodes.
+    // Unlike regular ripley domains this is NOT constant across ranks, so
+    // we do an Allgather (each rank could also compute it locally, but that
+    // is a bit more involved)
+    m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
+    dim_t numDOF = getNumDOF();
+    if (m_mpiInfo->size > 1) {
+#if ESYS_MPI
+        MPI_Allgather(&numDOF, 1, MPI_DIM_T, &m_nodeDistribution[0], 1,
+                      MPI_DIM_T, m_mpiInfo->comm);
+
+        // accumulate
+        dim_t accu = 0;
+        for (int rank = 0; rank < m_mpiInfo->size; rank++) {
+            const dim_t n = m_nodeDistribution[rank];
+            m_nodeDistribution[rank] = accu;
+            accu += n;
+        }
+        ESYS_ASSERT(accu == getNumDataPointsGlobal(),
+                "something went wrong computing the DOF distribution!");
+
+        m_nodeDistribution[m_mpiInfo->size] = accu;
+#endif
+    } else {
+        m_nodeDistribution[m_mpiInfo->size] = numDOF;
+    }
+
+    // degrees of freedom are numbered from left to right, bottom to top in
+    // each rank, continuing on the next rank (ranks also go left-right,
+    // bottom-top).
+    // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
+    // helps when writing out data rank after rank.
+
+    try {
+        m_nodeId.assign(getNumNodes(), -1);
+        m_dofMap.assign(getNumNodes(), -1);
+        m_dofId.assign(numDOF, -1);
+    } catch (const std::length_error& le) {
+        throw RipleyException("The system does not have sufficient memory for a domain of this size.");
+    }
+
     const index_t left = getFirstInDim(0);
     const index_t bottom = getFirstInDim(1);
     const dim_t nDOF0 = getNumDOFInAxis(0);
     const dim_t nDOF1 = getNumDOFInAxis(1);
-    // populate node->DOF mapping with own degrees of freedom.
-    // The rest is assigned in the loop further down
-    m_dofMap.assign(getNumNodes(), -7);
+    // populate node->DOF mapping, DOF IDs and own node IDs.
+    // The rest of the node IDs are communicated further down.
 #pragma omp parallel for
-    for (index_t i=bottom; i<bottom+nDOF1; i++) {
-        for (index_t j=left; j<left+nDOF0; j++) {
-            m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
+    for (dim_t i=0; i<nDOF1; i++) {
+        for (dim_t j=0; j<nDOF0; j++) {
+            const index_t nodeIdx = j+left + (i+bottom)*m_NN[0];
+            const index_t dofIdx = j + i*nDOF0;
+            m_dofMap[nodeIdx] = dofIdx;
+            m_dofId[dofIdx] = m_nodeId[nodeIdx]
+                = m_nodeDistribution[m_mpiInfo->rank] + dofIdx;
         }
     }
 
-    // build list of shared components and neighbours by looping through
-    // all potential neighbouring ranks and checking if positions are
-    // within bounds
-    const dim_t numDOF=nDOF0*nDOF1;
-    m_colIndices.clear();
-    m_rowIndices.clear();
-    m_colIndices.resize(numDOF);
-    m_rowIndices.resize(getNumNodes() - numDOF);
+    // build list of shared components and neighbours
+    m_colIndices.assign(numDOF, IndexVector());
+    m_rowIndices.assign(getNumNodes() - numDOF, IndexVector());
 
     RankVector neighbour;
     IndexVector offsetInSharedSend(1,0);
     IndexVector offsetInSharedRecv(1,0);
     IndexVector sendShared, recvShared;
-    const int x=m_mpiInfo->rank%m_NX[0];
-    const int y=m_mpiInfo->rank/m_NX[0];
+    const int x = m_mpiInfo->rank%m_NX[0];
+    const int y = m_mpiInfo->rank/m_NX[0];
     // numShared will contain the number of shared DOFs after the following
     // blocks
-    dim_t numShared=0;
+    dim_t numShared = 0;
     // sharing bottom edge
     if (y > 0) {
         neighbour.push_back((y-1)*m_NX[0] + x);
         //joining edge, send and recv
         offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF0);
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0*m_subdivisions);
-        for (dim_t i=0; i < nDOF0; i++, numShared++) {
+        // add to send only
+        for (dim_t i=0; i < nDOF0; i++) {
             sendShared.push_back(i);
-            recvShared.push_back(numDOF+numShared);
-            m_dofMap[i+left]=numDOF + numShared;
-            const dim_t ind = i;
-            if (i > 0)
-                doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);
-            doublyLink(m_colIndices, m_rowIndices, ind, numShared);
-            if (i < nDOF0 - 1)
-                doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);
         }
     
-        for (unsigned sy = 1; sy < m_subdivisions; sy++) {
-            for (dim_t i=0; i < nDOF0; i++, numShared++) {
-                recvShared.push_back(numDOF+numShared);
-                m_dofMap[left + i + sy*m_NN[0]] = numDOF + numShared;
-                const dim_t ind = i;
+        for (unsigned sy = 0; sy < m_subdivisions; sy++) {
+            for (index_t i = 0; i < nDOF0; i++, numShared++) {
+                const index_t nodeIdx = left + i + sy*m_NN[0];
+                const index_t dofIdx = i;
+                recvShared.push_back(nodeIdx);
+                m_dofMap[nodeIdx] = numDOF + numShared;
                 if (i > 0)
-                    doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);
-                doublyLink(m_colIndices, m_rowIndices, ind, numShared);
+                    doublyLink(m_colIndices, m_rowIndices, dofIdx - 1, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
                 if (i < nDOF0 - 1)
-                    doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);
+                    doublyLink(m_colIndices, m_rowIndices, dofIdx + 1, numShared);
             }
         }
     }
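
Throughout the rewritten populateDofMap, nodes owned by this rank map to DOF indices 0..numDOF-1, while every shared node received from a neighbour is appended as numDOF + numShared. A conceptual toy of that owned-then-ghost numbering (mine, not ripley code):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const int numOwned = 4;                 // DOFs this rank owns
        std::vector<int> dofMap;                // node index -> local DOF index
        for (int i = 0; i < numOwned; ++i)
            dofMap.push_back(i);                // owned nodes map to 0..3

        int numShared = 0;                      // ghost nodes from neighbours
        for (int g = 0; g < 3; ++g, ++numShared)
            dofMap.push_back(numOwned + numShared);   // 4, 5, 6 ...

        for (std::size_t n = 0; n < dofMap.size(); ++n)
            std::printf("node %zu -> dof %d%s\n", n, dofMap[n],
                        dofMap[n] < numOwned ? "" : " (ghost)");
        return 0;
    }
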
@@ -543,19 +575,20 @@ void MultiRectangle::populateDofMap()
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0);
         // add to send only
         for (unsigned sy = 0; sy < m_subdivisions; sy++) {
-            for (dim_t i=0; i < nDOF0; i++) {
+            for (index_t i = 0; i < nDOF0; i++) {
                 sendShared.push_back(numDOF-nDOF0*(m_subdivisions - sy) + i);
             }
         }
-        for (dim_t i=0; i < nDOF0; i++, numShared++) {
-            recvShared.push_back(numDOF+numShared);
-            m_dofMap[m_NN[0]*(m_NN[1]-1)+left+i]=numDOF+numShared;
-            const dim_t ind = numDOF-nDOF0+i;
+        for (index_t i = 0; i < nDOF0; i++, numShared++) {
+            const index_t nodeIdx = left + i + m_NN[0]*(m_NN[1]-1);
+            const index_t dofIdx = numDOF - nDOF0 + i;
+            recvShared.push_back(nodeIdx);
+            m_dofMap[nodeIdx] = numDOF+numShared;
             if (i > 0)
-                doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);
-            doublyLink(m_colIndices, m_rowIndices, ind, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx - 1, numShared);
+            doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
             if (i < nDOF0 - 1)
-                doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx + 1, numShared);
         }
     }
     // sharing left edge
@@ -563,26 +596,19 @@ void MultiRectangle::populateDofMap()
         neighbour.push_back(y*m_NX[0] + x-1);
         offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1);
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1*m_subdivisions);
-        for (dim_t i=0; i < nDOF1; i++, numShared++) {
-            for (unsigned sx = 0; sx < m_subdivisions - 1; sx++, numShared++) {
-                recvShared.push_back(numDOF+numShared);
-                m_dofMap[(bottom+i)*m_NN[0] + sx] = numDOF + numShared;
-                const dim_t ind = i*nDOF0;
+        for (index_t i = 0; i < nDOF1; i++) {
+            const index_t dofIdx = i*nDOF0;
+            sendShared.push_back(dofIdx);
+            for (unsigned sx = 0; sx < m_subdivisions; sx++, numShared++) {
+                const index_t nodeIdx = (bottom+i)*m_NN[0] + sx;
+                recvShared.push_back(nodeIdx);
+                m_dofMap[nodeIdx] = numDOF + numShared;
                 if (i > 0)
-                    doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);
-                doublyLink(m_colIndices, m_rowIndices, ind, numShared);
+                    doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
                 if (i < nDOF1 - 1)
-                    doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);
+                    doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);
             }
-            sendShared.push_back(i*nDOF0);
-            recvShared.push_back(numDOF + numShared);
-            m_dofMap[(bottom+i)*m_NN[0] + m_subdivisions - 1]=numDOF + numShared;
-            const dim_t ind = i*nDOF0;
-            if (i > 0)
-                doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);
-            doublyLink(m_colIndices, m_rowIndices, ind, numShared);
-            if (i < nDOF1 - 1)
-                doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);
         }
     }
     // sharing right edge
@@ -590,22 +616,23 @@ void MultiRectangle::populateDofMap()
         neighbour.push_back(y*m_NX[0] + x+1);
         offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1*m_subdivisions);
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1);
-        for (dim_t i=0; i < nDOF1; i++, numShared++) {
+        for (index_t i = 0; i < nDOF1; i++, numShared++) {
             for (unsigned sx = 0; sx < m_subdivisions - 1; sx++) {
                 sendShared.push_back((i+1)*nDOF0-(m_subdivisions - sx));
             }
-            sendShared.push_back((i+1)*nDOF0-1);
-            recvShared.push_back(numDOF+numShared);
-            m_dofMap[(bottom+1+i)*m_NN[0]- 1]=numDOF+numShared;
-            const dim_t ind = (i+1)*nDOF0 - 1;
+            const index_t nodeIdx = (bottom+1+i)*m_NN[0] - 1;
+            const index_t dofIdx = (i+1)*nDOF0 - 1;
+            sendShared.push_back(dofIdx);
+            recvShared.push_back(nodeIdx);
+            m_dofMap[nodeIdx] = numDOF + numShared;
             if (i > 0)
-                doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);
-            doublyLink(m_colIndices, m_rowIndices, ind, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
+            doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
             if (i < nDOF1 - 1)
-                doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);
         }
     }
-    // sharing bottom-left node
+    // sharing bottom-left block
     if (x > 0 && y > 0) {
         neighbour.push_back((y-1)*m_NX[0] + x-1);
         // sharing a node
@@ -613,39 +640,43 @@ void MultiRectangle::populateDofMap()
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions*m_subdivisions);
         for (unsigned sy = 0; sy < m_subdivisions; sy++) {
             for (unsigned sx = 0; sx < m_subdivisions; sx++, numShared++) {
-                m_dofMap[sx + sy*m_NN[0]] = numDOF + numShared;
-                recvShared.push_back(numDOF+numShared);
+                const index_t nodeIdx = sx + sy*m_NN[0];
+                m_dofMap[nodeIdx] = numDOF + numShared;
+                recvShared.push_back(nodeIdx);
                 doublyLink(m_colIndices, m_rowIndices, 0, numShared);
             }
         }
         sendShared.push_back(0);
     }
-    // sharing top-left node
+    // sharing top-left block
     if (x > 0 && y < m_NX[1]-1) {
         neighbour.push_back((y+1)*m_NX[0] + x-1);
         offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);
         for (int s = 0; s < m_subdivisions; s++, numShared++) {
-            sendShared.push_back(numDOF - (m_subdivisions - s)*nDOF0);
-            recvShared.push_back(numDOF + numShared);
-            m_dofMap[m_NN[0]*(m_NN[1]-1) + s] = numDOF + numShared;
+            const index_t nodeIdx = m_NN[0]*(m_NN[1]-1) + s;
+            const index_t dofIdx = numDOF - (m_subdivisions - s)*nDOF0;
+            sendShared.push_back(dofIdx);
+            recvShared.push_back(nodeIdx);
+            m_dofMap[nodeIdx] = numDOF + numShared;
             if (s > 0)
-                doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s + 1)*nDOF0, numShared);
-            doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s)*nDOF0, numShared);
+                doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
+            doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
             if (s < m_subdivisions - 1)
-                doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s - 1)*nDOF0, numShared);            
+                doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);
         }
     }
-    // sharing bottom-right node
+    // sharing bottom-right block
     if (x < m_NX[0]-1 && y > 0) {
         neighbour.push_back((y-1)*m_NX[0] + x+1);
         offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);
         offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);
         for (int s = 0; s < m_subdivisions; s++, numShared++) {
-            recvShared.push_back(numDOF+numShared);
-            m_dofMap[(s+1)*m_NN[0] - 1] = numDOF + numShared;
-            sendShared.push_back(nDOF0-(m_subdivisions-s));
+            const index_t nodeIdx = (s+1)*m_NN[0] - 1;
             const dim_t ind = nDOF0 - (m_subdivisions - s);
+            recvShared.push_back(nodeIdx);
+            m_dofMap[nodeIdx] = numDOF + numShared;
+            sendShared.push_back(ind);
             if (s > 0)
                 doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);
             doublyLink(m_colIndices, m_rowIndices, ind, numShared);
@@ -653,7 +684,7 @@ void MultiRectangle::populateDofMap()
                 doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);
         }
     }
-    // sharing top-right node
+    // sharing top-right block
     if (x < m_NX[0]-1 && y < m_NX[1]-1) {
         neighbour.push_back((y+1)*m_NX[0] + x+1);
         offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions*m_subdivisions);
@@ -663,55 +694,68 @@ void MultiRectangle::populateDofMap()
                 sendShared.push_back(numDOF-(m_subdivisions - sy - 1)*nDOF0 - (m_subdivisions - sx));
             }
         }
-        recvShared.push_back(numDOF+numShared);
-        m_dofMap[m_NN[0]*m_NN[1]-1]=numDOF+numShared;
+        const dim_t nodeIdx = m_NN[0]*m_NN[1] - 1;
+        recvShared.push_back(nodeIdx);
+        m_dofMap[nodeIdx] = numDOF+numShared;
         doublyLink(m_colIndices, m_rowIndices, numDOF-1, numShared);
         ++numShared;
     }
 
-    // TODO: paso::SharedComponents should take vectors to avoid this
-    Esys_MPI_rank* neighPtr = NULL;
-    index_t* sendPtr = NULL;
-    index_t* recvPtr = NULL;
-    if (neighbour.size() > 0) {
-        neighPtr = &neighbour[0];
-        sendPtr = &sendShared[0];
-        recvPtr = &recvShared[0];
-    }
-    // create connector
-    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, sendPtr,
-            &offsetInSharedSend[0], 1, 0, m_mpiInfo));
-    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, recvPtr,
-            &offsetInSharedRecv[0], 1, 0, m_mpiInfo));
-    m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+#ifdef ESYS_MPI
+    if (m_mpiInfo->size > 1) {
+        // now send off shared DOF IDs so nodeId will become a global node
+        // labelling.
+        const dim_t numSend = offsetInSharedSend.back();
+        const dim_t numRecv = offsetInSharedRecv.back();
+        IndexVector recvBuffer(numRecv);
+        IndexVector sendBuffer(numSend);
+        std::vector<MPI_Request> reqs(2*neighbour.size());
+        std::vector<MPI_Status> stats(2*neighbour.size());
+
+        // prepare the send buffer
+#pragma omp parallel for
+        for (index_t i = 0; i < numSend; ++i) {
+            sendBuffer[i] = m_dofId[sendShared[i]];
+        }
+        for (index_t i = 0; i < neighbour.size(); i++) {
+            MPI_Irecv(&recvBuffer[offsetInSharedRecv[i]],
+                    offsetInSharedRecv[i+1] - offsetInSharedRecv[i],
+                    MPI_DIM_T, neighbour[i],
+                    m_mpiInfo->counter()+neighbour[i],
+                    m_mpiInfo->comm, &reqs[2*i]);
+            MPI_Issend(&sendBuffer[offsetInSharedSend[i]],
+                    offsetInSharedSend[i+1] - offsetInSharedSend[i],
+                    MPI_DIM_T, neighbour[i],
+                    m_mpiInfo->counter()+m_mpiInfo->rank, m_mpiInfo->comm,
+                    &reqs[2*i+1]);
+        }
+        m_mpiInfo->incCounter(m_mpiInfo->size);
+
+        // independent local work could be overlapped with the communication here
+
+        MPI_Waitall(2*neighbour.size(), &reqs[0], &stats[0]);
+
+        // now populate rest of node IDs
+#pragma omp parallel for
+        for (index_t i=0; i < numRecv; i++) {
+            const index_t nodeIdx = recvShared[i];
+            m_nodeId[nodeIdx] = recvBuffer[m_dofMap[nodeIdx]-numDOF];
+        }
+    }
+#endif // ESYS_MPI
+
+#ifdef ESYS_HAVE_PASO
+    createPasoConnector(neighbour, offsetInSharedSend, offsetInSharedRecv,
+                        sendShared, recvShared);
+#endif
 }
 
 void MultiRectangle::populateSampleIds()
 {
-    // degrees of freedom are numbered from left to right, bottom to top in
-    // each rank, continuing on the next rank (ranks also go left-right,
-    // bottom-top).
-    // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
-    // helps when writing out data rank after rank.
+    // label nodes and DOF first
+    populateDofMap();
 
-    // build node distribution vector first.
-    // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
-    // constant for all ranks in this implementation
-    m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
-    const dim_t numDOF=getNumDOF();
-    for (dim_t k=1; k<m_mpiInfo->size; k++) {
-        m_nodeDistribution[k]=k*numDOF;
-    }
-    m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
-    try {
-        m_nodeId.resize(getNumNodes());
-        m_dofId.resize(numDOF);
-        m_elementId.resize(getNumElements());
-    } catch (const std::length_error& le) {
-        throw RipleyException("The system does not have sufficient memory for a domain of this size.");
-    }
+    m_elementId.assign(getNumElements(), -1);
 
     // populate face element counts
     //left
@@ -736,89 +780,24 @@ void MultiRectangle::populateSampleIds()
         m_faceCount[3]=0;
 
     const dim_t NFE = getNumFaceElements();
-    m_faceId.resize(NFE);
-
-    const index_t left = getFirstInDim(0);
-    const index_t bottom = getFirstInDim(1);
-    const dim_t nDOF0 = getNumDOFInAxis(0);
-    const dim_t nDOF1 = getNumDOFInAxis(1);
     const dim_t NE0 = m_NE[0];
     const dim_t NE1 = m_NE[1];
-
-#define globalNodeId(x,y) \
-    ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
-    + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
-
-    // set corner id's outside the parallel region
-    m_nodeId[0] = globalNodeId(0, 0);
-    m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
-    m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
-    m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
-#undef globalNodeId
+    m_faceId.resize(NFE);
 
 #pragma omp parallel
     {
-        // populate degrees of freedom and own nodes (identical id)
+        // populate element IDs
 #pragma omp for nowait
-        for (dim_t i=0; i<nDOF1; i++) {
-            for (dim_t j=0; j<nDOF0; j++) {
-                const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
-                const index_t dofIdx=j+i*nDOF0;
-                m_dofId[dofIdx] = m_nodeId[nodeIdx]
-                    = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
-            }
-        }
-
-        // populate the rest of the nodes (shared with other ranks)
-        if (m_faceCount[0]==0) { // left column
-#pragma omp for nowait
-            for (dim_t i=0; i<nDOF1; i++) {
-                const index_t nodeIdx=(i+bottom)*m_NN[0];
-                const index_t dofId=(i+1)*nDOF0-1;
-                m_nodeId[nodeIdx]
-                    = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
-            }
-        }
-        if (m_faceCount[1]==0) { // right column
-#pragma omp for nowait
-            for (dim_t i=0; i<nDOF1; i++) {
-                const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
-                const index_t dofId=i*nDOF0;
-                m_nodeId[nodeIdx]
-                    = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
-            }
-        }
-        if (m_faceCount[2]==0) { // bottom row
-#pragma omp for nowait
-            for (dim_t i=0; i<nDOF0; i++) {
-                const index_t nodeIdx=i+left;
-                const index_t dofId=nDOF0*(nDOF1-1)+i;
-                m_nodeId[nodeIdx]
-                    = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
-            }
-        }
-        if (m_faceCount[3]==0) { // top row
-#pragma omp for nowait
-            for (dim_t i=0; i<nDOF0; i++) {
-                const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
-                const index_t dofId=i;
-                m_nodeId[nodeIdx]
-                    = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
-            }
-        }
-
-        // populate element id's
-#pragma omp for nowait
-        for (dim_t i1=0; i1<NE1; i1++) {
-            for (dim_t i0=0; i0<NE0; i0++) {
+        for (index_t i1 = 0; i1 < NE1; i1++) {
+            for (index_t i0 = 0; i0 < NE0; i0++) {
                 m_elementId[i0+i1*NE0]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
             }
         }
 
         // face elements
 #pragma omp for
-        for (dim_t k=0; k<NFE; k++)
-            m_faceId[k]=k;
+        for (index_t k = 0; k < NFE; k++)
+            m_faceId[k] = k;
     } // end parallel section
 
     m_nodeTags.assign(getNumNodes(), 0);
@@ -832,11 +811,11 @@ void MultiRectangle::populateSampleIds()
     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
     m_faceOffset.assign(4, -1);
     m_faceTags.clear();
-    index_t offset=0;
+    index_t offset = 0;
     for (size_t i=0; i<4; i++) {
-        if (m_faceCount[i]>0) {
+        if (m_faceCount[i] > 0) {
             m_faceOffset[i]=offset;
-            offset+=m_faceCount[i];
+            offset += m_faceCount[i];
             m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
         }
     }
@@ -846,39 +825,39 @@ void MultiRectangle::populateSampleIds()
     setTagMap("top", TOP);
     updateTagsInUse(FaceElements);
 
-    populateDofMap();
 }
 
+#ifdef ESYS_HAVE_PASO
 paso::SystemMatrixPattern_ptr MultiRectangle::getPasoMatrixPattern(
                                                     bool reducedRowOrder,
                                                     bool reducedColOrder) const
 {
-    if (m_pattern.get())
-        return m_pattern;
-
-    // first call - create pattern, then return
-    const dim_t numDOF = getNumDOF();
-    const dim_t numShared = getNumNodes() - numDOF;
+    if (!m_pattern) {
+        // first call - create pattern, then return
+        const dim_t numDOF = getNumDOF();
+        const dim_t numShared = getNumNodes() - numDOF;
 #pragma omp parallel for
-    for (dim_t i = 0; i < numShared; i++) {
-        sort(m_rowIndices[i].begin(), m_rowIndices[i].end());
-    }
+        for (index_t i = 0; i < numShared; i++) {
+            sort(m_rowIndices[i].begin(), m_rowIndices[i].end());
+        }
 
-    // create main and couple blocks
-    paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);
-    paso::Pattern_ptr colPattern = createPasoPattern(m_colIndices, numShared);
-    paso::Pattern_ptr rowPattern = createPasoPattern(m_rowIndices, numDOF);
+        // create main and couple blocks
+        paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);
+        paso::Pattern_ptr colPattern = createPasoPattern(m_colIndices, numShared);
+        paso::Pattern_ptr rowPattern = createPasoPattern(m_rowIndices, numDOF);
 
-    // allocate paso distribution
-    paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,
-            const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0));
+        // allocate Paso distribution
+        escript::Distribution_ptr distribution(new escript::Distribution(
+                                               m_mpiInfo, m_nodeDistribution));
 
-    // finally create the system matrix pattern
-    m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
-            distribution, distribution, mainPattern, colPattern, rowPattern,
-            m_connector, m_connector));
+        // finally create the system matrix pattern
+        m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
+                distribution, distribution, mainPattern, colPattern, rowPattern,
+                getPasoConnector(), getPasoConnector()));
+    }
     return m_pattern;
 }
+#endif
 
 RankVector MultiRectangle::getOwnerVector(int fsType) const
 {
diff --git a/ripley/src/MultiRectangle.h b/ripley/src/MultiRectangle.h
index 9a27e22..d9accad 100644
--- a/ripley/src/MultiRectangle.h
+++ b/ripley/src/MultiRectangle.h
@@ -27,7 +27,7 @@ namespace ripley {
 */
 class RIPLEY_DLL_API MultiRectangle: public Rectangle
 {
-    friend class DefaultAssembler2D;
+    template<class Scalar> friend class DefaultAssembler2D;
     friend class WaveAssembler2D;
     friend class LameAssembler2D;
 public:
@@ -98,10 +98,8 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
 
-#ifdef USE_BOOSTIO
     virtual void readBinaryGridFromZipped(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#endif
 
     /**
     */
@@ -131,9 +129,11 @@ protected:
 
     virtual void interpolateReducedToElementsFiner(const escript::Data& source, escript::Data& target, const MultiRectangle& other) const;
     virtual void interpolateReducedToReducedFiner(const escript::Data& source, escript::Data& target, const MultiRectangle& other) const;
+#ifdef ESYS_HAVE_PASO
     virtual paso::SystemMatrixPattern_ptr getPasoMatrixPattern(
                                                     bool reducedRowOrder,
                                                     bool reducedColOrder) const;
+#endif
     virtual index_t getFirstInDim(unsigned axis) const;
     virtual void populateSampleIds();
     virtual dim_t getNumDOFInAxis(unsigned axis) const;
@@ -157,7 +157,7 @@ inline dim_t MultiRectangle::getNumDOF() const
 //protected
 inline dim_t MultiRectangle::getNumDOFInAxis(unsigned axis) const
 {
-    EsysAssert((axis < m_numDim), "Invalid axis");
+    ESYS_ASSERT(axis < m_numDim, "Invalid axis");
     dim_t res = m_ownNE[axis] + 1;
     if (m_offset[axis] + m_NE[axis] < m_gNE[axis]) {
         res--;
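
The friend declarations in MultiRectangle.h above change to
"template<class Scalar> friend class DefaultAssembler2D;", so every instantiation of the now
scalar-templated assembler keeps access to the domain internals. A minimal sketch of that
template-friend pattern; Domain and Assembler2D below are simplified stand-ins, not the ripley
classes:

    #include <complex>

    template<class Scalar> class Assembler2D;      // forward declaration

    class Domain {
        // every instantiation of Assembler2D becomes a friend
        template<class Scalar> friend class Assembler2D;
        int m_numDOF = 4;
    };

    template<class Scalar>
    class Assembler2D {
    public:
        Scalar firstCoefficient(const Domain& d) const {
            // legal: Assembler2D<Scalar> is a friend of Domain
            return static_cast<Scalar>(d.m_numDOF);
        }
    };

    // both Assembler2D<double> and Assembler2D<std::complex<double>>
    // compile against the same Domain
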
diff --git a/ripley/src/Rectangle.cpp b/ripley/src/Rectangle.cpp
index c75a147..d3c8bf8 100644
--- a/ripley/src/Rectangle.cpp
+++ b/ripley/src/Rectangle.cpp
@@ -14,13 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-#include <esysUtils/esysFileWriter.h>
-#include <esysUtils/EsysRandom.h>
-
-#include <paso/SystemMatrix.h>
-
 #include <ripley/Rectangle.h>
 #include <ripley/DefaultAssembler2D.h>
 #include <ripley/LameAssembler2D.h>
@@ -28,11 +21,19 @@
 #include <ripley/blocktools.h>
 #include <ripley/domainhelpers.h>
 
-#ifdef USE_NETCDF
+#include <escript/FileWriter.h>
+#include <escript/index.h>
+#include <escript/Random.h>
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#endif
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #ifdef ESYS_MPI
 #include <pmpio.h>
@@ -47,8 +48,11 @@
 
 namespace bp = boost::python;
 namespace bm = boost::math;
-using esysUtils::FileWriter;
 using escript::AbstractSystemMatrix;
+using escript::FileWriter;
+using escript::IOError;
+using escript::NotImplementedError;
+using escript::ValueError;
 using std::vector;
 using std::string;
 using std::min;
@@ -73,7 +77,7 @@ Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
                 "limit may be raised in future releases.");
 
     if (n0 <= 0 || n1 <= 0)
-        throw RipleyException("Number of elements in each spatial dimension "
+        throw ValueError("Number of elements in each spatial dimension "
                 "must be positive");
 
     // ignore subdivision parameters for serial run
@@ -82,7 +86,7 @@ Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
         d1=1;
     }
 
-    bool warn=false;
+    bool warn = false;
     vector<int> factors;
     int ranks = m_mpiInfo->size;
     dim_t epr[2] = {n0,n1};
@@ -96,7 +100,7 @@ Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
             epr[i] = -1; // can no longer be max
             //remove
             if (ranks % d[i] != 0) {
-                throw RipleyException("Invalid number of spatial subdivisions");
+                throw ValueError("Invalid number of spatial subdivisions");
             }
             ranks /= d[i];
         }
@@ -118,7 +122,7 @@ Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
     // ensure number of subdivisions is valid and nodes can be distributed
     // among number of ranks
     if (d0*d1 != m_mpiInfo->size)
-        throw RipleyException("Invalid number of spatial subdivisions");
+        throw ValueError("Invalid number of spatial subdivisions");
 
     if (warn) {
         std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
@@ -130,22 +134,47 @@ Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
     m_dx[0] = l0/n0;
     m_dx[1] = l1/n1;
 
+    warn = false;
     if ((n0+1)%d0 > 0) {
-        n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
-        l0=m_dx[0]*n0;
-        std::cout << "Warning: Adjusted number of elements and length. N0="
-            << n0 << ", l0=" << l0 << std::endl;
+        switch (getDecompositionPolicy()) {
+            case DECOMP_EXPAND:
+                l0 = m_dx[0]*n0; // fall through
+            case DECOMP_ADD_ELEMENTS:
+                n0 = (dim_t)round((float)(n0+1)/d0+0.5)*d0-1; // fall through
+            case DECOMP_STRICT:
+                warn = true;
+                break;
+        }
     }
     if ((n1+1)%d1 > 0) {
-        n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
-        l1=m_dx[1]*n1;
-        std::cout << "Warning: Adjusted number of elements and length. N1="
-            << n1 << ", l1=" << l1 << std::endl;
+        switch (getDecompositionPolicy()) {
+            case DECOMP_EXPAND:
+                l1 = m_dx[1]*n1; // fall through
+            case DECOMP_ADD_ELEMENTS:
+                n1 = (dim_t)round((float)(n1+1)/d1+0.5)*d1-1; // fall through
+            case DECOMP_STRICT:
+                warn = true;
+                break;
+        }
     }
 
     if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
-        throw RipleyException("Too few elements for the number of ranks");
+        throw ValueError("Too few elements for the number of ranks");
 
+    if (warn) {
+        if (getDecompositionPolicy() == DECOMP_STRICT) {
+            throw ValueError("Unable to decompose domain to the number of "
+                    "MPI ranks without adding elements and the policy "
+                    "is set to STRICT. Use setDecompositionPolicy() "
+                    "to allow adding elements.");
+        } else {
+            std::cout << "Warning: Domain setup has been adjusted as follows "
+                    "to allow decomposition into " << m_mpiInfo->size
+                    << " MPI ranks:" << std::endl
+                    << "    N0=" << n0 << ", l0=" << l0 << std::endl
+                    << "    N1=" << n1 << ", l1=" << l1 << std::endl;
+        }
+    }
     m_gNE[0] = n0;
     m_gNE[1] = n1;
     m_origin[0] = x0;
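
The decomposition handling above relies on an intentional switch fall-through: the most
permissive policy does its extra work and then drops into the stricter cases, and only under
DECOMP_STRICT does the resulting warn flag later turn into an exception. A minimal sketch of
that control flow; Policy and adjustElements are illustrative names and the padding formula is
simplified, not ripley's exact expression:

    enum class Policy { Expand, AddElements, Strict };

    // Pad n so that n+1 is divisible by 'ranks'; 'warn' tells the caller that an
    // adjustment was (or, for Strict, would have been) required.
    int adjustElements(Policy p, int n, int ranks, bool& warn)
    {
        switch (p) {
            case Policy::Expand:
                // the most permissive policy would additionally rescale the
                // domain length here (elided in this sketch)
                // fall through
            case Policy::AddElements:
                n = ((n + ranks) / ranks) * ranks - 1;
                // fall through
            case Policy::Strict:
                warn = true;            // caller decides: warn or throw
                break;
        }
        return n;
    }
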
@@ -215,7 +244,7 @@ bool Rectangle::operator==(const escript::AbstractDomain& other) const
 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
             const ReaderParameters& params) const
 {
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
     // check destination function space
     dim_t myN0, myN1;
     if (out.getFunctionSpace().getTypeCode() == Nodes) {
@@ -226,35 +255,35 @@ void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
         myN0 = m_NE[0];
         myN1 = m_NE[1];
     } else
-        throw RipleyException("readNcGrid(): invalid function space for output data object");
+        throw ValueError("readNcGrid(): invalid function space for output data object");
 
     if (params.first.size() != 2)
-        throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
+        throw ValueError("readNcGrid(): argument 'first' must have 2 entries");
 
     if (params.numValues.size() != 2)
-        throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
+        throw ValueError("readNcGrid(): argument 'numValues' must have 2 entries");
 
     if (params.multiplier.size() != 2)
-        throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
+        throw ValueError("readNcGrid(): argument 'multiplier' must have 2 entries");
     for (size_t i=0; i<params.multiplier.size(); i++)
         if (params.multiplier[i]<1)
-            throw RipleyException("readNcGrid(): all multipliers must be positive");
+            throw ValueError("readNcGrid(): all multipliers must be positive");
     if (params.reverse.size() != 2)
-        throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
+        throw ValueError("readNcGrid(): argument 'reverse' must have 2 entries");
 
     // check file existence and size
     NcFile f(filename.c_str(), NcFile::ReadOnly);
     if (!f.is_valid())
-        throw RipleyException("readNcGrid(): cannot open file");
+        throw IOError("readNcGrid(): cannot open file");
 
     NcVar* var = f.get_var(varname.c_str());
     if (!var)
-        throw RipleyException("readNcGrid(): invalid variable");
+        throw IOError("readNcGrid(): invalid variable");
 
     // TODO: rank>0 data support
     const int numComp = out.getDataPointSize();
     if (numComp > 1)
-        throw RipleyException("readNcGrid(): only scalar data supported");
+        throw NotImplementedError("readNcGrid(): only scalar data supported");
 
     const int dims = var->num_dims();
     boost::scoped_array<long> edges(var->edges());
@@ -263,7 +292,7 @@ void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
     // note the expected ordering of edges (as in numpy: y,x)
     if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
             || (dims==1 && params.numValues[1]>1) ) {
-        throw RipleyException("readNcGrid(): not enough data in file");
+        throw IOError("readNcGrid(): not enough data in file");
     }
 
     // check if this rank contributes anything
@@ -349,14 +378,14 @@ void Rectangle::readBinaryGrid(escript::Data& out, string filename,
             readBinaryGridImpl<double>(out, filename, params);
             break;
         default:
-            throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
+            throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
     }
 }
 
-#ifdef USE_BOOSTIO
 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
                                const ReaderParameters& params) const
 {
+#ifdef ESYS_HAVE_BOOST_IO
     // the mapping is not universally correct but should work on our
     // supported platforms
     switch (params.dataType) {
@@ -370,10 +399,12 @@ void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
             readBinaryGridZippedImpl<double>(out, filename, params);
             break;
         default:
-            throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
+            throw ValueError("readBinaryGridFromZipped(): invalid or unsupported datatype");
     }
-}
+#else
+    throw ValueError("readBinaryGridFromZipped(): not compiled with zip support");
 #endif
+}
 
 template<typename ValueType>
 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
@@ -389,26 +420,26 @@ void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
         myN0 = m_NE[0];
         myN1 = m_NE[1];
     } else
-        throw RipleyException("readBinaryGrid(): invalid function space for output data object");
+        throw ValueError("readBinaryGrid(): invalid function space for output data object");
 
     if (params.first.size() != 2)
-        throw RipleyException("readBinaryGrid(): argument 'first' must have 2 entries");
+        throw ValueError("readBinaryGrid(): argument 'first' must have 2 entries");
 
     if (params.numValues.size() != 2)
-        throw RipleyException("readBinaryGrid(): argument 'numValues' must have 2 entries");
+        throw ValueError("readBinaryGrid(): argument 'numValues' must have 2 entries");
 
     if (params.multiplier.size() != 2)
-        throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 2 entries");
+        throw ValueError("readBinaryGrid(): argument 'multiplier' must have 2 entries");
     for (size_t i=0; i<params.multiplier.size(); i++)
         if (params.multiplier[i]<1)
-            throw RipleyException("readBinaryGrid(): all multipliers must be positive");
+            throw ValueError("readBinaryGrid(): all multipliers must be positive");
     if (params.reverse[0] != 0 || params.reverse[1] != 0)
-        throw RipleyException("readBinaryGrid(): reversing not supported yet");
+        throw NotImplementedError("readBinaryGrid(): reversing not supported yet");
 
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw RipleyException("readBinaryGrid(): cannot open file");
+        throw IOError("readBinaryGrid(): cannot open file " + filename);
     }
     f.seekg(0, ios::end);
     const int numComp = out.getDataPointSize();
@@ -416,7 +447,7 @@ void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
     const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
     if (filesize < reqsize) {
         f.close();
-        throw RipleyException("readBinaryGrid(): not enough data in file");
+        throw IOError("readBinaryGrid(): not enough data in file");
     }
 
     // check if this rank contributes anything
@@ -499,7 +530,7 @@ void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
     f.close();
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 template<typename ValueType>
 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
                                    const ReaderParameters& params) const
@@ -514,12 +545,12 @@ void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filen
         myN0 = m_NE[0];
         myN1 = m_NE[1];
     } else
-        throw RipleyException("readBinaryGrid(): invalid function space for output data object");
+        throw ValueError("readBinaryGrid(): invalid function space for output data object");
 
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw RipleyException("readBinaryGridFromZipped(): cannot open file");
+        throw IOError("readBinaryGridFromZipped(): cannot open file" + filename);
     }
     f.seekg(0, ios::end);
     const int numComp = out.getDataPointSize();
@@ -532,7 +563,7 @@ void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filen
     filesize = decompressed.size();
     const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
     if (filesize < reqsize) {
-        throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
+        throw IOError("readBinaryGridFromZipped(): not enough data in file");
     }
 
     // check if this rank contributes anything
@@ -609,7 +640,7 @@ void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
             writeBinaryGridImpl<double>(in, filename, byteOrder);
             break;
         default:
-            throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
+            throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
     }
 }
 
@@ -645,13 +676,13 @@ void Rectangle::writeBinaryGridImpl(const escript::Data& in,
         offset0 = m_offset[0];
         offset1 = m_offset[1];
     } else
-        throw RipleyException("writeBinaryGrid(): unsupported function space");
+        throw ValueError("writeBinaryGrid(): unsupported function space");
 
     const int numComp = in.getDataPointSize();
     const int dpp = in.getNumDataPointsPerSample();
 
     if (numComp > 1 || dpp > 1)
-        throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
+        throw NotImplementedError("writeBinaryGrid(): only scalar, single-value data supported");
 
     const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
 
@@ -686,12 +717,12 @@ void Rectangle::writeBinaryGridImpl(const escript::Data& in,
 
 void Rectangle::write(const std::string& filename) const
 {
-    throw RipleyException("write: not supported");
+    throw NotImplementedError("write: not supported");
 }
 
 void Rectangle::dump(const string& fileName) const
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     string fn(fileName);
     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
         fn+=".silo";
@@ -740,7 +771,7 @@ void Rectangle::dump(const string& fileName) const
     }
 
     if (!dbfile)
-        throw RipleyException("dump: Could not create Silo file");
+        throw IOError("dump: Could not create Silo file");
 
     /*
     if (driver==DB_HDF5) {
@@ -830,7 +861,7 @@ void Rectangle::dump(const string& fileName) const
         DBClose(dbfile);
     }
 
-#else // USE_SILO
+#else // ESYS_HAVE_SILO
     throw RipleyException("dump: no Silo support");
 #endif
 }
@@ -858,7 +889,7 @@ const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
 
     std::stringstream msg;
     msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 bool Rectangle::ownSample(int fsType, index_t id) const
@@ -906,13 +937,13 @@ bool Rectangle::ownSample(int fsType, index_t id) const
 
     std::stringstream msg;
     msg << "ownSample: invalid function space type " << fsType;
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 RankVector Rectangle::getOwnerVector(int fsType) const
 {
     RankVector owner;
-    const Esys_MPI_rank rank = m_mpiInfo->rank;
+    const int rank = m_mpiInfo->rank;
 
     if (fsType == Elements || fsType == ReducedElements) {
         owner.assign(getNumElements(), rank);
@@ -943,7 +974,7 @@ RankVector Rectangle::getOwnerVector(int fsType) const
         }
 
     } else {
-        throw RipleyException("getOwnerVector: only valid for element types");
+        throw ValueError("getOwnerVector: only valid for element types");
     }
 
     return owner;
@@ -1050,7 +1081,7 @@ void Rectangle::setToNormal(escript::Data& out) const
         std::stringstream msg;
         msg << "setToNormal: invalid function space type "
             << out.getFunctionSpace().getTypeCode();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 }
 
@@ -1112,7 +1143,7 @@ void Rectangle::setToSize(escript::Data& out) const
         std::stringstream msg;
         msg << "setToSize: invalid function space type "
             << out.getFunctionSpace().getTypeCode();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 }
 
@@ -1137,9 +1168,9 @@ void Rectangle::assembleCoordinates(escript::Data& arg) const
 {
     int numDim = m_numDim;
     if (!arg.isDataPointShapeEqual(1, &numDim))
-        throw RipleyException("setToX: Invalid Data object shape");
+        throw ValueError("setToX: Invalid Data object shape");
     if (!arg.numSamplesEqual(1, getNumNodes()))
-        throw RipleyException("setToX: Illegal number of samples in Data object");
+        throw ValueError("setToX: Illegal number of samples in Data object");
 
     const dim_t NN0 = m_NN[0];
     const dim_t NN1 = m_NN[1];
@@ -1562,29 +1593,18 @@ void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
     }
 }
 
+#ifdef ESYS_HAVE_TRILINOS
 //protected
-void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
+esys_trilinos::const_TrilinosGraph_ptr Rectangle::getTrilinosGraph() const
 {
-    const dim_t numComp = in.getDataPointSize();
-    paso::Coupler_ptr coupler(new paso::Coupler(m_connector, numComp));
-    // expand data object if necessary to be able to grab the whole data
-    const_cast<escript::Data*>(&in)->expand();
-    coupler->startCollect(in.getDataRO());
-
-    const dim_t numDOF = getNumDOF();
-    const dim_t numNodes = getNumNodes();
-    out.requireWrite();
-    const double* buffer = coupler->finishCollect();
-
-#pragma omp parallel for
-    for (index_t i=0; i < numNodes; i++) {
-        const double* src=(m_dofMap[i]<numDOF ?
-                in.getSampleDataRO(m_dofMap[i])
-                : &buffer[(m_dofMap[i]-numDOF)*numComp]);
-        copy(src, src+numComp, out.getSampleDataRW(i));
+    if (m_graph.is_null()) {
+        m_graph = createTrilinosGraph(m_dofId, m_nodeId);
     }
+    return m_graph;
 }
+#endif
 
+#ifdef ESYS_HAVE_PASO
 //protected
 paso::SystemMatrixPattern_ptr Rectangle::getPasoMatrixPattern(
                                                     bool reducedRowOrder,
@@ -1594,11 +1614,12 @@ paso::SystemMatrixPattern_ptr Rectangle::getPasoMatrixPattern(
         return m_pattern;
 
     // first call - create pattern, then return
+    paso::Connector_ptr conn(getPasoConnector());
     const dim_t numDOF = getNumDOF();
-    const dim_t numShared = m_connector->send->numSharedComponents;
-    const dim_t numNeighbours = m_connector->send->numNeighbors;
-    const index_t* offsetInShared = m_connector->send->offsetInShared;
-    const index_t* sendShared = m_connector->send->shared;
+    const dim_t numShared = conn->send->numSharedComponents;
+    const dim_t numNeighbours = conn->send->neighbour.size();
+    const std::vector<index_t>& offsetInShared(conn->send->offsetInShared);
+    const index_t* sendShared = conn->send->shared;
 
     // these are for the couple blocks
     vector<IndexVector> colIndices(numDOF);
@@ -1626,15 +1647,16 @@ paso::SystemMatrixPattern_ptr Rectangle::getPasoMatrixPattern(
     paso::Pattern_ptr rowPattern = createPasoPattern(rowIndices, numDOF);
 
     // allocate paso distribution
-    paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,
-            const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0));
+    escript::Distribution_ptr distribution(new escript::Distribution(
+                                               m_mpiInfo, m_nodeDistribution));
 
     // finally create the system matrix pattern
     m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
             distribution, distribution, mainPattern, colPattern, rowPattern,
-            m_connector, m_connector));
+            conn, conn));
     return m_pattern;
 }
+#endif // ESYS_HAVE_PASO
 
 //private
 void Rectangle::populateSampleIds()
@@ -1799,25 +1821,51 @@ void Rectangle::populateSampleIds()
 }
 
 //private
-vector<IndexVector> Rectangle::getConnections() const
+vector<IndexVector> Rectangle::getConnections(bool includeShared) const
 {
     // returns a vector v of size numDOF where v[i] is a vector with indices
-    // of DOFs connected to i (up to 9 in 2D)
+    // of DOFs connected to i (up to 9 in 2D).
+    // In other words this method returns the occupied (local) matrix columns
+    // for all (local) matrix rows.
+    // If includeShared==true then connections to non-owned DOFs are also
+    // returned (i.e. indices of the column couplings)
     const dim_t nDOF0 = getNumDOFInAxis(0);
     const dim_t nDOF1 = getNumDOFInAxis(1);
-    const dim_t M = nDOF0*nDOF1;
-    vector<IndexVector> indices(M);
-
+    const dim_t numMatrixRows = nDOF0*nDOF1;
+    vector<IndexVector> indices(numMatrixRows);
+
+    if (includeShared) {
+        const index_t left = getFirstInDim(0);
+        const index_t bottom = getFirstInDim(1);
+        const dim_t NN0 = m_NN[0];
+        const dim_t NN1 = m_NN[1];
+#pragma omp parallel for
+        for (index_t i=0; i < numMatrixRows; i++) {
+            const index_t x = left + i % nDOF0;
+            const index_t y = bottom + i / nDOF0;
+            // loop through potential neighbours and add to index if positions
+            // are within bounds
+            for (dim_t i1=y-1; i1<y+2; i1++) {
+                for (dim_t i0=x-1; i0<x+2; i0++) {
+                    if (i0>=0 && i1>=0 && i0<NN0 && i1<NN1) {
+                        indices[i].push_back(m_dofMap[i1*NN0 + i0]);
+                    }
+                }
+            }
+            sort(indices[i].begin(), indices[i].end());
+        }
+    } else {
 #pragma omp parallel for
-    for (index_t i=0; i < M; i++) {
-        const index_t x = i % nDOF0;
-        const index_t y = i / nDOF0;
-        // loop through potential neighbours and add to index if positions are
-        // within bounds
-        for (dim_t i1=y-1; i1<y+2; i1++) {
-            for (dim_t i0=x-1; i0<x+2; i0++) {
-                if (i0>=0 && i1>=0 && i0<nDOF0 && i1<nDOF1) {
-                    indices[i].push_back(i1*nDOF0 + i0);
+        for (index_t i=0; i < numMatrixRows; i++) {
+            const index_t x = i % nDOF0;
+            const index_t y = i / nDOF0;
+            // loop through potential neighbours and add to index if positions
+            // are within bounds
+            for (dim_t i1=y-1; i1<y+2; i1++) {
+                for (dim_t i0=x-1; i0<x+2; i0++) {
+                    if (i0>=0 && i1>=0 && i0<nDOF0 && i1<nDOF1) {
+                        indices[i].push_back(i1*nDOF0 + i0);
+                    }
                 }
             }
         }
@@ -1937,24 +1985,10 @@ void Rectangle::populateDofMap()
         ++numShared;
     }
 
-    // TODO: paso::SharedComponents should take vectors to avoid this
-    Esys_MPI_rank* neighPtr = NULL;
-    index_t* sendPtr = NULL;
-    index_t* recvPtr = NULL;
-    if (neighbour.size() > 0) {
-        neighPtr = &neighbour[0];
-        sendPtr = &sendShared[0];
-        recvPtr = &recvShared[0];
-    }
-
-    // create connector
-    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, sendPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
-            numDOF, neighbour.size(), neighPtr, recvPtr,
-            &offsetInShared[0], 1, 0, m_mpiInfo));
-    m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+#ifdef ESYS_HAVE_PASO
+    createPasoConnector(neighbour, offsetInShared, offsetInShared, sendShared,
+                        recvShared);
+#endif
 
     // useful debug output
     /*
@@ -1979,8 +2013,9 @@ void Rectangle::populateDofMap()
 }
 
 //private
+template<typename Scalar>
 void Rectangle::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
-         const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
+         const vector<Scalar>& EM_S, const vector<Scalar>& EM_F, bool addS,
          bool addF, index_t firstNode, int nEq, int nComp) const
 {
     IndexVector rowIndex(4);
@@ -1989,7 +2024,7 @@ void Rectangle::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
     rowIndex[2] = m_dofMap[firstNode+m_NN[0]];
     rowIndex[3] = m_dofMap[firstNode+m_NN[0]+1];
     if (addF) {
-        double *F_p=F.getSampleDataRW(0);
+        Scalar* F_p = F.getSampleDataRW(0, static_cast<Scalar>(0));
         for (index_t i=0; i<rowIndex.size(); i++) {
             if (rowIndex[i]<getNumDOF()) {
                 for (int eq=0; eq<nEq; eq++) {
@@ -1999,10 +2034,20 @@ void Rectangle::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
         }
     }
     if (addS) {
-        addToSystemMatrix(S, rowIndex, nEq, EM_S);
+        addToSystemMatrix<Scalar>(S, rowIndex, nEq, EM_S);
     }
 }
 
+template
+void Rectangle::addToMatrixAndRHS<real_t>(AbstractSystemMatrix* S, escript::Data& F,
+         const vector<real_t>& EM_S, const vector<real_t>& EM_F, bool addS,
+         bool addF, index_t firstNode, int nEq, int nComp) const;
+
+template
+void Rectangle::addToMatrixAndRHS<cplx_t>(AbstractSystemMatrix* S, escript::Data& F,
+         const vector<cplx_t>& EM_S, const vector<cplx_t>& EM_F, bool addS,
+         bool addF, index_t firstNode, int nEq, int nComp) const;
+
 //protected
 void Rectangle::interpolateNodesOnElements(escript::Data& out,
                                            const escript::Data& in,
@@ -2235,11 +2280,11 @@ namespace
  */
 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
                                 const escript::FunctionSpace& what, long seed,
-                                const boost::python::tuple& filter) const
+                                const bp::tuple& filter) const
 {
     int numvals=escript::DataTypes::noValues(shape);
     if (len(filter) > 0 && numvals != 1)
-        throw RipleyException("Ripley only supports filters for scalar data.");
+        throw NotImplementedError("Ripley only supports filters for scalar data.");
 
     escript::Data res = randomFillWorker(shape, seed, filter);
     if (res.getFunctionSpace() != what) {
@@ -2287,7 +2332,7 @@ escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
  */
 escript::Data Rectangle::randomFillWorker(
                         const escript::DataTypes::ShapeType& shape, long seed,
-                        const boost::python::tuple& filter) const
+                        const bp::tuple& filter) const
 {
     unsigned int radius=0;  // these are only used by gaussian
     double sigma=0.5;
@@ -2299,20 +2344,20 @@ escript::Data Rectangle::randomFillWorker(
     } else if (len(filter) == 3) {
         bp::extract<string> ex(filter[0]);
         if (!ex.check() || (ex()!="gaussian")) {
-            throw RipleyException("Unsupported random filter");
+            throw ValueError("Unsupported random filter");
         }
         bp::extract<unsigned int> ex1(filter[1]);
         if (!ex1.check()) {
-            throw RipleyException("Radius of Gaussian filter must be a positive integer.");
+            throw ValueError("Radius of Gaussian filter must be a positive integer.");
         }
         radius = ex1();
         sigma = 0.5;
         bp::extract<double> ex2(filter[2]);
         if (!ex2.check() || (sigma=ex2()) <= 0) {
-            throw RipleyException("Sigma must be a positive floating point number.");
+            throw ValueError("Sigma must be a positive floating point number.");
         }
     } else {
-        throw RipleyException("Unsupported random filter for Rectangle.");
+        throw ValueError("Unsupported random filter for Rectangle.");
     }
 
     // number of points in the internal region
@@ -2326,14 +2371,14 @@ escript::Data Rectangle::randomFillWorker(
     // That is, would not cross multiple ranks in MPI
 
     if (2*radius >= internal[0]-4) {
-        throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
+        throw ValueError("Radius of gaussian filter is too large for X dimension of a rank");
     }
     if (2*radius >= internal[1]-4) {
-        throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
+        throw ValueError("Radius of gaussian filter is too large for Y dimension of a rank");
     }
 
     double* src = new double[ext[0]*ext[1]*numvals];
-    esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
+    escript::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
 
 #ifdef ESYS_MPI
     if ((internal[0] < 5) || (internal[1] < 5)) {
@@ -2354,7 +2399,7 @@ escript::Data Rectangle::randomFillWorker(
     basey=Y*m_gNE[1]/m_NX[1];
 #endif
 
-    esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
+    escript::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
 */
 
 #ifdef ESYS_MPI
@@ -2419,7 +2464,7 @@ escript::Data Rectangle::randomFillWorker(
         escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
         escript::Data resdat(0, shape, fs, true);
         // don't need to check for exwrite because we just made it
-        escript::DataVector& dv = resdat.getExpandedVectorReference();
+        escript::DataTypes::RealVectorType& dv = resdat.getExpandedVectorReference();
 
         // now we need to copy values over
         for (size_t y=0; y < internal[1]; ++y) {
@@ -2435,7 +2480,7 @@ escript::Data Rectangle::randomFillWorker(
         escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
         escript::Data resdat(0, escript::DataTypes::scalarShape, fs, true);
         // don't need to check for exwrite because we just made it
-        escript::DataVector& dv=resdat.getExpandedVectorReference();
+        escript::DataTypes::RealVectorType& dv=resdat.getExpandedVectorReference();
         double* convolution=get2DGauss(radius, sigma);
         for (size_t y=0; y < internal[1]; ++y) {
             for (size_t x=0; x < internal[0]; ++x) {
@@ -2454,10 +2499,11 @@ dim_t Rectangle::findNode(const double *coords) const
     //is the found element even owned by this rank
     // (inside owned or shared elements but will map to an owned element)
     for (int dim = 0; dim < m_numDim; dim++) {
+        //allows for point outside mapping onto node
         double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
-                - m_dx[dim]/2.; //allows for point outside mapping onto node
+                - m_dx[dim]/2. + escript::DataTypes::real_t_eps();
         double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
-                + m_dx[dim]/2.;
+                + m_dx[dim]/2. - escript::DataTypes::real_t_eps();
         if (min > coords[dim] || max < coords[dim]) {
             return NOT_MINE;
         }
@@ -2502,15 +2548,28 @@ dim_t Rectangle::findNode(const double *coords) const
 Assembler_ptr Rectangle::createAssembler(string type,
                                          const DataMap& constants) const
 {
+    bool isComplex = false;
+    DataMap::const_iterator it;
+    for (it = constants.begin(); it != constants.end(); it++) {
+        if (!it->second.isEmpty() && it->second.isComplex()) {
+            isComplex = true;
+            break;
+        }
+    }
+
     if (type.compare("DefaultAssembler") == 0) {
-        return Assembler_ptr(new DefaultAssembler2D(shared_from_this(), m_dx, m_NE, m_NN));
+        if (isComplex) {
+            return Assembler_ptr(new DefaultAssembler2D<cplx_t>(shared_from_this(), m_dx, m_NE, m_NN));
+        } else {
+            return Assembler_ptr(new DefaultAssembler2D<real_t>(shared_from_this(), m_dx, m_NE, m_NN));
+        }
     } else if (type.compare("WaveAssembler") == 0) {
         return Assembler_ptr(new WaveAssembler2D(shared_from_this(), m_dx, m_NE, m_NN, constants));
     } else if (type.compare("LameAssembler") == 0) {
         return Assembler_ptr(new LameAssembler2D(shared_from_this(), m_dx, m_NE, m_NN));
     }
-    throw RipleyException("Ripley::Rectangle does not support the"
-            " requested assembler");
+    throw NotImplementedError("Ripley::Rectangle does not support the"
+                              " requested assembler");
 }
 
 } // end of namespace ripley
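
addToMatrixAndRHS above becomes a template on the scalar type, with explicit instantiations for
real_t and cplx_t emitted in the same translation unit so the definition can stay out of the
header. A minimal sketch of that explicit-instantiation pattern; Widget::sum is a simplified
stand-in, not a ripley function:

    #include <complex>
    #include <vector>

    // header: declaration only
    class Widget {
    public:
        template<typename Scalar>
        Scalar sum(const std::vector<Scalar>& values) const;
    };

    // source file: definition plus the only instantiations callers may use
    template<typename Scalar>
    Scalar Widget::sum(const std::vector<Scalar>& values) const
    {
        Scalar total{};                 // zero for double and complex alike
        for (const Scalar& v : values)
            total += v;
        return total;
    }

    template double Widget::sum<double>(const std::vector<double>&) const;
    template std::complex<double> Widget::sum<std::complex<double>>(
            const std::vector<std::complex<double>>&) const;
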
diff --git a/ripley/src/Rectangle.h b/ripley/src/Rectangle.h
index b66cd5d..934a00d 100644
--- a/ripley/src/Rectangle.h
+++ b/ripley/src/Rectangle.h
@@ -17,7 +17,6 @@
 #ifndef __RIPLEY_RECTANGLE_H__
 #define __RIPLEY_RECTANGLE_H__
 
-#include <paso/Coupler.h>
 #include <ripley/RipleyDomain.h>
 
 namespace ripley {
@@ -28,7 +27,7 @@ namespace ripley {
 */
 class RIPLEY_DLL_API Rectangle: public RipleyDomain
 {
-    friend class DefaultAssembler2D;
+    template<class Scalar> friend class DefaultAssembler2D;
     friend class WaveAssembler2D;
     friend class LameAssembler2D;
 public:
@@ -88,12 +87,11 @@ public:
     */
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#ifdef USE_BOOSTIO
+
     /**
     */
     virtual void readBinaryGridFromZipped(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#endif
 
     /**
     */
@@ -233,31 +231,41 @@ protected:
                                   const escript::Data& in) const;
     virtual void assembleIntegrate(DoubleVector& integrals,
                                    const escript::Data& arg) const;
+    virtual std::vector<IndexVector> getConnections(bool includeShared=false) const;
+
+#ifdef ESYS_HAVE_TRILINOS
+    virtual esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph() const;
+#endif
+
+#ifdef ESYS_HAVE_PASO
     virtual paso::SystemMatrixPattern_ptr getPasoMatrixPattern(
                              bool reducedRowOrder, bool reducedColOrder) const;
+#endif
     virtual void interpolateNodesOnElements(escript::Data& out,
                                   const escript::Data& in, bool reduced) const;
     virtual void interpolateNodesOnFaces(escript::Data& out,
                                          const escript::Data& in,
                                          bool reduced) const;
     virtual void nodesToDOF(escript::Data& out, const escript::Data& in) const;
-    virtual void dofToNodes(escript::Data& out, const escript::Data& in) const;
     virtual dim_t getDofOfNode(dim_t node) const;
 
     virtual void populateSampleIds();
     virtual void populateDofMap();
-    virtual std::vector<IndexVector> getConnections() const;
-    virtual void addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
-           const DoubleVector& EM_S, const DoubleVector& EM_F,
+
+    template<typename Scalar>
+    void addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
+           const std::vector<Scalar>& EM_S, const std::vector<Scalar>& EM_F,
            bool addS, bool addF, index_t firstNode, int nEq=1, int nComp=1) const;
 
     template<typename ValueType>
     void readBinaryGridImpl(escript::Data& out, const std::string& filename,
                             const ReaderParameters& params) const;
 
+#ifdef ESYS_HAVE_BOOST_IO
     template<typename ValueType>
     void readBinaryGridZippedImpl(escript::Data& out, 
             const std::string& filename, const ReaderParameters& params) const;
+#endif
 
     template<typename ValueType>
     void writeBinaryGridImpl(const escript::Data& in,
@@ -315,12 +323,15 @@ protected:
     // vector that maps each node to a DOF index (used for the coupler)
     IndexVector m_dofMap;
 
-    // Paso connector used by the system matrix and to interpolate DOF to
-    // nodes
-    paso::Connector_ptr m_connector;
-
+#ifdef ESYS_HAVE_PASO
     // the Paso System Matrix pattern
     mutable paso::SystemMatrixPattern_ptr m_pattern;
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// Trilinos graph structure, cached for efficiency
+    mutable esys_trilinos::const_TrilinosGraph_ptr m_graph;
+#endif
 };
 
 ////////////////////////////// inline methods ////////////////////////////////
@@ -336,8 +347,8 @@ inline dim_t Rectangle::getNumDataPointsGlobal() const
 
 inline double Rectangle::getLocalCoordinate(index_t index, int dim) const
 {
-    EsysAssert((dim>=0 && dim<2), "'dim' out of bounds");
-    EsysAssert((index>=0 && index<m_NN[dim]), "'index' out of bounds");
+    ESYS_ASSERT(dim>=0 && dim<2, "'dim' out of bounds");
+    ESYS_ASSERT(index>=0 && index<m_NN[dim], "'index' out of bounds");
     return m_origin[dim]+m_dx[dim]*(m_offset[dim]+index);
 }
 
@@ -358,7 +369,7 @@ inline dim_t Rectangle::getNumDOF() const
 //protected
 inline dim_t Rectangle::getNumDOFInAxis(unsigned axis) const
 {
-    EsysAssert((axis < m_numDim), "Invalid axis");
+    ESYS_ASSERT(axis < m_numDim, "Invalid axis");
     return (m_gNE[axis]+1)/m_NX[axis];
 }
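
The mutable m_pattern and m_graph members added to Rectangle.h above support the same
lazy-caching idiom used by getPasoMatrixPattern() and getTrilinosGraph(): a const accessor
builds the expensive object on first use and caches it. A minimal sketch of that idiom,
not thread-safe as written; Pattern and Domain are stand-in types:

    #include <memory>

    struct Pattern { /* expensive to construct */ };

    class Domain {
    public:
        std::shared_ptr<const Pattern> getPattern() const
        {
            if (!m_pattern)                         // first call: build once
                m_pattern = std::make_shared<const Pattern>();
            return m_pattern;
        }
    private:
        // mutable so the const accessor may fill the cache
        mutable std::shared_ptr<const Pattern> m_pattern;
    };
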
 
diff --git a/ripley/src/Ripley.h b/ripley/src/Ripley.h
index 3a82909..d1862a5 100644
--- a/ripley/src/Ripley.h
+++ b/ripley/src/Ripley.h
@@ -24,7 +24,7 @@
 
 #include <ripley/system_dep.h>
 
-#include <esysUtils/Esys_MPI.h>
+#include <escript/EsysMPI.h>
 
 #include <boost/shared_ptr.hpp>
 #include <list>
@@ -34,10 +34,15 @@
 
 namespace ripley {
 
+using escript::DataTypes::dim_t;
+using escript::DataTypes::index_t;
+using escript::DataTypes::cplx_t;
+using escript::DataTypes::real_t;
+
 typedef std::pair<index_t,index_t> IndexPair;
 typedef std::vector<index_t> IndexVector;
-typedef std::vector<double> DoubleVector;
-typedef std::vector<Esys_MPI_rank> RankVector;
+typedef std::vector<real_t> DoubleVector;
+typedef std::vector<int> RankVector;
 typedef std::map<std::string,int> TagMap;
 
 enum {
diff --git a/ripley/src/RipleyDomain.cpp b/ripley/src/RipleyDomain.cpp
index cf1f2e4..6fa6b41 100644
--- a/ripley/src/RipleyDomain.cpp
+++ b/ripley/src/RipleyDomain.cpp
@@ -15,35 +15,63 @@
 *****************************************************************************/
 
 #include <ripley/RipleyDomain.h>
+#include <ripley/domainhelpers.h>
+
 #include <escript/DataFactory.h>
 #include <escript/FunctionSpaceFactory.h>
-#include <pasowrap/SystemMatrixAdapter.h>
-#include <pasowrap/TransportProblemAdapter.h>
-#include <ripley/domainhelpers.h>
+#include <escript/index.h>
+#include <escript/SolverOptions.h>
 
-#ifdef USE_CUDA
+#ifdef ESYS_HAVE_CUDA
 #include <ripley/RipleySystemMatrix.h>
 #endif
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/TrilinosMatrixAdapter.h>
+#endif
+
+#ifdef ESYS_HAVE_PASO
+#include <paso/SystemMatrix.h>
+#include <paso/Transport.h>
+#endif
 
 #include <iomanip>
+#include <iostream>
 
 namespace bp = boost::python;
 
 using namespace std;
-using paso::TransportProblemAdapter;
+using escript::ValueError;
+using escript::NotImplementedError;
+
+#ifdef ESYS_HAVE_TRILINOS
+using esys_trilinos::TrilinosMatrixAdapter;
+using esys_trilinos::const_TrilinosGraph_ptr;
+#endif
 
 namespace ripley {
 
+DecompositionPolicy RipleyDomain::m_decompPolicy = DECOMP_ADD_ELEMENTS;
+
+void RipleyDomain::setDecompositionPolicy(DecompositionPolicy value)
+{
+    m_decompPolicy = value;
+}
+
+DecompositionPolicy RipleyDomain::getDecompositionPolicy()
+{
+    return m_decompPolicy;
+}
+
 void tupleListToMap(DataMap& mapping, const bp::list& list)
 {
     for (int i = 0; i < len(list); i++) {
         if (!bp::extract<bp::tuple>(list[i]).check())
-            throw RipleyException("Passed in list contains objects"
-                                  " other than tuples");
+            throw ValueError("Passed in list contains objects"
+                                      " other than tuples");
         bp::tuple t = bp::extract<bp::tuple>(list[i]);
         if (len(t) != 2 || !bp::extract<string>(t[0]).check() ||
                 !bp::extract<escript::Data>(t[1]).check())
-            throw RipleyException("The passed in list must contain tuples"
+            throw ValueError("The passed in list must contain tuples"
                 " of the form (string, escript::Data)");
         mapping[bp::extract<string>(t[0])] = bp::extract<escript::Data>(t[1]);
     }
@@ -54,7 +82,7 @@ RipleyDomain::RipleyDomain(dim_t dim, escript::SubWorld_ptr p) :
     m_status(0)
 {
     if (p.get() == NULL)
-        m_mpiInfo = esysUtils::makeInfo(MPI_COMM_WORLD);
+        m_mpiInfo = escript::makeInfo(MPI_COMM_WORLD);
     else
         m_mpiInfo = p->getMPI();
 
@@ -150,7 +178,7 @@ pair<int,dim_t> RipleyDomain::getDataShape(int fsType) const
     stringstream msg;
     msg << "getDataShape: Invalid function space type " << fsType
         << " for " << getDescription();
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 string RipleyDomain::showTagNames() const
@@ -266,7 +294,7 @@ bool RipleyDomain::probeInterpolationOnDomain(int fsType_source,
         stringstream msg;
         msg << "probeInterpolationOnDomain: Invalid function space type "
             << fsType_target << " for " << getDescription();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 
     switch (fsType_source) {
@@ -292,7 +320,7 @@ bool RipleyDomain::probeInterpolationOnDomain(int fsType_source,
             stringstream msg;
             msg << "probeInterpolationOnDomain: Invalid function space type "
                 << fsType_source << " for " << getDescription();
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 }
@@ -304,7 +332,7 @@ signed char RipleyDomain::preferredInterpolationOnDomain(int fsType_source,
         stringstream msg;
         msg << "preferredInterpolationOnDomain: Invalid function space type "
             << fsType_target << " for " << getDescription();
-        throw RipleyException(msg.str());
+        throw ValueError(msg.str());
     }
 
     if (fsType_source==fsType_target) {
@@ -340,7 +368,7 @@ signed char RipleyDomain::preferredInterpolationOnDomain(int fsType_source,
             stringstream msg;
             msg << "probeInterpolationOnDomain: Invalid function space type "
                 << fsType_source << " for " << getDescription();
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 }
@@ -351,9 +379,9 @@ void RipleyDomain::interpolateOnDomain(escript::Data& target,
     const RipleyDomain& inDomain=dynamic_cast<const RipleyDomain&>(*(in.getFunctionSpace().getDomain()));
     const RipleyDomain& targetDomain=dynamic_cast<const RipleyDomain&>(*(target.getFunctionSpace().getDomain()));
     if (inDomain != *this)
-        throw RipleyException("Illegal domain of interpolant");
+        throw ValueError("Illegal domain of interpolant");
     if (targetDomain != *this)
-        throw RipleyException("Illegal domain of interpolation target");
+        throw ValueError("Illegal domain of interpolation target");
 
     stringstream msg;
     msg << "interpolateOnDomain() not implemented for function space "
@@ -370,7 +398,7 @@ void RipleyDomain::interpolateOnDomain(escript::Data& target,
     // not allowed: reduced nodes/DOF->non-reduced nodes/DOF
     } else if ((inFS==ReducedNodes || inFS==ReducedDegreesOfFreedom)
             && (outFS==Nodes || outFS==DegreesOfFreedom)) {
-        throw RipleyException("interpolateOnDomain: Cannot interpolate "
+        throw ValueError("interpolateOnDomain: Cannot interpolate "
                               "reduced data to non-reduced data.");
     } else if ((inFS==Elements && outFS==ReducedElements)
             || (inFS==FaceElements && outFS==ReducedFaceElements)) {
@@ -426,7 +454,7 @@ void RipleyDomain::interpolateOnDomain(escript::Data& target,
                         }
                         break;
                     default:
-                        throw RipleyException(msg.str());
+                        throw NotImplementedError(msg.str());
                 }
                 break;
 
@@ -468,7 +496,7 @@ void RipleyDomain::interpolateOnDomain(escript::Data& target,
                         break;
 
                     default:
-                        throw RipleyException(msg.str());
+                        throw NotImplementedError(msg.str());
                 }
                 break;
             case Points:
@@ -488,7 +516,7 @@ void RipleyDomain::interpolateOnDomain(escript::Data& target,
                 }
                 break;
             default:
-                throw RipleyException(msg.str());
+                throw NotImplementedError(msg.str());
         }
     }
 }
@@ -513,9 +541,9 @@ void RipleyDomain::setToX(escript::Data& arg) const
     const RipleyDomain& argDomain=dynamic_cast<const RipleyDomain&>(
             *(arg.getFunctionSpace().getDomain()));
     if (argDomain != *this)
-        throw RipleyException("setToX: Illegal domain of data point locations");
+        throw ValueError("setToX: Illegal domain of data point locations");
     if (!arg.isExpanded())
-        throw RipleyException("setToX: Expanded Data object expected");
+        throw ValueError("setToX: Expanded Data object expected");
 
     if (arg.getFunctionSpace().getTypeCode()==Nodes) {
         assembleCoordinates(arg);
@@ -532,11 +560,11 @@ void RipleyDomain::setToGradient(escript::Data& grad, const escript::Data& arg)
     const RipleyDomain& argDomain=dynamic_cast<const RipleyDomain&>(
             *(arg.getFunctionSpace().getDomain()));
     if (argDomain != *this)
-        throw RipleyException("setToGradient: Illegal domain of gradient argument");
+        throw ValueError("setToGradient: Illegal domain of gradient argument");
     const RipleyDomain& gradDomain=dynamic_cast<const RipleyDomain&>(
             *(grad.getFunctionSpace().getDomain()));
     if (gradDomain != *this)
-        throw RipleyException("setToGradient: Illegal domain of gradient");
+        throw ValueError("setToGradient: Illegal domain of gradient");
 
     switch (grad.getFunctionSpace().getTypeCode()) {
         case Elements:
@@ -548,7 +576,7 @@ void RipleyDomain::setToGradient(escript::Data& grad, const escript::Data& arg)
             stringstream msg;
             msg << "setToGradient: not supported for "
                 << functionSpaceTypeAsString(grad.getFunctionSpace().getTypeCode());
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 
@@ -559,7 +587,7 @@ void RipleyDomain::setToGradient(escript::Data& grad, const escript::Data& arg)
         case ReducedNodes:
             break;
         default: {
-            throw RipleyException("setToGradient: only supported for nodal input data");
+            throw ValueError("setToGradient: only supported for nodal input data");
         }
     }
 
@@ -583,7 +611,7 @@ void RipleyDomain::setToIntegrals(vector<double>& integrals, const escript::Data
     const RipleyDomain& argDomain=dynamic_cast<const RipleyDomain&>(
             *(arg.getFunctionSpace().getDomain()));
     if (argDomain != *this)
-        throw RipleyException("setToIntegrals: illegal domain of integration kernel");
+        throw ValueError("setToIntegrals: illegal domain of integration kernel");
 
     switch (arg.getFunctionSpace().getTypeCode()) {
         case Nodes:
@@ -605,7 +633,7 @@ void RipleyDomain::setToIntegrals(vector<double>& integrals, const escript::Data
             stringstream msg;
             msg << "setToIntegrals: not supported for "
                 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 
@@ -631,7 +659,7 @@ bool RipleyDomain::isCellOriented(int fsType) const
     stringstream msg;
     msg << "isCellOriented: invalid function space type " << fsType
         << " on " << getDescription();
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 bool RipleyDomain::canTag(int fsType) const
@@ -654,7 +682,7 @@ bool RipleyDomain::canTag(int fsType) const
     stringstream msg;
     msg << "canTag: invalid function space type " << fsType << " on "
         << getDescription();
-    throw RipleyException(msg.str());
+    throw ValueError(msg.str());
 }
 
 void RipleyDomain::setTags(int fsType, int newTag, const escript::Data& mask) const
@@ -680,7 +708,7 @@ void RipleyDomain::setTags(int fsType, int newTag, const escript::Data& mask) co
         default: {
             stringstream msg;
             msg << "setTags: invalid function space type " << fsType;
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
     if (target->size() != num) {
@@ -720,7 +748,7 @@ int RipleyDomain::getTagFromSampleNo(int fsType, dim_t sampleNo) const
         default: {
             stringstream msg;
             msg << "getTagFromSampleNo: invalid function space type " << fsType;
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
     return -1;
@@ -741,7 +769,7 @@ int RipleyDomain::getNumberOfTagsInUse(int fsType) const
             stringstream msg;
             msg << "getNumberOfTagsInUse: invalid function space type "
                 << fsType;
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 }
@@ -761,7 +789,7 @@ const int* RipleyDomain::borrowListOfTagsInUse(int fsType) const
             stringstream msg;
             msg << "borrowListOfTagsInUse: invalid function space type "
                 << fsType;
-            throw RipleyException(msg.str());
+            throw ValueError(msg.str());
         }
     }
 }
@@ -786,13 +814,17 @@ int RipleyDomain::getSystemMatrixTypeId(const bp::object& options) const
 {
     const escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy>(options);
     int package = sb.getPackage();
+    escript::SolverOptions method = sb.getSolverMethod();
+#ifdef ESYS_HAVE_TRILINOS
+    bool isDirect = escript::isDirectSolver(method);
+#endif
 
     // use CUSP for single rank and supported solvers+preconditioners if CUDA
-    // is available, PASO otherwise
+    // is available, PASO or Trilinos otherwise
+#ifdef ESYS_HAVE_CUDA
     if (package == escript::SO_DEFAULT) {
-#ifdef USE_CUDA
         if (m_mpiInfo->size == 1) {
-            switch (sb.getSolverMethod()) {
+            switch (method) {
                 case escript::SO_DEFAULT:
                 case escript::SO_METHOD_BICGSTAB:
                 case escript::SO_METHOD_CGLS:
@@ -802,44 +834,87 @@ int RipleyDomain::getSystemMatrixTypeId(const bp::object& options) const
                 case escript::SO_METHOD_PRES20:
                     package = escript::SO_PACKAGE_CUSP;
                     break;
-                default:
-                    package = escript::SO_PACKAGE_PASO;
             }
             if (package == escript::SO_PACKAGE_CUSP) {
                 if (sb.getPreconditioner() != escript::SO_PRECONDITIONER_NONE &&
                         sb.getPreconditioner() != escript::SO_PRECONDITIONER_JACOBI) {
-                    package = escript::SO_PACKAGE_PASO;
+                    package = escript::SO_PACKAGE_DEFAULT;
                 }
             }
-        } else {
-            package = escript::SO_PACKAGE_PASO;
         }
-#else // USE_CUDA
-        package = escript::SO_PACKAGE_PASO;
-#endif
     }
+#endif // ESYS_HAVE_CUDA
 
+    // the configuration of ripley should have taken care that we have either
+    // paso or trilinos so here's how we prioritize
+#if defined(ESYS_HAVE_PASO) && defined(ESYS_HAVE_TRILINOS)
+    // we have Paso & Trilinos so use Trilinos for parallel direct solvers
+    if (package == escript::SO_DEFAULT) {
+        if ((method == escript::SO_METHOD_DIRECT && getMPISize() > 1)
+                || isDirect
+                || sb.isComplex()) {
+            package = escript::SO_PACKAGE_TRILINOS;
+        }
+    }
+#endif
+#ifdef ESYS_HAVE_PASO
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_PASO;
+#endif
+#ifdef ESYS_HAVE_TRILINOS
+    if (package == escript::SO_DEFAULT)
+        package = escript::SO_PACKAGE_TRILINOS;
+#endif
     if (package == escript::SO_PACKAGE_CUSP) {
         if (m_mpiInfo->size > 1) {
-            throw RipleyException("CUSP matrices are not supported with more than one rank");
+            throw NotImplementedError("CUSP matrices are not supported with more than one rank");
+        }
+        if (sb.isComplex()) {
+            throw NotImplementedError("CUSP does not support complex-valued matrices");
         }
         int type = (int)SMT_CUSP;
         if (sb.isSymmetric())
             type |= (int)SMT_SYMMETRIC;
         return type;
+    } else if (package == escript::SO_PACKAGE_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        int type = (int)SMT_TRILINOS;
+        if (sb.isComplex())
+            type |= (int)SMT_COMPLEX;
+        // This is required because MueLu (AMG) and Amesos2 (direct) do not
+        // support block matrices at this point. Remove if they ever do...
+        if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_AMG ||
+                sb.getPreconditioner() == escript::SO_PRECONDITIONER_ILUT ||
+                isDirect) {
+            type |= (int)SMT_UNROLL;
+        }
+        return type;
+#else
+        throw RipleyException("Trilinos requested but not built with Trilinos.");
+#endif
+    }
+#ifdef ESYS_HAVE_PASO
+    if (sb.isComplex()) {
+        throw NotImplementedError("Paso does not support complex-valued matrices");
     }
-
     // in all other cases we use PASO
-    return (int)SMT_PASO | paso::SystemMatrixAdapter::getSystemMatrixTypeId(
-            sb.getSolverMethod(), sb.getPreconditioner(), sb.getPackage(),
+    return (int)SMT_PASO | paso::SystemMatrix::getSystemMatrixTypeId(
+            method, sb.getPreconditioner(), sb.getPackage(),
             sb.isSymmetric(), m_mpiInfo);
+#else
+    throw RipleyException("Unable to find a working solver library!");
+#endif
 }
 
 int RipleyDomain::getTransportTypeId(int solver, int preconditioner,
                                      int package, bool symmetry) const
 {
-    return TransportProblemAdapter::getTransportTypeId(solver, preconditioner,
+#ifdef ESYS_HAVE_PASO
+    return paso::TransportProblem::getTypeId(solver, preconditioner,
             package, symmetry, m_mpiInfo);
+#else
+    throw RipleyException("Transport solvers require Paso but ripley was not compiled with Paso!");
+#endif
 }
 
 escript::ASM_ptr RipleyDomain::newSystemMatrix(int row_blocksize,
@@ -851,32 +926,32 @@ escript::ASM_ptr RipleyDomain::newSystemMatrix(int row_blocksize,
     // is the domain right?
     const RipleyDomain& row_domain=dynamic_cast<const RipleyDomain&>(*(row_functionspace.getDomain()));
     if (row_domain != *this)
-        throw RipleyException("newSystemMatrix: domain of row function space does not match the domain of matrix generator");
+        throw ValueError("newSystemMatrix: domain of row function space does not match the domain of matrix generator");
     const RipleyDomain& col_domain=dynamic_cast<const RipleyDomain&>(*(column_functionspace.getDomain()));
     if (col_domain != *this)
-        throw RipleyException("newSystemMatrix: domain of column function space does not match the domain of matrix generator");
+        throw ValueError("newSystemMatrix: domain of column function space does not match the domain of matrix generator");
     // is the function space type right?
     if (row_functionspace.getTypeCode()==ReducedDegreesOfFreedom)
         reduceRowOrder=true;
     else if (row_functionspace.getTypeCode()!=DegreesOfFreedom)
-        throw RipleyException("newSystemMatrix: illegal function space type for system matrix rows");
+        throw ValueError("newSystemMatrix: illegal function space type for system matrix rows");
     if (column_functionspace.getTypeCode()==ReducedDegreesOfFreedom)
         reduceColOrder=true;
     else if (column_functionspace.getTypeCode()!=DegreesOfFreedom)
-        throw RipleyException("newSystemMatrix: illegal function space type for system matrix columns");
+        throw ValueError("newSystemMatrix: illegal function space type for system matrix columns");
     // are block sizes identical?
     if (row_blocksize != column_blocksize)
-        throw RipleyException("newSystemMatrix: row/column block sizes must be equal");
+        throw ValueError("newSystemMatrix: row/column block sizes must be equal");
     // are function spaces equal
     if (reduceRowOrder != reduceColOrder)
-        throw RipleyException("newSystemMatrix: row/column function spaces must be equal");
+        throw ValueError("newSystemMatrix: row/column function spaces must be equal");
 
     // generate matrix
     //if (reduceRowOrder || reduceColOrder)
-    //    throw RipleyException("newSystemMatrix: reduced order not supported");
+    //    throw NotImplementedError("newSystemMatrix: reduced order not supported");
 
     if (type & (int)SMT_CUSP) {
-#ifdef USE_CUDA
+#ifdef ESYS_HAVE_CUDA
         const dim_t numMatrixRows = getNumDOF();
         bool symmetric = (type & (int)SMT_SYMMETRIC);
         escript::ASM_ptr sm(new SystemMatrix(m_mpiInfo, row_blocksize,
@@ -884,18 +959,34 @@ escript::ASM_ptr RipleyDomain::newSystemMatrix(int row_blocksize,
                     getDiagonalIndices(symmetric), symmetric));
         return sm;
 #else
-        throw RipleyException("newSystemMatrix: ripley was compiled without CUDA support so CUSP solvers & matrices are not available.");
+        throw RipleyException("newSystemMatrix: ripley was not compiled with "
+               "CUDA support so CUSP solvers & matrices are not available.");
+#endif
+    } else if (type & (int)SMT_TRILINOS) {
+#ifdef ESYS_HAVE_TRILINOS
+        const_TrilinosGraph_ptr graph(getTrilinosGraph());
+        bool isComplex = (type & (int)SMT_COMPLEX);
+        bool unroll = (type & (int)SMT_UNROLL);
+        escript::ASM_ptr sm(new TrilinosMatrixAdapter(m_mpiInfo, row_blocksize,
+                    row_functionspace, graph, isComplex, unroll));
+        return sm;
+#else
+        throw RipleyException("newSystemMatrix: ripley was not compiled with "
+               "Trilinos support so the Trilinos solver stack cannot be used.");
 #endif
     } else if (type & (int)SMT_PASO) {
+#ifdef ESYS_HAVE_PASO
         paso::SystemMatrixPattern_ptr pattern(getPasoMatrixPattern(
                                             reduceRowOrder, reduceColOrder));
         type -= (int)SMT_PASO;
-        paso::SystemMatrix_ptr matrix(new paso::SystemMatrix(type, pattern,
-                row_blocksize, column_blocksize, false));
-        escript::ASM_ptr sm(new paso::SystemMatrixAdapter(matrix,
-                    row_blocksize, row_functionspace, column_blocksize,
-                    column_functionspace));
+        escript::ASM_ptr sm(new paso::SystemMatrix(type, pattern,
+                row_blocksize, column_blocksize, false, row_functionspace,
+                column_functionspace));
         return sm;
+#else
+        throw RipleyException("newSystemMatrix: ripley was not compiled with "
+               "Paso support so the Paso solver stack cannot be used.");
+#endif
     } else {
         throw RipleyException("newSystemMatrix: unknown matrix type ID");
     }
@@ -906,7 +997,7 @@ void RipleyDomain::addToSystem(escript::AbstractSystemMatrix& mat,
                                Assembler_ptr assembler) const
 {
     if (isNotEmpty("d_contact", coefs) || isNotEmpty("y_contact", coefs))
-        throw RipleyException(
+        throw ValueError(
                     "addToSystem: Ripley does not support contact elements");
 
     assemblePDE(&mat, rhs, coefs, assembler);
@@ -944,13 +1035,13 @@ void RipleyDomain::addToRHS(escript::Data& rhs, const DataMap& coefs,
                             Assembler_ptr assembler) const
 {
     if (isNotEmpty("y_contact", coefs))
-        throw RipleyException(
+        throw ValueError(
                     "addPDEToRHS: Ripley does not support contact elements");
 
     if (rhs.isEmpty()) {
         if ((isNotEmpty("X", coefs) && isNotEmpty("du", coefs))
                 || isNotEmpty("Y", coefs))
-            throw RipleyException(
+            throw ValueError(
                     "addPDEToRHS: right hand side coefficients are provided "
                     "but no right hand side vector given");
         else
@@ -965,25 +1056,26 @@ void RipleyDomain::addToRHS(escript::Data& rhs, const DataMap& coefs,
 escript::ATP_ptr RipleyDomain::newTransportProblem(int blocksize,
                   const escript::FunctionSpace& functionspace, int type) const
 {
-    bool reduceOrder=false;
     // is the domain right?
     const RipleyDomain& domain=dynamic_cast<const RipleyDomain&>(*(functionspace.getDomain()));
     if (domain != *this)
-        throw RipleyException("newTransportProblem: domain of function space does not match the domain of transport problem generator");
+        throw ValueError("newTransportProblem: domain of function space does not match the domain of transport problem generator");
     // is the function space type right?
-    if (functionspace.getTypeCode()==ReducedDegreesOfFreedom)
-        reduceOrder=true;
-    else if (functionspace.getTypeCode()!=DegreesOfFreedom)
-        throw RipleyException("newTransportProblem: illegal function space type for transport problem");
+    if (functionspace.getTypeCode() != ReducedDegreesOfFreedom &&
+            functionspace.getTypeCode() != DegreesOfFreedom)
+        throw ValueError("newTransportProblem: illegal function space type for transport problem");
 
+#ifdef ESYS_HAVE_PASO
+    const bool reduced = (functionspace.getTypeCode() == ReducedDegreesOfFreedom);
     // generate matrix
-    paso::SystemMatrixPattern_ptr pattern(getPasoMatrixPattern(reduceOrder,
-                                                               reduceOrder));
-    paso::TransportProblem_ptr tp(new paso::TransportProblem(pattern,
-                                                             blocksize));
-    paso::checkPasoError();
-    escript::ATP_ptr atp(new TransportProblemAdapter(tp, blocksize, functionspace));
-    return atp;
+    paso::SystemMatrixPattern_ptr pattern(getPasoMatrixPattern(reduced, reduced));
+    escript::ATP_ptr tp(new paso::TransportProblem(pattern, blocksize,
+                                                   functionspace));
+    return tp;
+#else
+    throw RipleyException("newTransportProblem: transport problems require the "
+           "Paso library which is not available.");
+#endif
 }
 
 void RipleyDomain::addPDEToTransportProblemFromPython(
@@ -1000,25 +1092,24 @@ void RipleyDomain::addPDEToTransportProblem(
                 const DataMap& coefs, Assembler_ptr assembler) const
 {
     if (isNotEmpty("d_contact", coefs) || isNotEmpty("y_contact", coefs))
-        throw RipleyException("addPDEToTransportProblem: Ripley does not support contact elements");
+        throw ValueError("addPDEToTransportProblem: Ripley does not support contact elements");
 
-    TransportProblemAdapter* tpa=dynamic_cast<TransportProblemAdapter*>(&tp);
-    if (!tpa)
-        throw RipleyException("addPDEToTransportProblem: Ripley only accepts Paso transport problems");
+#ifdef ESYS_HAVE_PASO
+    paso::TransportProblem* ptp = dynamic_cast<paso::TransportProblem*>(&tp);
+    if (!ptp)
+        throw ValueError("addPDEToTransportProblem: Ripley only accepts Paso transport problems");
 
-    paso::TransportProblem_ptr ptp(tpa->getPaso_TransportProblem());
+    escript::ASM_ptr mm(ptp->borrowMassMatrix());
+    escript::ASM_ptr tm(ptp->borrowTransportMatrix());
 
-    paso::SystemMatrixAdapter mm(ptp->borrowMassMatrix(), ptp->getBlockSize(),
-                                 tpa->getFunctionSpace(), ptp->getBlockSize(),
-                                 tpa->getFunctionSpace());
-    paso::SystemMatrixAdapter tm(ptp->borrowTransportMatrix(),
-                                 ptp->getBlockSize(), tpa->getFunctionSpace(),
-                                 ptp->getBlockSize(), tpa->getFunctionSpace());
-
-    assemblePDE(&mm, source, coefs, assembler);
-    assemblePDE(&tm, source, coefs, assembler);
-    assemblePDEBoundary(&tm, source, coefs, assembler);
-    assemblePDEDirac(&tm, source, coefs, assembler);
+    assemblePDE(mm.get(), source, coefs, assembler);
+    assemblePDE(tm.get(), source, coefs, assembler);
+    assemblePDEBoundary(tm.get(), source, coefs, assembler);
+    assemblePDEDirac(tm.get(), source, coefs, assembler);
+#else
+    throw RipleyException("addPDEToTransportProblem: transport problems "
+                          "require the Paso library which is not available.");
+#endif
 }
 
 void RipleyDomain::addPDEToTransportProblem(
@@ -1034,10 +1125,68 @@ void RipleyDomain::addPDEToTransportProblem(
 
 }
 
-
 void RipleyDomain::setNewX(const escript::Data& arg)
 {
-    throw RipleyException("setNewX(): operation not supported");
+    throw NotImplementedError("setNewX(): operation not supported");
+}
+
+//protected
+void RipleyDomain::dofToNodes(escript::Data& out, const escript::Data& in) const
+{
+    // expand data object if necessary
+    const_cast<escript::Data*>(&in)->expand();
+    const dim_t numComp = in.getDataPointSize();
+    const dim_t numNodes = getNumNodes();
+    out.requireWrite();
+#ifdef ESYS_HAVE_PASO
+    paso::Coupler_ptr coupler(new paso::Coupler(m_connector, numComp, m_mpiInfo));
+    coupler->startCollect(in.getSampleDataRO(0));
+    const dim_t numDOF = getNumDOF();
+    const real_t* buffer = coupler->finishCollect();
+
+#pragma omp parallel for
+    for (index_t i = 0; i < numNodes; i++) {
+        const index_t dof = getDofOfNode(i);
+        const real_t* src = (dof < numDOF ? in.getSampleDataRO(dof)
+                                          : &buffer[(dof - numDOF) * numComp]);
+        copy(src, src+numComp, out.getSampleDataRW(i));
+    }
+#elif defined(ESYS_HAVE_TRILINOS)
+    using namespace esys_trilinos;
+
+    const_TrilinosGraph_ptr graph(getTrilinosGraph());
+    Teuchos::RCP<const MapType> colMap;
+    Teuchos::RCP<const MapType> rowMap;
+    MapType colPointMap;
+    MapType rowPointMap;
+
+    if (numComp > 1) {
+        colPointMap = RealBlockVector::makePointMap(*graph->getColMap(),
+                                                    numComp);
+        rowPointMap = RealBlockVector::makePointMap(*graph->getRowMap(),
+                                                    numComp);
+        colMap = Teuchos::rcpFromRef(colPointMap);
+        rowMap = Teuchos::rcpFromRef(rowPointMap);
+    } else {
+        colMap = graph->getColMap();
+        rowMap = graph->getRowMap();
+    }
+
+    const ImportType importer(rowMap, colMap);
+    const Teuchos::ArrayView<const real_t> localIn(in.getSampleDataRO(0),
+                                                in.getNumDataPoints()*numComp);
+    Teuchos::RCP<RealVector> lclData = rcp(new RealVector(rowMap, localIn,
+                                                          localIn.size(), 1));
+    Teuchos::RCP<RealVector> gblData = rcp(new RealVector(colMap, 1));
+    gblData->doImport(*lclData, importer, Tpetra::INSERT);
+    Teuchos::ArrayRCP<const real_t> gblArray(gblData->getData(0));
+
+#pragma omp parallel for
+    for (index_t i = 0; i < numNodes; i++) {
+        const real_t* src = &gblArray[getDofOfNode(i) * numComp];
+        copy(src, src+numComp, out.getSampleDataRW(i));
+    }
+#endif // ESYS_HAVE_TRILINOS
 }
 
 //protected
@@ -1112,8 +1261,8 @@ void RipleyDomain::updateTagsInUse(int fsType) const
             tagsInUse=&m_faceTagsInUse;
             break;
         case Points:
-            throw RipleyException("updateTagsInUse for Ripley dirac points "
-                    "not supported");
+            throw NotImplementedError("updateTagsInUse for Ripley dirac points"
+                                      " not supported");
         default:
             return;
     }
@@ -1156,6 +1305,22 @@ void RipleyDomain::updateTagsInUse(int fsType) const
     }
 }
 
+#ifdef ESYS_HAVE_PASO
+void RipleyDomain::createPasoConnector(const RankVector& neighbour, 
+                                       const IndexVector& offsetInSharedSend,
+                                       const IndexVector& offsetInSharedRecv,
+                                       const IndexVector& sendShared,
+                                       const IndexVector& recvShared)
+{
+    const index_t* sendPtr = neighbour.empty() ? NULL : &sendShared[0];
+    const index_t* recvPtr = neighbour.empty() ? NULL : &recvShared[0];
+    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
+            getNumDOF(), neighbour, sendPtr, offsetInSharedSend));
+    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
+            getNumDOF(), neighbour, recvPtr, offsetInSharedRecv));
+    m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
+}
+
 //protected
 paso::Pattern_ptr RipleyDomain::createPasoPattern(
                             const vector<IndexVector>& indices, dim_t N) const
@@ -1177,35 +1342,105 @@ paso::Pattern_ptr RipleyDomain::createPasoPattern(
 
     return paso::Pattern_ptr(new paso::Pattern(MATRIX_FORMAT_DEFAULT, M, N, ptr, index));
 }
+#endif // ESYS_HAVE_PASO
 
+#ifdef ESYS_HAVE_TRILINOS
 //protected
-void RipleyDomain::addToSystemMatrix(escript::AbstractSystemMatrix* mat,
-                                     const IndexVector& nodes, dim_t numEq,
-                                     const DoubleVector& array) const
+esys_trilinos::const_TrilinosGraph_ptr RipleyDomain::createTrilinosGraph(
+                                            const IndexVector& myRows,
+                                            const IndexVector& myColumns) const
 {
-    paso::SystemMatrixAdapter* sma =
-                    dynamic_cast<paso::SystemMatrixAdapter*>(mat);
-    if (sma) {
-        paso::SystemMatrix_ptr S(sma->getPaso_SystemMatrix());
-        addToSystemMatrix(S, nodes, numEq, array);
-    } else {
-#ifdef USE_CUDA
-        SystemMatrix* sm = dynamic_cast<SystemMatrix*>(mat);
-        if (sm) {
-            sm->add(nodes, array);
-        } else {
-            throw RipleyException("addToSystemMatrix: unknown system matrix type");
-        }
-#else
-        throw RipleyException("addToSystemMatrix: unknown system matrix type");
+    using namespace esys_trilinos;
+
+    const dim_t numMatrixRows = getNumDOF();
+
+    TrilinosMap_ptr rowMap(new MapType(getNumDataPointsGlobal(), myRows,
+                0, TeuchosCommFromEsysComm(m_mpiInfo->comm)));
+
+    IndexVector columns(getNumNodes());
+    // order is important - our columns (=myRows) come first, followed by
+    // shared ones (=node Id for non-DOF)
+#pragma omp parallel for
+    for (size_t i=0; i<columns.size(); i++) {
+        columns[getDofOfNode(i)] = myColumns[i];
+    }
+    TrilinosMap_ptr colMap(new MapType(getNumDataPointsGlobal(), columns,
+                0, TeuchosCommFromEsysComm(m_mpiInfo->comm)));
+
+    // now build CSR arrays (rowPtr and colInd)
+    const vector<IndexVector>& conns(getConnections(true));
+    Teuchos::ArrayRCP<size_t> rowPtr(numMatrixRows+1);
+    for (size_t i=0; i < numMatrixRows; i++) {
+        rowPtr[i+1] = rowPtr[i] + conns[i].size();
+    }
+
+    Teuchos::ArrayRCP<LO> colInd(rowPtr[numMatrixRows]);
+
+#pragma omp parallel for
+    for (index_t i=0; i < numMatrixRows; i++) {
+        copy(conns[i].begin(), conns[i].end(), &colInd[rowPtr[i]]);
+    }
+
+    TrilinosGraph_ptr graph(new GraphType(rowMap, colMap, rowPtr, colInd));
+    Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    params->set("Optimize Storage", true);
+    graph->fillComplete(rowMap, rowMap, params);
+    return graph;
+}
+#endif // ESYS_HAVE_TRILINOS
+
+//protected
+template<>
+void RipleyDomain::addToSystemMatrix<real_t>(escript::AbstractSystemMatrix* mat,
+                                         const IndexVector& nodes, dim_t numEq,
+                                         const DoubleVector& array) const
+{
+#ifdef ESYS_HAVE_PASO
+    paso::SystemMatrix* psm = dynamic_cast<paso::SystemMatrix*>(mat);
+    if (psm) {
+        addToPasoMatrix(psm, nodes, numEq, array);
+        return;
+    }
+#endif
+#ifdef ESYS_HAVE_CUDA
+    SystemMatrix* rsm = dynamic_cast<SystemMatrix*>(mat);
+    if (rsm) {
+        rsm->add(nodes, array);
+        return;
+    }
 #endif
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(mat);
+    if (tm) {
+        tm->add(nodes, array);
+        return;
     }
+#endif
+    throw RipleyException("addToSystemMatrix: unknown system matrix type");
 }
 
+//protected
+template<>
+void RipleyDomain::addToSystemMatrix<cplx_t>(escript::AbstractSystemMatrix* mat,
+                                         const IndexVector& nodes, dim_t numEq,
+                                         const vector<cplx_t>& array) const
+{
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(mat);
+    if (tm) {
+        tm->add(nodes, array);
+        return;
+    }
+#endif
+    throw RipleyException("addToSystemMatrix: only Trilinos matrices support "
+                          "complex-valued assembly!");
+}
+
+#ifdef ESYS_HAVE_PASO
 //private
-void RipleyDomain::addToSystemMatrix(paso::SystemMatrix_ptr mat,
-                                     const IndexVector& nodes, dim_t numEq,
-                                     const vector<double>& array) const
+void RipleyDomain::addToPasoMatrix(paso::SystemMatrix* mat,
+                                   const IndexVector& nodes, dim_t numEq,
+                                   const vector<double>& array) const
 {
     const dim_t numMyCols = mat->pattern->mainPattern->numInput;
     const dim_t numMyRows = mat->pattern->mainPattern->numOutput;
@@ -1330,6 +1565,7 @@ void RipleyDomain::addToSystemMatrix(paso::SystemMatrix_ptr mat,
     }
 #undef UPDATE_BLOCK
 }
+#endif // ESYS_HAVE_PASO
 
 //private
 void RipleyDomain::assemblePDE(escript::AbstractSystemMatrix* mat,
@@ -1338,8 +1574,8 @@ void RipleyDomain::assemblePDE(escript::AbstractSystemMatrix* mat,
 {
     if (rhs.isEmpty() && (isNotEmpty("X", coefs) || isNotEmpty("du", coefs))
                 && isNotEmpty("Y", coefs))
-        throw RipleyException("assemblePDE: right hand side coefficients are "
-                    "provided but no right hand side vector given");
+        throw ValueError("assemblePDE: right hand side coefficients are "
+                         "provided but no right hand side vector given");
 
     vector<int> fsTypes;
     assembler->collateFunctionSpaceTypes(fsTypes, coefs);
@@ -1350,11 +1586,11 @@ void RipleyDomain::assemblePDE(escript::AbstractSystemMatrix* mat,
     
     int fs=fsTypes[0];
     if (fs != Elements && fs != ReducedElements)
-        throw RipleyException("assemblePDE: illegal function space type for coefficients");
+        throw ValueError("assemblePDE: illegal function space type for coefficients");
 
     for (vector<int>::const_iterator it=fsTypes.begin()+1; it!=fsTypes.end(); it++) {
         if (*it != fs) {
-            throw RipleyException("assemblePDE: coefficient function spaces don't match");
+            throw ValueError("assemblePDE: coefficient function spaces don't match");
         }
     }
 
@@ -1367,15 +1603,20 @@ void RipleyDomain::assemblePDE(escript::AbstractSystemMatrix* mat,
         }
     } else {
         if (!rhs.isEmpty() && rhs.getDataPointSize() != mat->getRowBlockSize())
-            throw RipleyException("assemblePDE: matrix row block size and number of components of right hand side don't match");
+            throw ValueError("assemblePDE: matrix row block size and number of components of right hand side don't match");
         numEq = mat->getRowBlockSize();
         numComp = mat->getColumnBlockSize();
     }
 
     if (numEq != numComp)
-        throw RipleyException("assemblePDE: number of equations and number of solutions don't match");
+        throw ValueError("assemblePDE: number of equations and number of solutions don't match");
 
-    //TODO: check shape and num samples of coeffs
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(mat);
+    if (tm) {
+        tm->resumeFill();
+    }
+#endif
 
     if (numEq==1) {
         if (fs==ReducedElements) {
@@ -1390,6 +1631,12 @@ void RipleyDomain::assemblePDE(escript::AbstractSystemMatrix* mat,
             assembler->assemblePDESystem(mat, rhs, coefs);
         }
     }
+
+#ifdef ESYS_HAVE_TRILINOS
+    if (tm) {
+        tm->fillComplete(true);
+    }
+#endif
 }
 
 //private
@@ -1397,7 +1644,7 @@ void RipleyDomain::assemblePDEBoundary(escript::AbstractSystemMatrix* mat,
         escript::Data& rhs, const DataMap& coefs, Assembler_ptr assembler) const
 {
     if (rhs.isEmpty() && isNotEmpty("y", coefs))
-        throw RipleyException("assemblePDEBoundary: y provided but no right hand side vector given");
+        throw ValueError("assemblePDEBoundary: y provided but no right hand side vector given");
 
     int fs=-1;
     if (isNotEmpty("d", coefs))
@@ -1407,14 +1654,14 @@ void RipleyDomain::assemblePDEBoundary(escript::AbstractSystemMatrix* mat,
         if (fs == -1)
             fs = iy->second.getFunctionSpace().getTypeCode();
         else if (fs != iy->second.getFunctionSpace().getTypeCode())
-            throw RipleyException("assemblePDEBoundary: coefficient function spaces don't match");
+            throw ValueError("assemblePDEBoundary: coefficient function spaces don't match");
     }
     if (fs==-1) {
         return;
     }
 
     if (fs != FaceElements && fs != ReducedFaceElements)
-        throw RipleyException("assemblePDEBoundary: illegal function space type for coefficients");
+        throw ValueError("assemblePDEBoundary: illegal function space type for coefficients");
 
     int numEq, numComp;
     if (!mat) {
@@ -1425,15 +1672,20 @@ void RipleyDomain::assemblePDEBoundary(escript::AbstractSystemMatrix* mat,
         }
     } else {
         if (!rhs.isEmpty() && rhs.getDataPointSize() != mat->getRowBlockSize())
-            throw RipleyException("assemblePDEBoundary: matrix row block size and number of components of right hand side don't match");
+            throw ValueError("assemblePDEBoundary: matrix row block size and number of components of right hand side don't match");
         numEq = mat->getRowBlockSize();
         numComp = mat->getColumnBlockSize();
     }
 
     if (numEq != numComp)
-        throw RipleyException("assemblePDEBoundary: number of equations and number of solutions don't match");
+        throw ValueError("assemblePDEBoundary: number of equations and number of solutions don't match");
 
-    //TODO: check shape and num samples of coeffs
+#ifdef ESYS_HAVE_TRILINOS
+    TrilinosMatrixAdapter* tm = dynamic_cast<TrilinosMatrixAdapter*>(mat);
+    if (tm) {
+        tm->resumeFill();
+    }
+#endif
 
     if (numEq==1) {
         if (fs==ReducedFaceElements)
@@ -1446,6 +1698,12 @@ void RipleyDomain::assemblePDEBoundary(escript::AbstractSystemMatrix* mat,
         else
             assembler->assemblePDEBoundarySystem(mat, rhs, coefs);
     }
+
+#ifdef ESYS_HAVE_TRILINOS
+    if (tm) {
+        tm->fillComplete(true);
+    }
+#endif
 }
 
 void RipleyDomain::assemblePDEDirac(escript::AbstractSystemMatrix* mat,
@@ -1468,7 +1726,7 @@ void RipleyDomain::assemblePDEDirac(escript::AbstractSystemMatrix* mat,
         }
     } else {
         if (!rhs.isEmpty() && rhs.getDataPointSize() != mat->getRowBlockSize())
-            throw RipleyException("assemblePDEDirac: matrix row block size "
+            throw ValueError("assemblePDEDirac: matrix row block size "
                     "and number of components of right hand side don't match");
         nEq = mat->getRowBlockSize();
         nComp = mat->getColumnBlockSize();
@@ -1504,7 +1762,7 @@ bool RipleyDomain::probeInterpolationAcross(int fsType_source,
 void RipleyDomain::interpolateAcross(escript::Data& target,
                                      const escript::Data& source) const
 {
-    throw RipleyException("interpolateAcross() not supported");
+    throw NotImplementedError("interpolateAcross() not supported");
 }
 
 // Expecting ("gaussian", radius, sigma)
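The reworked getSystemMatrixTypeId() above now prefers Trilinos for direct and
complex-valued solves when both stacks are built, and otherwise falls back to
whichever of Paso/Trilinos is available. A condensed, illustrative sketch of
that priority order follows; it is not part of the patch, pickSolverPackage()
is a hypothetical helper name, and only the ESYS_HAVE_* macros and escript
SO_* constants already referenced in the hunk are assumed:

    // Illustrative sketch only: condensed restatement of the backend
    // priority implemented in RipleyDomain::getSystemMatrixTypeId().
    #include <escript/SolverOptions.h>

    static int pickSolverPackage(int requested, bool wantDirect, bool isComplex)
    {
        int package = requested;
    #if defined(ESYS_HAVE_PASO) && defined(ESYS_HAVE_TRILINOS)
        // both stacks built in: prefer Trilinos for direct solves and for
        // complex-valued problems
        if (package == escript::SO_DEFAULT && (wantDirect || isComplex))
            package = escript::SO_PACKAGE_TRILINOS;
    #endif
    #ifdef ESYS_HAVE_PASO
        // Paso remains the default backend when it is available
        if (package == escript::SO_DEFAULT)
            package = escript::SO_PACKAGE_PASO;
    #endif
    #ifdef ESYS_HAVE_TRILINOS
        // Trilinos-only builds fall through to Trilinos
        if (package == escript::SO_DEFAULT)
            package = escript::SO_PACKAGE_TRILINOS;
    #endif
        return package;
    }
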
diff --git a/ripley/src/RipleyDomain.h b/ripley/src/RipleyDomain.h
index 775e998..37711b5 100644
--- a/ripley/src/RipleyDomain.h
+++ b/ripley/src/RipleyDomain.h
@@ -17,19 +17,26 @@
 #ifndef __RIPLEY_DOMAIN_H__
 #define __RIPLEY_DOMAIN_H__
 
-#include <boost/python/tuple.hpp>
-#include <boost/python/list.hpp>
-
 #include <ripley/Ripley.h>
-#include <ripley/RipleyException.h>
 #include <ripley/AbstractAssembler.h>
+#include <ripley/RipleyException.h>
 
 #include <escript/AbstractContinuousDomain.h>
 #include <escript/Data.h>
 #include <escript/FunctionSpace.h>
 #include <escript/SubWorld.h>
 
+#ifdef ESYS_HAVE_PASO
+#include <paso/Coupler.h>
 #include <paso/SystemMatrix.h>
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+#include <trilinoswrap/types.h>
+#endif
+
+#include <boost/python/list.hpp>
+#include <boost/python/tuple.hpp>
 
 namespace ripley {
 
@@ -40,9 +47,18 @@ enum assembler_t {
 };
 
 enum SystemMatrixType {
-    SMT_PASO = 1024,
-    SMT_CUSP = 2048,
-    SMT_SYMMETRIC = 4096
+    SMT_PASO = 1<<8,
+    SMT_CUSP = 1<<9,
+    SMT_TRILINOS = 1<<10,
+    SMT_SYMMETRIC = 1<<15,
+    SMT_COMPLEX = 1<<16,
+    SMT_UNROLL = 1<<17
+};
+
+enum DecompositionPolicy {
+    DECOMP_ADD_ELEMENTS,
+    DECOMP_EXPAND,
+    DECOMP_STRICT
 };
 
 /**
@@ -97,6 +113,15 @@ public:
     */
     ~RipleyDomain();
 
+    static void setDecompositionPolicy(DecompositionPolicy value);
+    static DecompositionPolicy getDecompositionPolicy();
+
+    /**
+     \brief
+     returns a reference to the MPI information wrapper for this domain
+    */
+    virtual escript::JMPI getMPI() const { return m_mpiInfo; }
+
     /**
        \brief
        returns the number of processors used for this domain
@@ -197,7 +222,7 @@ public:
         if (m_tagMap.find(name) != m_tagMap.end()) {
             return m_tagMap.find(name)->second;
         } else {
-            throw RipleyException("getTag: invalid tag name");
+            throw escript::ValueError("getTag: invalid tag name");
         }
     }
 
@@ -396,7 +421,7 @@ public:
        return a FunctionOnContactZero code
     */
     virtual int getFunctionOnContactZeroCode() const {
-        throw RipleyException("Ripley does not support contact elements");
+        throw escript::NotImplementedError("Ripley does not support contact elements");
     }
 
     /**
@@ -404,7 +429,7 @@ public:
        returns a FunctionOnContactZero code with reduced integration order
     */
     virtual int getReducedFunctionOnContactZeroCode() const {
-        throw RipleyException("Ripley does not support contact elements");
+        throw escript::NotImplementedError("Ripley does not support contact elements");
     }
 
     /**
@@ -412,7 +437,7 @@ public:
        returns a FunctionOnContactOne code
     */
     virtual int getFunctionOnContactOneCode() const {
-        throw RipleyException("Ripley does not support contact elements");
+        throw escript::NotImplementedError("Ripley does not support contact elements");
     }
 
     /**
@@ -420,7 +445,7 @@ public:
        returns a FunctionOnContactOne code with reduced integration order
     */
     virtual int getReducedFunctionOnContactOneCode() const {
-        throw RipleyException("Ripley does not support contact elements");
+        throw escript::NotImplementedError("Ripley does not support contact elements");
     }
 
     /**
@@ -615,14 +640,12 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const = 0;
 
-#ifdef USE_BOOSTIO
     /**
        \brief
        reads grid data from a compressed raw binary file into a Data object
     */
     virtual void readBinaryGridFromZipped(escript::Data& out,
                std::string filename, const ReaderParameters& params) const = 0;
-#endif
 
     /**
        \brief
@@ -717,7 +740,7 @@ public:
     */
     virtual Assembler_ptr createAssembler(std::string type,
                                           const DataMap& options) const {
-        throw RipleyException("Domain does not support custom assemblers");
+        throw escript::NotImplementedError("Domain does not support custom assemblers");
     }
 
     /**
@@ -730,7 +753,7 @@ public:
 protected:
     int m_numDim;
     StatusType m_status;
-    esysUtils::JMPI m_mpiInfo;
+    escript::JMPI m_mpiInfo;
     TagMap m_tagMap;
     mutable std::vector<int> m_nodeTags, m_nodeTagsInUse;
     mutable std::vector<int> m_elementTags, m_elementTagsInUse;
@@ -751,13 +774,34 @@ protected:
     // this is const because setTags is const
     void updateTagsInUse(int fsType) const;
 
+#ifdef ESYS_HAVE_PASO
+    /// creates a Paso connector
+    void createPasoConnector(const RankVector& neighbour,
+                             const IndexVector& offsetInSharedSend,
+                             const IndexVector& offsetInSharedRecv,
+                             const IndexVector& sendShared,
+                             const IndexVector& recvShared);
+
+    /// returns a Paso connector required for data transfer and distributed
+    /// system matrices
+    paso::Connector_ptr getPasoConnector() const { return m_connector; }
+
     /// allocates and returns a Paso pattern structure
     paso::Pattern_ptr createPasoPattern(const std::vector<IndexVector>& indices,
                                         dim_t N) const;
+#endif
+
+#ifdef ESYS_HAVE_TRILINOS
+    /// creates and returns a Trilinos CRS graph suitable to build a sparse
+    /// matrix
+    esys_trilinos::const_TrilinosGraph_ptr createTrilinosGraph(
+            const IndexVector& myRows, const IndexVector& myColumns) const;
+#endif
 
+    template<typename Scalar>
     void addToSystemMatrix(escript::AbstractSystemMatrix* mat,
                            const IndexVector& nodes, dim_t numEq,
-                           const DoubleVector& array) const;
+                           const std::vector<Scalar>& array) const;
 
     void addPoints(const std::vector<double>& coords,
                    const std::vector<int>& tags);
@@ -788,10 +832,20 @@ protected:
     /// copies the integrals of the function defined by 'arg' into 'integrals'
     virtual void assembleIntegrate(DoubleVector& integrals, const escript::Data& arg) const = 0;
 
+#ifdef ESYS_HAVE_TRILINOS
+    /// returns the Trilinos matrix graph
+    virtual esys_trilinos::const_TrilinosGraph_ptr getTrilinosGraph() const = 0;
+#endif
+
+    /// returns occupied matrix column indices for all matrix rows
+    virtual std::vector<IndexVector> getConnections(bool includeShared) const = 0;
+
+#ifdef ESYS_HAVE_PASO
     /// returns the Paso system matrix pattern
     virtual paso::SystemMatrixPattern_ptr getPasoMatrixPattern(
                                               bool reducedRowOrder,
                                               bool reducedColOrder) const = 0;
+#endif
 
     /// interpolates data on nodes in 'in' onto (reduced) elements in 'out'
     virtual void interpolateNodesOnElements(escript::Data& out,
@@ -807,14 +861,22 @@ protected:
     virtual void nodesToDOF(escript::Data& out, const escript::Data& in) const = 0;
 
     /// converts data on degrees of freedom in 'in' to nodes in 'out'
-    virtual void dofToNodes(escript::Data& out, const escript::Data& in) const = 0;
+    virtual void dofToNodes(escript::Data& out, const escript::Data& in) const;
 
     virtual dim_t getDofOfNode(dim_t node) const = 0;
 
 private:
+    static DecompositionPolicy m_decompPolicy;
+
+#ifdef ESYS_HAVE_PASO
+    // Paso connector used by the system matrix and to interpolate DOF to
+    // nodes
+    paso::Connector_ptr m_connector;
+
     /// paso version of adding element matrices to System Matrix
-    void addToSystemMatrix(paso::SystemMatrix_ptr in, const IndexVector& nodes,
-                           dim_t numEq, const DoubleVector& array) const;
+    void addToPasoMatrix(paso::SystemMatrix* in, const IndexVector& nodes,
+                         dim_t numEq, const DoubleVector& array) const;
+#endif
 
     /// calls the right PDE assembly routines after performing input checks
     void assemblePDE(escript::AbstractSystemMatrix* mat, escript::Data& rhs,
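The SystemMatrixType enum above now encodes the backend and matrix properties
as independent bits, which is why getSystemMatrixTypeId() ORs flags together
and newSystemMatrix() tests them with &. A small self-contained illustration
(values copied from the enum; not part of the patch):

    // Sketch only: how the new SMT_* bit flags compose into a matrix type id.
    #include <iostream>

    enum SystemMatrixType {
        SMT_PASO      = 1<<8,
        SMT_CUSP      = 1<<9,
        SMT_TRILINOS  = 1<<10,
        SMT_SYMMETRIC = 1<<15,
        SMT_COMPLEX   = 1<<16,
        SMT_UNROLL    = 1<<17
    };

    int main()
    {
        // e.g. the id produced for a complex-valued direct solve via Trilinos
        int type = (int)SMT_TRILINOS | (int)SMT_COMPLEX | (int)SMT_UNROLL;
        std::cout << std::boolalpha
                  << "Trilinos backend: " << ((type & SMT_TRILINOS) != 0) << "\n"
                  << "complex entries:  " << ((type & SMT_COMPLEX)  != 0) << "\n"
                  << "unrolled blocks:  " << ((type & SMT_UNROLL)   != 0) << "\n";
        return 0;
    }
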
diff --git a/ripley/src/RipleyException.cpp b/ripley/src/RipleyException.cpp
deleted file mode 100644
index 90e6dc6..0000000
--- a/ripley/src/RipleyException.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#include <ripley/RipleyException.h>
-
-namespace ripley {
-
-const std::string RipleyException::exceptionNameValue("RipleyException");
-
-const std::string& RipleyException::exceptionName() const
-{
-    return exceptionNameValue;
-}
-
-} // namespace ripley
-
diff --git a/ripley/src/RipleyException.h b/ripley/src/RipleyException.h
index 485bbfc..b271559 100644
--- a/ripley/src/RipleyException.h
+++ b/ripley/src/RipleyException.h
@@ -18,7 +18,7 @@
 #define __RIPLEY_EXCEPTION_H__
 
 #include <ripley/system_dep.h>
-#include <esysUtils/EsysException.h>
+#include <escript/EsysException.h>
 
 namespace ripley {
 
@@ -26,63 +26,10 @@ namespace ripley {
    \brief
    RipleyException exception class.
 */
-class RIPLEY_DLL_API RipleyException : public esysUtils::EsysException
+class RipleyException : public escript::EsysException
 {
-protected:
-    typedef EsysException Parent;
-
 public:
-    /**
-       \brief
-       Default constructor for the exception.
-    */
-    RipleyException() : Parent() { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    RipleyException(const char *cstr) : Parent(cstr) { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    RipleyException(const std::string &str) : Parent(str) { updateMessage(); }
-
-    /**
-       \brief
-       Copy Constructor.
-    */
-    RipleyException(const RipleyException &other) : Parent(other)
-    {
-        updateMessage();
-    }
-
-    /// Destructor
-    virtual ~RipleyException() THROW(NO_ARG) {}
-
-    /**
-       \brief
-       Assignment operator.
-    */
-    inline RipleyException& operator=(const RipleyException &other ) THROW(NO_ARG)
-    {
-        Parent::operator=(other);
-        updateMessage();
-        return *this;
-    }
-
-    /**
-       \brief
-       Returns the name of the exception.
-    */
-    virtual const std::string& exceptionName() const;
-
-private:
-    //
-    // the exception name is immutable and class-wide.
-    static const std::string exceptionNameValue;
+    RipleyException(const std::string& str) : escript::EsysException(str) {}
 };
 
 } // end of namespace ripley
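With RipleyException.cpp removed and the class reduced to the one-line
constructor above, the exception behaves like any other escript::EsysException.
A minimal usage sketch, assuming escript::EsysException ultimately derives from
std::exception and so exposes what() (not part of the patch):

    // Sketch only: throwing and catching the simplified RipleyException.
    #include <ripley/RipleyException.h>
    #include <iostream>

    void demo()
    {
        try {
            throw ripley::RipleyException("newSystemMatrix: unknown matrix type ID");
        } catch (const escript::EsysException& e) {
            // message string passed to the constructor comes back via what()
            std::cerr << "caught: " << e.what() << std::endl;
        }
    }
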
diff --git a/ripley/src/RipleySystemMatrix.cu b/ripley/src/RipleySystemMatrix.cu
index fab759d..7f040e0 100644
--- a/ripley/src/RipleySystemMatrix.cu
+++ b/ripley/src/RipleySystemMatrix.cu
@@ -17,7 +17,7 @@
 #include "RipleySystemMatrix.h" 
 #include "RipleyException.h" 
 
-#include <esysUtils/index.h>
+#include <escript/index.h>
 #include <escript/Data.h>
 #include <escript/SolverOptions.h>
 
@@ -88,7 +88,7 @@ void SystemMatrix::checkCUDA()
 #endif
 }
 
-SystemMatrix::SystemMatrix(esysUtils::JMPI mpiInfo, int blocksize,
+SystemMatrix::SystemMatrix(escript::JMPI mpiInfo, int blocksize,
                            const escript::FunctionSpace& fs, int nRows,
                            const IndexVector& diagonalOffsets, bool symm) :
     AbstractSystemMatrix(blocksize, fs, blocksize, fs),
@@ -250,6 +250,7 @@ void SystemMatrix::runSolver(LinearOperator& A, Vector& x, Vector& b,
     double solvertime = gettime()-T0;
 
     if (monitor.converged()) {
+        sb.updateDiagnostics("converged", true);
         if (sb.isVerbose()) {
             std::cout << "Solver converged to " << monitor.relative_tolerance()
                 << " relative tolerance after " << monitor.iteration_count()
@@ -261,6 +262,8 @@ void SystemMatrix::runSolver(LinearOperator& A, Vector& x, Vector& b,
             << " to " << monitor.relative_tolerance() << " rel. tolerance."
             << std::endl;
     }
+    sb.updateDiagnostics("num_iter", monitor.iteration_count());
+    sb.updateDiagnostics("net_time", solvertime);
 }
 
 void SystemMatrix::setToSolution(escript::Data& out, escript::Data& in,
@@ -281,7 +284,7 @@ void SystemMatrix::setToSolution(escript::Data& out, escript::Data& in,
     }
 
     options.attr("resetDiagnostics")();
-    escript::SolverBuddy sb = bp::extract<escript::SolverBuddy>(options);
+    escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy&>(options);
     out.expand();
     in.expand();
 
@@ -460,7 +463,7 @@ void SystemMatrix::saveHB(const std::string& filename) const
     throw RipleyException("Harwell-Boeing interface not available.");
 }
 
-void SystemMatrix::resetValues()
+void SystemMatrix::resetValues(bool preserveSolverData)
 {
     mat.values.values.assign(mat.values.values.size(), 0.);
     matrixAltered = true;
diff --git a/ripley/src/RipleySystemMatrix.h b/ripley/src/RipleySystemMatrix.h
index d6cfe65..1225d45 100644
--- a/ripley/src/RipleySystemMatrix.h
+++ b/ripley/src/RipleySystemMatrix.h
@@ -38,7 +38,7 @@ typedef cusp::array1d<double, cusp::device_memory> DeviceVectorType;
 class SystemMatrix : public escript::AbstractSystemMatrix
 {
 public:
-    SystemMatrix(esysUtils::JMPI mpiInfo, int blocksize,
+    SystemMatrix(escript::JMPI mpiInfo, int blocksize,
                  const escript::FunctionSpace& fs, int nRows,
                  const IndexVector& diagonalOffsets, bool symmetric);
 
@@ -52,7 +52,7 @@ public:
 
     virtual void saveHB(const std::string& filename) const;
 
-    virtual void resetValues();
+    virtual void resetValues(bool preserveSolverData = false);
 
     void add(const IndexVector& rowIndex, const std::vector<double>& array);
 
@@ -76,7 +76,7 @@ private:
     /// GPU device IDs supporting CUDA
     static std::vector<int> cudaDevices;
 
-    esysUtils::JMPI m_mpiInfo;
+    escript::JMPI m_mpiInfo;
     HostMatrixType mat;
     mutable DeviceMatrixType dmat;
     mutable bool matrixAltered;
diff --git a/ripley/src/SConscript b/ripley/src/SConscript
index c8baa01..5fc384b 100644
--- a/ripley/src/SConscript
+++ b/ripley/src/SConscript
@@ -14,17 +14,9 @@
 #
 ##############################################################################
 
-import os
 Import('*')
 
-local_env = env.Clone()
-py_wrapper_local_env = env.Clone()
-local_unroll_env = env.Clone()
-
-
-# Remove the shared library prefix on all platforms - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'ripley'
 
 sources = """
     blocktools.cpp
@@ -39,7 +31,6 @@ sources = """
     MultiRectangle.cpp
     Rectangle.cpp
     RipleyDomain.cpp
-    RipleyException.cpp
     WaveAssembler2D.cpp
     WaveAssembler3D.cpp
 """.split()
@@ -65,59 +56,54 @@ headers = """
     WaveAssembler3D.h
 """.split()
 
-local_env.Prepend(LIBS = ['pasowrap', 'escript', 'paso', 'esysUtils'])
-if local_env['silo']:
-    local_env.Append(CPPDEFINES = ['USE_SILO'])
-    local_env.AppendUnique(LIBS = env['silo_libs'])
+local_env = env.Clone()
 
-if local_env['cuda']:
+if IS_WINDOWS:
+    local_env.Append(CPPDEFINES = ['RIPLEY_EXPORTS'])
+
+# collect dependencies for other modules
+ripleylibs = []
+ripleylibs += env['escript_libs']
+if env['paso']:
+    ripleylibs += env['paso_libs']
+if env['trilinos']:
+    ripleylibs += env['trilinoswrap_libs']
+if env['silo']:
+    ripleylibs += env['silo_libs']
+if env['compressed_files']:
+    ripleylibs += env['compression_libs']
+if env['cuda']:
     sources.append('RipleySystemMatrix.cu')
     if env['openmp']:
         local_env.Append(CPPDEFINES = ['THRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_OMP'])
-
     local_env['NVCCFLAGS'] += ' -w'
     local_env['SHNVCCFLAGS']  += ' -w'
-    local_env.Append(CPPDEFINES = ['USE_CUDA', 'THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA'])
+    local_env.Append(CPPDEFINES = ['THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA'])
     #TODO: This needs a config option:
     local_env.Append(CPPDEFINES = ['CUSP_USE_TEXTURE_MEMORY'])
-    local_env.Append(LIBS = ['cudart'])
+    ripleylibs.append('cudart')
 
-if IS_WINDOWS:
-    local_env.Append(CPPDEFINES = ['RIPLEY_EXPORTS'])
+local_env.PrependUnique(LIBS = ripleylibs)
 
-module_name = 'ripley'
-
-lib = local_env.SharedLibrary(module_name, sources)
-env.Alias('build_ripley_lib', lib)
-
-include_path = Dir('ripley', local_env['incinstall'])
+env['ripley_libs'] = [module_name] + ripleylibs
 
+include_path = Dir(module_name, local_env['incinstall'])
 hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_ripley_headers', hdr_inst)
 
+lib = local_env.SharedLibrary(module_name, sources)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_ripley_lib', lib_inst)
 
 ### Python wrapper ###
-py_wrapper_local_env.Prepend(LIBS = ['ripley', 'pasowrap', 'escript', 'paso', 'esysUtils'])
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'ripleycpp.cpp')
-env.Alias('build_ripleycpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_ripleycpp_lib', mod_inst)
-
-# configure python module
-local_env.SConscript(dirs = ['#/ripley/py_src'], variant_dir='py', duplicate=0)
-
-# configure unit tests
-local_env.SConscript(dirs = ['#/ripley/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
+py_env = local_env.Clone()
+py_env.PrependUnique(LIBS = [module_name])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'ripleycpp.cpp')
+
+mod_path = Dir(module_name, local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
+
+build = env.Alias('build_ripley', [hdr_inst, lib, py_lib])
+if env['cuda']:
+    env.Alias('build_ripley', 'install_cusp_headers')
+env.Alias('install_ripley', ['build_ripley', lib_inst, mod_inst])
 
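The rewritten SConscript above follows the per-module pattern now used across the tree:
link dependencies are collected into ripleylibs, published as env['ripley_libs'] for
dependent modules and tests, and the Python wrapper is built with the PythonModule()
builder instead of a hand-rolled SharedLibrary/InstallAs pair, under a single
build_ripley/install_ripley alias pair. A reduced sketch of the same pattern for a
hypothetical module (all names below are illustrative, not part of the tree):

    Import('*')
    module_name = 'examplemod'                    # hypothetical module

    local_env = env.Clone()
    examplelibs = list(env['escript_libs'])       # core dependencies
    if env['silo']:
        examplelibs += env['silo_libs']           # optional feature libs
    local_env.PrependUnique(LIBS = examplelibs)

    # publish for modules and tests that link against this one
    env['examplemod_libs'] = [module_name] + examplelibs

    lib = local_env.SharedLibrary(module_name, ['Example.cpp'])
    lib_inst = local_env.Install(local_env['libinstall'], lib)

    py_env = local_env.Clone()
    py_env.PrependUnique(LIBS = [module_name])
    py_lib = py_env.PythonModule(module_name + 'cpp', 'examplecpp.cpp')
    mod_inst = py_env.Install(Dir(module_name, local_env['pyinstall']), py_lib)

    env.Alias('build_' + module_name, [lib, py_lib])
    env.Alias('install_' + module_name, ['build_' + module_name, lib_inst, mod_inst])
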
diff --git a/ripley/src/WaveAssembler2D.cpp b/ripley/src/WaveAssembler2D.cpp
index 0ac45ab..702b10e 100644
--- a/ripley/src/WaveAssembler2D.cpp
+++ b/ripley/src/WaveAssembler2D.cpp
@@ -14,13 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <ripley/WaveAssembler2D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/index.h>
+
 using escript::Data;
 
 namespace ripley {
@@ -33,17 +31,17 @@ WaveAssembler2D::WaveAssembler2D(escript::const_Domain_ptr dom,
     m_NE(NE),
     m_NN(NN)
 {
-    domain = boost::static_pointer_cast<const Rectangle>(dom);
+    domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
     isHTI = isVTI = false;
     DataMap::const_iterator a = c.find("c12"), b = c.find("c23");
     if (c.find("c11") == c.end()
                 || c.find("c13") == c.end() || c.find("c33") == c.end()
                 || c.find("c44") == c.end() || c.find("c66") == c.end()
                 || (a == c.end() && b == c.end()))
-        throw RipleyException("required constants missing for WaveAssembler");
+        throw escript::ValueError("required constants missing for WaveAssembler");
 
     if (a != c.end() && b != c.end()) {
-        throw RipleyException("WaveAssembler2D() doesn't support general "
+        throw escript::NotImplementedError("WaveAssembler2D() doesn't support general "
                               "form waves (yet)");
     } else if (a == c.end()) {
         c23 = b->second;
@@ -64,7 +62,7 @@ WaveAssembler2D::WaveAssembler2D(escript::const_Domain_ptr dom,
             || fs != c33.getFunctionSpace().getTypeCode()
             || fs != c44.getFunctionSpace().getTypeCode()
             || fs != c66.getFunctionSpace().getTypeCode()) {
-        throw RipleyException("C tensor elements are in mismatching function spaces");
+        throw escript::ValueError("C tensor elements are in mismatching function spaces");
     }
 }
 
@@ -89,7 +87,7 @@ void WaveAssembler2D::assemblePDESystem(escript::AbstractSystemMatrix* mat,
                                         Data& rhs, const DataMap& coefs) const
 {
     if (isNotEmpty("X", coefs))
-        throw RipleyException("Coefficient X was given to WaveAssembler "
+        throw escript::ValueError("Coefficient X was given to WaveAssembler "
                 "unexpectedly. Specialised domains can't be used for general "
                 "assemblage.");
     const Data& A = unpackData("A", coefs);
@@ -100,7 +98,7 @@ void WaveAssembler2D::assemblePDESystem(escript::AbstractSystemMatrix* mat,
     const Data& du = unpackData("du", coefs);
 
     if ((!du.isEmpty()) && du.getFunctionSpace().getTypeCode() != c11.getFunctionSpace().getTypeCode()) {
-        throw RipleyException("WaveAssembler3D: du and C tensor in mismatching function spaces");
+        throw escript::ValueError("WaveAssembler2D: du and C tensor in mismatching function spaces");
     }
 
     dim_t numEq, numComp;
diff --git a/ripley/src/WaveAssembler2D.h b/ripley/src/WaveAssembler2D.h
index 735ddde..e6daeb2 100644
--- a/ripley/src/WaveAssembler2D.h
+++ b/ripley/src/WaveAssembler2D.h
@@ -36,31 +36,31 @@ public:
 
     void assemblePDESingle(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESingle() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESingle() not supported by this assembler");
     }
     void assemblePDEBoundarySingle(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySingle() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySingle() not supported by this assembler");
     }
     void assemblePDESingleReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESingleReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESingleReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySingleReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySingleReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySingleReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySystem(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySystem() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySystem() not supported by this assembler");
     }
     void assemblePDESystemReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESystemReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESystemReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySystemReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySystemReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySystemReduced() not supported by this assembler");
     }
 
     void collateFunctionSpaceTypes(std::vector<int>& fsTypes,
@@ -68,7 +68,7 @@ public:
 
 private:
     DataMap c;
-    boost::shared_ptr<const Rectangle> domain;
+    POINTER_WRAPPER_CLASS(const Rectangle) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/ripley/src/WaveAssembler3D.cpp b/ripley/src/WaveAssembler3D.cpp
index e1cc64f..1cf502b 100644
--- a/ripley/src/WaveAssembler3D.cpp
+++ b/ripley/src/WaveAssembler3D.cpp
@@ -14,13 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <ripley/WaveAssembler3D.h>
 #include <ripley/domainhelpers.h>
 
+#include <escript/index.h>
+
 using escript::Data;
 
 namespace ripley {
@@ -33,18 +31,18 @@ WaveAssembler3D::WaveAssembler3D(escript::const_Domain_ptr dom,
     m_NE(NE),
     m_NN(NN)
 {
-    domain = boost::static_pointer_cast<const Brick>(dom);
+    domain = REFCOUNTNS::static_pointer_cast<const Brick>(dom);
     isHTI = isVTI = false;
     DataMap::const_iterator a = c.find("c12"), b = c.find("c23");
     if (c.find("c11") == c.end()
                 || c.find("c13") == c.end() || c.find("c33") == c.end()
                 || c.find("c44") == c.end() || c.find("c66") == c.end()
                 || (a == c.end() && b == c.end()))
-        throw RipleyException("required constants missing for WaveAssembler");
+        throw escript::ValueError("required constants missing for WaveAssembler");
 
     if (a != c.end() && b != c.end()) {
-        throw RipleyException("WaveAssembler3D() doesn't support general "
-                              "form waves (yet)");
+        throw escript::NotImplementedError("WaveAssembler3D() doesn't support "
+                                           "general form waves (yet)");
     } else if (a == c.end()) {
         c23 = b->second;
         isHTI = true;
@@ -64,7 +62,7 @@ WaveAssembler3D::WaveAssembler3D(escript::const_Domain_ptr dom,
             || fs != c33.getFunctionSpace().getTypeCode()
             || fs != c44.getFunctionSpace().getTypeCode()
             || fs != c66.getFunctionSpace().getTypeCode()) {
-        throw RipleyException("C tensor elements are in mismatching function spaces");
+        throw escript::ValueError("C tensor elements are in mismatching function spaces");
     }
 }
 
@@ -101,7 +99,7 @@ void WaveAssembler3D::assemblePDESystem(escript::AbstractSystemMatrix* mat,
     const Data& du = unpackData("du", coefs);
 
     if ((!du.isEmpty()) && du.getFunctionSpace().getTypeCode() != c11.getFunctionSpace().getTypeCode()) {
-        throw RipleyException("WaveAssembler3D: du and C tensor in mismatching function spaces");
+        throw escript::ValueError("WaveAssembler3D: du and C tensor in mismatching function spaces");
     }
 
     dim_t numEq, numComp;
diff --git a/ripley/src/WaveAssembler3D.h b/ripley/src/WaveAssembler3D.h
index 4bfd9c4..f0c28fd 100644
--- a/ripley/src/WaveAssembler3D.h
+++ b/ripley/src/WaveAssembler3D.h
@@ -37,31 +37,31 @@ public:
 
     void assemblePDESingle(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESingle() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESingle() not supported by this assembler");
     }
     void assemblePDEBoundarySingle(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySingle() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySingle() not supported by this assembler");
     }
     void assemblePDESingleReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESingleReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESingleReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySingleReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySingleReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySingleReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySystem(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySystem() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySystem() not supported by this assembler");
     }
     void assemblePDESystemReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDESystemReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDESystemReduced() not supported by this assembler");
     }
     void assemblePDEBoundarySystemReduced(escript::AbstractSystemMatrix* mat,
                            escript::Data& rhs, const DataMap& coefs) const {
-        throw RipleyException("assemblePDEBoundarySystemReduced() not supported by this assembler");
+        throw escript::NotImplementedError("assemblePDEBoundarySystemReduced() not supported by this assembler");
     }
 
     void collateFunctionSpaceTypes(std::vector<int>& fsTypes,
@@ -69,7 +69,7 @@ public:
 
 private:
     DataMap c;
-    boost::shared_ptr<const Brick> domain;
+    POINTER_WRAPPER_CLASS(const Brick) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/ripley/src/blocktools.cpp b/ripley/src/blocktools.cpp
index fea3f0d..43acbe3 100644
--- a/ripley/src/blocktools.cpp
+++ b/ripley/src/blocktools.cpp
@@ -14,10 +14,10 @@
 *
 *****************************************************************************/
 
+#include "blocktools.h"
 
-#include <iostream>	// for the debug method
 #include <cstring>	// for memset
-#include "blocktools.h"
+#include <iostream>	// for the debug method
 
 using namespace std;
 
@@ -436,4 +436,3 @@ unsigned char getSrcBuffID(unsigned char destx, unsigned char desty, unsigned ch
     return sourcex+sourcey*3+sourcez*9;
 }
 
-
diff --git a/ripley/src/blocktools.h b/ripley/src/blocktools.h
index db7e800..1234fd5 100644
--- a/ripley/src/blocktools.h
+++ b/ripley/src/blocktools.h
@@ -17,10 +17,6 @@
 #ifndef __RIPLEY_BLOCKTOOLS_H__
 #define __RIPLEY_BLOCKTOOLS_H__
 
-
-#include <vector>
-
-
 /* This file contains two main classes for dealing with a large 3D region which
  * has been divided into a 3D Grid of Blocks (usually to be distributed).
  * Each block is divided into 27 subblocks. The first and last subblocks in
@@ -58,8 +54,11 @@
  *                     an integral type.
  */
 
-#include <esysUtils/Esys_MPI.h>
-typedef Esys_MPI_rank neighbourID_t; // This should be the MPI_rank type
+#include <escript/EsysMPI.h>
+
+#include <vector>
+
+typedef int neighbourID_t; // This should be the MPI_rank type
 typedef unsigned coord_t;            // if we ever get more than 2^32 ranks, we have other problems
 
 typedef std::pair<neighbourID_t, int> neighpair;
diff --git a/ripley/src/blocktools2.cpp b/ripley/src/blocktools2.cpp
index ff1115d..26808c4 100644
--- a/ripley/src/blocktools2.cpp
+++ b/ripley/src/blocktools2.cpp
@@ -14,10 +14,10 @@
 *
 *****************************************************************************/
 
+#include "blocktools.h"
 
-#include <iostream>	// for the debug method
 #include <cstring>	// for memset
-#include "blocktools.h"
+#include <iostream>	// for the debug method
 
 
 BlockGrid2::BlockGrid2(coord_t x, coord_t y)
@@ -385,6 +385,3 @@ unsigned char getSrcBuffID2(unsigned char destx, unsigned char desty, bool delta
     return sourcex+sourcey*3;
 }
 
-
-
-
diff --git a/ripley/src/domainhelpers.cpp b/ripley/src/domainhelpers.cpp
index 60be56e..1391f11 100644
--- a/ripley/src/domainhelpers.cpp
+++ b/ripley/src/domainhelpers.cpp
@@ -14,15 +14,11 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <ripley/domainhelpers.h>
 #include <ripley/RipleyException.h>
 #include <cmath>
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/filtering_stream.hpp>
 #endif
@@ -43,7 +39,7 @@ void factorise(std::vector<int>& factors, int product)
     }
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 std::vector<char> unzip(const std::vector<char>& compressed)
 {
     std::vector<char> decompressed = std::vector<char>();
diff --git a/ripley/src/domainhelpers.h b/ripley/src/domainhelpers.h
index 314e737..625f4ad 100644
--- a/ripley/src/domainhelpers.h
+++ b/ripley/src/domainhelpers.h
@@ -59,13 +59,12 @@ inline void doublyLink(std::vector<ripley::IndexVector>& va,
 */
 void factorise(std::vector<int>& factors, int product);
 
-
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 /**
     converts the given gzip compressed char vector into an uncompressed form 
 */
 std::vector<char> unzip(const std::vector<char>& compressed);
-#endif // USE_BOOSTIO
+#endif // ESYS_HAVE_BOOST_IO
 
 } // namespace ripley
 
diff --git a/ripley/src/generate_assamblage.py b/ripley/src/generate_assamblage.py
index e230225..8a097c7 100644
--- a/ripley/src/generate_assamblage.py
+++ b/ripley/src/generate_assamblage.py
@@ -338,7 +338,7 @@ def createGradientCode(F, x, Q, gridoffset="", loopindex=(0,1,2)):
                      k1+=",1"
                   elif  loopindex[i] == -2:
                      k1+=",M%s-1"%i
-            TXT+="const register double %s = in[INDEX2(i,INDEX%s(%s, %s),NCOMP)];\n"%(s.name,DIM,k1[1:],M1)
+            TXT+="const double %s = in[INDEX2(i,INDEX%s(%s, %s),NCOMP)];\n"%(s.name,DIM,k1[1:],M1)
    #  interpolation to quadrature points
    for q in range(len(Q)):
          IDX2="INDEX%s(%s,%s)"%(DIM,k,N)
@@ -394,7 +394,7 @@ def createCode(F, x, Q, gridoffset="", loopindex=(0,1,2)):
                      k1+=",1"
                   elif  loopindex[i] == -2:
                      k1+=",M%s-1"%i
-            TXT+="const register double %s = in[INDEX2(i,INDEX%s(%s, %s),NCOMP)];\n"%(s.name,DIM,k1[1:],M1)
+            TXT+="const double %s = in[INDEX2(i,INDEX%s(%s, %s),NCOMP)];\n"%(s.name,DIM,k1[1:],M1)
    #  interpolation to quadrature points
    for q in range(len(Q)):
       IDX2="INDEX%s(%s,%s)"%(DIM,k,N)
@@ -457,9 +457,9 @@ def generatePDECode(DATA_A, EM,GLOBAL_TMP, system=False):
 
         OUT2=""
         for p in LOCAL_TMP:
-             OUT2+="  const register double %s = %s;\n"%(LOCAL_TMP[p],ccode(p))
+             OUT2+="  const double %s = %s;\n"%(LOCAL_TMP[p],ccode(p))
         for p in LOCAL2_TMP:
-             OUT2+="  const register double %s = %s;\n"%(LOCAL2_TMP[p],ccode(p))
+             OUT2+="  const double %s = %s;\n"%(LOCAL2_TMP[p],ccode(p))
         return OUT2+OUT
 
 def makePDE(S, x, Q, W, DIM=2, system=False):
@@ -492,9 +492,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_A.append(A)
                   if system:
-                      CODE2+="const register double %s = A_p[INDEX5(k,%s,m,%s,%s, p.numEqu,%s,p.numComp,%s)];\n"%(A_name, di, dj, q, DIM, DIM)
+                      CODE2+="const double %s = A_p[INDEX5(k,%s,m,%s,%s, p.numEqu,%s,p.numComp,%s)];\n"%(A_name, di, dj, q, DIM, DIM)
                   else:
-                      CODE2+="const register double %s = A_p[INDEX3(%s,%s,%s,%s,%s)];\n"%(A_name, di, dj, q, DIM, DIM)
+                      CODE2+="const double %s = A_p[INDEX3(%s,%s,%s,%s,%s)];\n"%(A_name, di, dj, q, DIM, DIM)
                    for i in range(len(S)):
                       for j in range(len(S)):
                           EM[(i,j)] = EM[(i,j)] + (A * W[q] * diff(S[i],x[di]) * diff(S[j],x[dj])).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
@@ -518,9 +518,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_A.append(A)
              if system:
-                      CODE2+="const register double %s = A_p[INDEX4(k,%s,m,%s p.numEqu,%s, p.numComp)];\n"%(A_name, di, dj, DIM)
+                      CODE2+="const double %s = A_p[INDEX4(k,%s,m,%s p.numEqu,%s, p.numComp)];\n"%(A_name, di, dj, DIM)
              else:
-                      CODE2+="const register double %s = A_p[INDEX2(%s,%s,%s)];\n"%(A_name, di, dj, DIM)
+                      CODE2+="const double %s = A_p[INDEX2(%s,%s,%s)];\n"%(A_name, di, dj, DIM)
              for q in range(len(Q)):
                    for i in range(len(S)):
                       for j in range(len(S)):
@@ -555,9 +555,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_B.append(A)
                   if system:
-                      CODE2+="const register double %s = B_p[INDEX4(k,%s,m,%s, p.numEqu,%s,p.numComp)];\n"%(A_name, di,  q, DIM)
+                      CODE2+="const double %s = B_p[INDEX4(k,%s,m,%s, p.numEqu,%s,p.numComp)];\n"%(A_name, di,  q, DIM)
                   else:
-                      CODE2+="const register double %s = B_p[INDEX2(%s,%s,%s)];\n"%(A_name, di, q, DIM)
+                      CODE2+="const double %s = B_p[INDEX2(%s,%s,%s)];\n"%(A_name, di, q, DIM)
                    for i in range(len(S)):
                       for j in range(len(S)):
                           EM[(i,j)] = EM[(i,j)] + (A * W[q] * diff(S[i],x[di]) * S[j]).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
@@ -580,9 +580,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_B.append(A)
              if system:
-                      CODE2+="const register double %s = B_p[INDEX3(k,%s,m, p.numEqu,%s)];\n"%(A_name, di,  DIM)
+                      CODE2+="const double %s = B_p[INDEX3(k,%s,m, p.numEqu,%s)];\n"%(A_name, di,  DIM)
              else:
-                      CODE2+="const register double %s = B_p[%s];\n"%(A_name, di)
+                      CODE2+="const double %s = B_p[%s];\n"%(A_name, di)
              for q in range(len(Q)):
                    for i in range(len(S)):
                       for j in range(len(S)):
@@ -617,9 +617,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_C.append(A)
                   if system:
-                      CODE2+="const register double %s = C_p[INDEX4(k,m,%s, %s, p.numEqu,p.numComp,%s)];\n"%(A_name, dj,  q, DIM)
+                      CODE2+="const double %s = C_p[INDEX4(k,m,%s, %s, p.numEqu,p.numComp,%s)];\n"%(A_name, dj,  q, DIM)
                   else:
-                      CODE2+="const register double %s = C_p[INDEX2(%s,%s,%s)];\n"%(A_name, dj, q, DIM)
+                      CODE2+="const double %s = C_p[INDEX2(%s,%s,%s)];\n"%(A_name, dj, q, DIM)
                    for i in range(len(S)):
                       for j in range(len(S)):
                           EM[(i,j)] = EM[(i,j)] + (A * W[q] * diff(S[j],x[dj]) * S[i]).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
@@ -642,9 +642,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_C.append(A)
              if system:
-                      CODE2+="const register double %s = C_p[INDEX3(k,m,%s, p.numEqu,p.numComp)];\n"%(A_name, dj)
+                      CODE2+="const double %s = C_p[INDEX3(k,m,%s, p.numEqu,p.numComp)];\n"%(A_name, dj)
              else:
-                      CODE2+="const register double %s = C_p[%s];\n"%(A_name, dj)
+                      CODE2+="const double %s = C_p[%s];\n"%(A_name, dj)
              for q in range(len(Q)):
                    for i in range(len(S)):
                       for j in range(len(S)):
@@ -678,9 +678,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_D.append(A)
                   if system:
-                      CODE2+="const register double %s = D_p[INDEX3(k,m,%s, p.numEqu,p.numComp)];\n"%(A_name, q)
+                      CODE2+="const double %s = D_p[INDEX3(k,m,%s, p.numEqu,p.numComp)];\n"%(A_name, q)
                   else:
-                      CODE2+="const register double %s = D_p[%s];\n"%(A_name, q)
+                      CODE2+="const double %s = D_p[%s];\n"%(A_name, q)
                    for i in range(len(S)):
                       for j in range(len(S)):
                           EM[(i,j)] = EM[(i,j)] + (A * W[q] * S[j] * S[i]).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
@@ -703,9 +703,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_D.append(A)
              if system:
-                      CODE2+="const register double %s = D_p[INDEX2(k,m, p.numEqu)];\n"%(A_name,)
+                      CODE2+="const double %s = D_p[INDEX2(k,m, p.numEqu)];\n"%(A_name,)
              else:
-                      CODE2+="const register double %s = D_p[0];\n"%(A_name,)
+                      CODE2+="const double %s = D_p[0];\n"%(A_name,)
              for q in range(len(Q)):
                    for i in range(len(S)):
                       for j in range(len(S)):
@@ -739,9 +739,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_X.append(A)
                   if system:
-                      CODE2+="const register double %s = X_p[INDEX3(k,%s, %s, p.numEqu,%s)];\n"%(A_name, dj,  q, DIM)
+                      CODE2+="const double %s = X_p[INDEX3(k,%s, %s, p.numEqu,%s)];\n"%(A_name, dj,  q, DIM)
                   else:
-                      CODE2+="const register double %s = X_p[INDEX2(%s,%s,%s)];\n"%(A_name, dj,q,DIM)
+                      CODE2+="const double %s = X_p[INDEX2(%s,%s,%s)];\n"%(A_name, dj,q,DIM)
                    for j in range(len(S)):
                           EM[j] = EM[j] + (A * W[q] * diff(S[j],x[dj])).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
         CODE+=CODE2+generatePDECode(DATA_X, EM, GLOBAL_TMP,system)
@@ -761,9 +761,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_X.append(A)
              if system:
-                      CODE2+="const register double %s = X_p[INDEX2(k,%s, p.numEqu)];\n"%(A_name, dj)
+                      CODE2+="const double %s = X_p[INDEX2(k,%s, p.numEqu)];\n"%(A_name, dj)
              else:
-                      CODE2+="const register double %s = X_p[%s];\n"%(A_name, dj)
+                      CODE2+="const double %s = X_p[%s];\n"%(A_name, dj)
              for q in range(len(Q)):
                    for j in range(len(S)):
                           EM[j] = EM[j] + (A * W[q] * diff(S[j],x[dj])).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
@@ -795,9 +795,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
                   A=Symbol(A_name)
                   DATA_Y.append(A)
                   if system:
-                      CODE2+="const register double %s = Y_p[INDEX3(k,%s, p.numEqu)];\n"%(A_name, q)
+                      CODE2+="const double %s = Y_p[INDEX3(k,%s, p.numEqu)];\n"%(A_name, q)
                   else:
-                      CODE2+="const register double %s = Y_p[%s];\n"%(A_name, q)
+                      CODE2+="const double %s = Y_p[%s];\n"%(A_name, q)
                    for i in range(len(S)):
                           EM[i] = EM[i] + (A * W[q] * S[i]).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
         CODE+=CODE2+generatePDECode(DATA_Y, EM, GLOBAL_TMP,system)
@@ -817,9 +817,9 @@ def makePDE(S, x, Q, W, DIM=2, system=False):
              A=Symbol(A_name)
              DATA_Y.append(A)
              if system:
-                      CODE2+="const register double %s = Y_p[k];\n"%(A_name,)
+                      CODE2+="const double %s = Y_p[k];\n"%(A_name,)
              else:
-                      CODE2+="const register double %s = Y_p[0];\n"%(A_name,)
+                      CODE2+="const double %s = Y_p[0];\n"%(A_name,)
              for q in range(len(Q)):
                    for i in range(len(S)):
                           EM[i] = EM[i] + (A * W[q] * S[i]).subs( [ (x[jj], Q[q][jj]) for jj in range(DIM) ] )
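
The generator changes above only drop the 'register' storage class from the emitted C++
declarations: the keyword has been deprecated since C++11 and is removed in C++17, so
the generated assembler code now declares plain 'const double' temporaries. A simplified
sketch of the affected emit step (illustrative helper, not part of the generator):

    # emit a coefficient load without the deprecated 'register' keyword
    def emit_load(name, di, dj, q, dim):
        return "const double %s = A_p[INDEX3(%s,%s,%s,%s,%s)];\n" % (
            name, di, dj, q, dim, dim)

    print(emit_load("A_00_0", 0, 0, 0, 2))
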
diff --git a/ripley/src/ripleycpp.cpp b/ripley/src/ripleycpp.cpp
index d44744c..0a2b7de 100644
--- a/ripley/src/ripleycpp.cpp
+++ b/ripley/src/ripleycpp.cpp
@@ -14,15 +14,14 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <ripley/AbstractAssembler.h>
 #include <ripley/Brick.h>
 #include <ripley/MultiBrick.h>
-#include <ripley/Rectangle.h>
 #include <ripley/MultiRectangle.h>
-#include <esysUtils/esysExceptionTranslator.h>
+#include <ripley/Rectangle.h>
+
+#include <escript/ExceptionTranslators.h>
+#include <escript/SubWorld.h>
 
 #include <boost/python.hpp> 
 #include <boost/python/module.hpp>
@@ -30,8 +29,6 @@
 #include <boost/python/detail/defaults_gen.hpp>
 #include <boost/version.hpp>
 
-#include "escript/SubWorld.h"
-
 using namespace boost::python;
 
 namespace ripley {
@@ -87,12 +84,12 @@ escript::Data readBinaryGrid(std::string filename, escript::FunctionSpace fs,
     return res;
 }
 
-#ifdef USE_BOOSTIO
 escript::Data readBinaryGridFromZipped(std::string filename, escript::FunctionSpace fs,
         const object& pyShape, double fill, int byteOrder, int dataType,
         const object& pyFirst, const object& pyNum, const object& pyMultiplier,
         const object& pyReverse)
 {
+#ifdef ESYS_HAVE_BOOST_IO
     int dim=fs.getDim();
     ReaderParameters params;
 
@@ -111,8 +108,10 @@ escript::Data readBinaryGridFromZipped(std::string filename, escript::FunctionSp
     escript::Data res(fill, shape, fs, true);
     dom->readBinaryGridFromZipped(res, filename, params);
     return res;
-}
+#else
+    throw RipleyException("Ripley was not compiled with zip support!");
 #endif
+}
 
 escript::Data readNcGrid(std::string filename, std::string varname,
         escript::FunctionSpace fs, const object& pyShape, double fill,
@@ -491,7 +490,9 @@ BOOST_PYTHON_MODULE(ripleycpp)
     docstring_options docopt(true, true, false);
 #endif
 
-    register_exception_translator<ripley::RipleyException>(&(esysUtils::RuntimeErrorTranslator));
+    // register escript's default translators
+    REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
+    register_exception_translator<ripley::RipleyException>(&escript::RuntimeErrorTranslator);
 
     scope().attr("__doc__") = "To use this module, please import esys.ripley";
     scope().attr("BYTEORDER_NATIVE") = (int)ripley::BYTEORDER_NATIVE;
@@ -501,6 +502,15 @@ BOOST_PYTHON_MODULE(ripleycpp)
     scope().attr("DATATYPE_FLOAT32") = (int)ripley::DATATYPE_FLOAT32;
     scope().attr("DATATYPE_FLOAT64") = (int)ripley::DATATYPE_FLOAT64;
 
+    scope().attr("DECOMP_ADD_ELEMENTS") = (int)ripley::DECOMP_ADD_ELEMENTS;
+    scope().attr("DECOMP_EXPAND") = (int)ripley::DECOMP_EXPAND;
+    scope().attr("DECOMP_STRICT") = (int)ripley::DECOMP_STRICT;
+
+    def("setDecompositionPolicy", ripley::RipleyDomain::setDecompositionPolicy,
+        args("value"),
+        "Sets the automatic domain decomposition policy for new domains.\n\n"
+        ":param value: policy\n:type value: ``int``");
+
     def("Brick", ripley::_brick, (arg("n0"),arg("n1"),arg("n2"),arg("l0")=1.0,
         arg("l1")=1.0,arg("l2")=1.0,arg("d0")=-1,arg("d1")=-1,arg("d2")=-1,
         arg("diracPoints")=list(),arg("diracTags")=list(),
@@ -563,13 +573,11 @@ BOOST_PYTHON_MODULE(ripleycpp)
                 arg("byteOrder"), arg("dataType"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
             "Reads a binary Grid");
-#ifdef USE_BOOSTIO
     def("_readBinaryGridFromZipped", &ripley::readBinaryGridFromZipped, (arg("filename"),
                 arg("functionspace"), arg("shape"), arg("fill")=0.,
                 arg("byteOrder"), arg("dataType"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
             "Reads a binary Grid");
-#endif
     def("_readNcGrid", &ripley::readNcGrid, (arg("filename"), arg("varname"),
                 arg("functionspace"), arg("shape"), arg("fill"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
@@ -673,8 +681,10 @@ BOOST_PYTHON_MODULE(ripleycpp)
         .def("getMPIRank",&ripley::RipleyDomain::getMPIRank,":return: the rank of this process\n:rtype: ``int``")
         .def("MPIBarrier",&ripley::RipleyDomain::MPIBarrier,"Wait until all processes have reached this point")
         .def("onMasterProcessor",&ripley::RipleyDomain::onMasterProcessor,":return: True if this code is executing on the master process\n:rtype: `bool`");
-    /* These two class exports are necessary to ensure that the extra methods added by ripley make it to python.
-     * This change became necessary when the Brick and Rectangle constructors turned into factories instead of classes */
+    // These two class exports are necessary to ensure that the extra methods
+    // added by ripley make it to python. This change became necessary when
+    // the Brick and Rectangle constructors turned into factories instead of
+    // classes
     class_<ripley::Brick, bases<ripley::RipleyDomain> >("RipleyBrick", "", no_init);
     class_<ripley::Rectangle, bases<ripley::RipleyDomain> >("RipleyRectangle", "", no_init);
     class_<ripley::MultiRectangle, bases<ripley::RipleyDomain> >("RipleyMultiRectangle", "", no_init);
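
Two Python-visible effects of the ripleycpp.cpp changes above: _readBinaryGridFromZipped()
is now always exported and raises at runtime when Ripley was built without
boost::iostreams, instead of vanishing at compile time, and the new DECOMP_* constants
together with setDecompositionPolicy() expose the automatic domain decomposition policy.
A short usage sketch (grid sizes are arbitrary):

    # Sketch: select the decomposition policy before creating domains.
    from esys.ripley import Rectangle, ripleycpp

    ripleycpp.setDecompositionPolicy(ripleycpp.DECOMP_EXPAND)
    dom = Rectangle(n0=40, n1=40)     # created under the selected policy
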
diff --git a/ripley/src/system_dep.h b/ripley/src/system_dep.h
index db010a3..b3ac35e 100644
--- a/ripley/src/system_dep.h
+++ b/ripley/src/system_dep.h
@@ -17,8 +17,6 @@
 #ifndef __RIPLEY_SYSTEM_DEP_H__
 #define __RIPLEY_SYSTEM_DEP_H__
 
-#include <cmath>
-
 #define RIPLEY_DLL_API
 
 #ifdef _WIN32
@@ -32,6 +30,7 @@
 #   endif
 #endif
 
+#include <escript/DataTypes.h>
 
 // byte swapping / endianness:
 
diff --git a/ripley/test/SConscript b/ripley/test/SConscript
index e897082..7430f65 100644
--- a/ripley/test/SConscript
+++ b/ripley/test/SConscript
@@ -14,7 +14,6 @@
 #
 ##############################################################################
 
-
 Import('*')
 local_env = env.Clone()
 
@@ -24,21 +23,16 @@ if local_env['cppunit'] and local_env['cuda']:
     testname='ripley_UnitTest'
 
     # build the executable
-    local_env.Prepend(LIBS=['ripley', 'pasowrap', 'paso', 'escript', 'esysUtils']+env['cppunit_libs'])
-
-    if local_env['silo']:
-        local_env.Append(CPPDEFINES = ['USE_SILO'])
-        local_env.AppendUnique(LIBS = env['silo_libs'])
+    local_env.PrependUnique(LIBS=env['ripley_libs']+env['cppunit_libs'])
 
     if env['openmp']:
         local_env.Append(CPPDEFINES = ['THRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_OMP'])
 
     local_env['NVCCFLAGS'] += ' -w'
     local_env['SHNVCCFLAGS']  += ' -w'
-    local_env.Append(CPPDEFINES = ['USE_CUDA', 'THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA'])
+    local_env.Append(CPPDEFINES = ['THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA'])
     #TODO: This needs a config option:
     local_env.Append(CPPDEFINES = ['CUSP_USE_TEXTURE_MEMORY'])
-    local_env.Append(LIBS = ['cudart'])
 
     program = local_env.Program(testname, sources)
 
@@ -50,10 +44,10 @@ if local_env['cppunit'] and local_env['cuda']:
     Alias("run_tests", testname+'.passed')
 
     # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/ripley/test", ('./'+testname,))
+    from grouptest import GroupTest
+    tgroup=GroupTest("ripleycpp", "$BINRUNNER ", (), "", "$BUILD_DIR/ripley/test", ('./'+testname,))
     TestGroups.append(tgroup)
 
 # configure python unit tests
-local_env.SConscript(dirs = ['#/ripley/test/python'], variant_dir='python', duplicate=0, exports=['py_wrapper_lib'])
+local_env.SConscript('python/SConscript', duplicate=0)
 
diff --git a/ripley/test/SystemMatrixTestCase.cpp b/ripley/test/SystemMatrixTestCase.cpp
index a327538..e1ea0c3 100644
--- a/ripley/test/SystemMatrixTestCase.cpp
+++ b/ripley/test/SystemMatrixTestCase.cpp
@@ -15,9 +15,12 @@
 *****************************************************************************/
 
 #include "SystemMatrixTestCase.h"
-#include <escript/FunctionSpaceFactory.h>
+
 #include <ripley/Rectangle.h>
 #include <ripley/RipleySystemMatrix.h>
+
+#include <escript/FunctionSpaceFactory.h>
+
 #include <cppunit/TestCaller.h>
 
 using namespace CppUnit;
@@ -173,7 +176,7 @@ TestSuite* SystemMatrixTestCase::suite()
 
 void SystemMatrixTestCase::setUp()
 {
-    mpiInfo = esysUtils::makeInfo(MPI_COMM_WORLD);
+    mpiInfo = escript::makeInfo(MPI_COMM_WORLD);
     domain.reset(new ripley::Rectangle(4, 3, 0., 0., 1., 1.));
 }
 
diff --git a/ripley/test/SystemMatrixTestCase.h b/ripley/test/SystemMatrixTestCase.h
index ddd119b..0cadcb0 100644
--- a/ripley/test/SystemMatrixTestCase.h
+++ b/ripley/test/SystemMatrixTestCase.h
@@ -44,7 +44,7 @@ private:
     escript::ASM_ptr createMatrix(int blocksize, bool symmetric);
     escript::Data createInputVector(int blocksize);
 
-    esysUtils::JMPI mpiInfo;
+    escript::JMPI mpiInfo;
     escript::Domain_ptr domain;
 };
 
diff --git a/ripley/test/python/SConscript b/ripley/test/python/SConscript
index 796548e..4755a25 100644
--- a/ripley/test/python/SConscript
+++ b/ripley/test/python/SConscript
@@ -14,37 +14,34 @@
 #
 ##############################################################################
 
-import os
+from os.path import splitext
 Import('*')
 
 local_env = env.Clone()
 
-# 
-#  files defining test runs (passing in a release)
-# 
+# files defining test runs (passing in a release)
 testruns = Glob('run_*.py', strings=True)
 
-#Add Unit Test to target alias
+# add unit test to target alias
 local_env.PrependENVPath('PYTHONPATH', Dir('#/ripley/test/python/').abspath)
 local_env.PrependENVPath('PYTHONPATH', Dir('.').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/escriptcore/test/python').abspath)
-local_env['ENV']['RIPLEY_TEST_DATA']=Dir('.').srcnode().abspath
-local_env['ENV']['RIPLEY_WORKDIR']=Dir('.').abspath
+local_env['ENV']['RIPLEY_TEST_DATA'] = Dir('.').srcnode().abspath
+local_env['ENV']['RIPLEY_WORKDIR'] = Dir('.').abspath
 # needed for a test from the util base class in escript
-local_env['ENV']['ESCRIPT_WORKDIR']=Dir('.').abspath
-env.Alias('local_py_tests',[os.path.splitext(x)[0]+'.passed' for x in testruns])
-env.Alias('py_tests', [os.path.splitext(x)[0]+'.passed' for x in testruns])
+local_env['ENV']['ESCRIPT_WORKDIR'] = Dir('.').abspath
+env.Alias('local_py_tests',[splitext(x)[0]+'.passed' for x in testruns])
+env.Alias('py_tests', [splitext(x)[0]+'.passed' for x in testruns])
 
 # run all tests
 program = local_env.RunPyUnitTest(testruns)
-Depends(program, py_wrapper_lib)
-Depends(program, 'build_py_tests')
+Requires(program, ['install_escript', 'build_py_tests'])
 if env['usempi']:
-    Depends(program, env['prefix']+"/lib/pythonMPI")
+    Requires(program, ['install_pythonMPI'])
 
 # Add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("RIPLEY_TEST_DATA","$BATCH_ROOT/ripley/test/python"),('RIPLEY_WORKDIR','$BUILD_DIR/ripley/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/ripley/test/python","$BATCH_ROOT/ripley/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("ripley", "$PYTHONRUNNER ", (("RIPLEY_TEST_DATA","$BATCH_ROOT/ripley/test/python"),('RIPLEY_WORKDIR','$BUILD_DIR/ripley/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/ripley/test/python", "$BATCH_ROOT/ripley/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/ripley/test/python")
 TestGroups.append(tgroup)
 
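The switch from Depends() to Requires() above is deliberate: Requires() declares an
order-only prerequisite, so the Python test runners wait for install_escript and
build_py_tests (and install_pythonMPI under MPI builds) without being marked out of date
whenever those targets are rebuilt. A minimal sketch of the difference, with illustrative
target names:

    # Order-only: 'demo.passed' is built after 'install_demo', but a change
    # to 'install_demo' alone does not force 'demo.passed' to rerun.
    Requires('demo.passed', 'install_demo')

    # Full dependency: 'demo.passed' is considered out of date whenever
    # 'install_demo' changes.
    Depends('demo.passed', 'install_demo')
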
diff --git a/ripley/test/python/run_customAssemblersOnRipley.py b/ripley/test/python/run_customAssemblersOnRipley.py
index f7635d1..0137049 100644
--- a/ripley/test/python/run_customAssemblersOnRipley.py
+++ b/ripley/test/python/run_customAssemblersOnRipley.py
@@ -219,10 +219,10 @@ class RipleyWaveAssemblerTestBase(unittest.TestCase):
         for i in [self.V_p, self.V_s, self.c11, self.c12, self.c13, self.c23,
                 self.c33, self.c44, self.c66]:
             i.expand()
-        with self.assertRaises(RuntimeError) as e:
+        with self.assertRaises(ValueError) as e:
             self.run_HTI_assembly()
         self.assertTrue("mismatching function spaces" in str(e.exception))
-        with self.assertRaises(RuntimeError) as e:
+        with self.assertRaises(ValueError) as e:
             self.run_VTI_assembly()
         self.assertTrue("mismatching function spaces" in str(e.exception))
 
@@ -236,10 +236,10 @@ class RipleyWaveAssemblerTestBase(unittest.TestCase):
         self.c33 = Scalar(33., ReducedFunction(self.domain))
         self.c44 = Scalar(44., ReducedFunction(self.domain))
         self.c66 = Scalar(66., ReducedFunction(self.domain))
-        with self.assertRaises(RuntimeError) as e:
+        with self.assertRaises(ValueError) as e:
             self.run_HTI_assembly()
         self.assertTrue("mismatching function spaces" in str(e.exception))
-        with self.assertRaises(RuntimeError) as e:
+        with self.assertRaises(ValueError) as e:
             self.run_VTI_assembly()
         self.assertTrue("mismatching function spaces" in str(e.exception))
 
diff --git a/ripley/test/python/run_escriptOnMultiResolution.py b/ripley/test/python/run_escriptOnMultiResolution.py
index e3b492e..90b33c5 100644
--- a/ripley/test/python/run_escriptOnMultiResolution.py
+++ b/ripley/test/python/run_escriptOnMultiResolution.py
@@ -30,7 +30,7 @@ from esys.escriptcore.testing import *
 from esys.escript import *
 from esys.ripley import MultiRectangle, MultiBrick, ripleycpp, MultiResolutionDomain
 from test_objects import Test_Dump, Test_SetDataPointValue, Test_saveCSV, Test_TableInterpolation
-from test_objects import Test_Domain, Test_GlobalMinMax, Test_Lazy
+from test_objects import Test_Domain, Test_Lazy
 
 from test_shared import Test_Shared
 
@@ -151,8 +151,8 @@ class Test_randomOnMultiRipley(unittest.TestCase):
         fs=ContinuousFunction(Rectangle(n0=5*(int(sqrt(mpiSize)+1)),n1=5*(int(sqrt(mpiSize)+1))))
         RandomData((), fs, 2,("gaussian",1,0.5))
         RandomData((), fs, 0,("gaussian",2,0.76))
-        self.assertRaises(RuntimeError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
-        self.assertRaises(RuntimeError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
+        self.assertRaises(NotImplementedError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
+        self.assertRaises(ValueError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
         RandomData((2,3),fs)
 
     @unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
@@ -161,8 +161,8 @@ class Test_randomOnMultiRipley(unittest.TestCase):
         fs=ContinuousFunction(Brick(n0=5*mpiSize, n1=5*mpiSize, n2=5*mpiSize))
         RandomData((), fs, 2,("gaussian",1,0.5))
         RandomData((), fs, 0,("gaussian",2,0.76))
-        self.assertRaises(RuntimeError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
-        self.assertRaises(RuntimeError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
+        self.assertRaises(NotImplementedError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
+        self.assertRaises(ValueError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
         RandomData((2,3),fs)
 
 class Test_multiResolution(unittest.TestCase):
diff --git a/ripley/test/python/run_escriptOnRipley.py b/ripley/test/python/run_escriptOnRipley.py
index 76d54b7..b9af4a9 100644
--- a/ripley/test/python/run_escriptOnRipley.py
+++ b/ripley/test/python/run_escriptOnRipley.py
@@ -30,7 +30,7 @@ from esys.escriptcore.testing import *
 from esys.escript import *
 from esys.ripley import Rectangle, Brick, ripleycpp
 from test_objects import Test_Dump, Test_SetDataPointValue, Test_saveCSV, Test_TableInterpolation
-from test_objects import Test_Domain, Test_GlobalMinMax, Test_Lazy
+from test_objects import Test_Domain, Test_Lazy
 
 from test_shared import Test_Shared
 
@@ -100,7 +100,7 @@ class Test_DomainOnRipley(Test_Domain):
         if getMPISizeWorld() == 1: self.assertTrue(len(tags)==len(ref_tags), "tags list has wrong length.")
         for i in tags: self.assertTrue(i in ref_tags,"tag %s is missing."%i)
 
-class Test_DataOpsOnRipley(Test_Dump, Test_SetDataPointValue, Test_GlobalMinMax, Test_Lazy):
+class Test_DataOpsOnRipley(Test_Dump, Test_SetDataPointValue, Test_Lazy):
     def setUp(self):
         self.domain=Rectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
         self.domain_with_different_number_of_samples=Rectangle(n0=7*NE*NX-1, n1=3*NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
@@ -179,8 +179,8 @@ class Test_randomOnRipley(unittest.TestCase):
         fs=ContinuousFunction(Rectangle(10*(int(sqrt(mpiSize)+1)),10*(int(sqrt(mpiSize)+1))))
         RandomData((), fs, 2,("gaussian",1,0.5))
         RandomData((), fs, 0,("gaussian",2,0.76))
-        self.assertRaises(RuntimeError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
-        self.assertRaises(RuntimeError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
+        self.assertRaises(NotImplementedError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
+        self.assertRaises(ValueError, RandomData, (), fs, 0, ("gaussian",11,0.1)) #radius too large
         RandomData((2,3),fs)
 
     def test_FillBrick(self):
@@ -188,8 +188,8 @@ class Test_randomOnRipley(unittest.TestCase):
         fs=ContinuousFunction(Brick(10*mpiSize,10*mpiSize, 10*mpiSize))
         RandomData((), fs, 2,("gaussian",1,0.5))
         RandomData((), fs, 0,("gaussian",2,0.76))
-        self.assertRaises(RuntimeError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
-        self.assertRaises(RuntimeError, RandomData, (), fs, 0, ("gaussian",20,0.1)) #radius too large
+        self.assertRaises(NotImplementedError, RandomData, (2,2), fs, 0, ("gaussian",2,0.76)) #data not scalar
+        self.assertRaises(ValueError, RandomData, (), fs, 0, ("gaussian",20,0.1)) #radius too large
         RandomData((2,3),fs)
 
 
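The exception changes in the test files above mirror the translators now registered in
ripleycpp.cpp: errors raised as escript::ValueError and escript::NotImplementedError
surface in Python as ValueError and NotImplementedError, where the old RipleyException
arrived as a generic RuntimeError. A small sketch of the behaviour the tests rely on,
using the same RandomData calls exercised above:

    # Sketch: C++ argument errors now map onto specific Python exceptions.
    from esys.escript import ContinuousFunction, RandomData
    from esys.ripley import Rectangle

    fs = ContinuousFunction(Rectangle(n0=10, n1=10))
    try:
        RandomData((2, 2), fs, 0, ("gaussian", 2, 0.76))   # data not scalar
    except NotImplementedError as e:
        print("NotImplementedError:", e)
    try:
        RandomData((), fs, 0, ("gaussian", 11, 0.1))       # radius too large
    except ValueError as e:
        print("ValueError:", e)
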
diff --git a/ripley/test/python/run_linearPDEsOnRipley.py b/ripley/test/python/run_linearPDEsOnRipley.py
index d2e9dd3..fe90e8b 100644
--- a/ripley/test/python/run_linearPDEsOnRipley.py
+++ b/ripley/test/python/run_linearPDEsOnRipley.py
@@ -1,103 +1,103 @@
-
-########################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# Earth Systems Science Computational Center (ESSCC)
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-########################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-Earth Systems Science Computational Center (ESSCC)
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for the linearPDE and pdetools on ripley
-
-:remark:
-
-:var __author__: name of author
-:var __licence__: licence agreement
-:var __url__: url entry point on documentation
-:var __version__: version
-:var __date__: date of the version
-"""
-
-__author__="Lutz Gross, l.gross at uq.edu.au"
-
-import os
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from test_linearPDEs import Test_Poisson, Test_LinearPDE, Test_TransportPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping
-from test_assemblage import Test_assemblage_2Do1, Test_assemblage_3Do1
-from test_pdetools import Test_pdetools, Test_pdetools_noLumping
-from esys.escript import *
-from esys.ripley import Rectangle, Brick
-
-
-try:
-     RIPLEY_TEST_DATA=os.environ['RIPLEY_TEST_DATA']
-except KeyError:
-     RIPLEY_TEST_DATA='.'
-
-NE=10 # number of element in each spatial direction (must be even)
-mpiSize=getMPISizeWorld()
-
-class Test_LinearPDEOnRipleyRect(Test_LinearPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping, Test_pdetools, Test_assemblage_2Do1, Test_TransportPDE):
-    RES_TOL=1.e-7
-    ABS_TOL=1.e-8
-    def setUp(self):
-        for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
-            NX=x
-            NY=mpiSize//x
-            if NX*NY == mpiSize:
-                break
-        self.domain=Rectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
-        self.order = 1
-    def tearDown(self):
-        del self.domain
-
-class Test_LinearPDEOnRipleyBrick(Test_LinearPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping, Test_pdetools, Test_assemblage_3Do1, Test_TransportPDE):
-    RES_TOL=1.e-7
-    ABS_TOL=1.e-8
-    def setUp(self):
-        for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
-            NX=x[0]
-            NY=x[1]
-            NZ=mpiSize//(x[0]*x[1])
-            if NX*NY*NZ == mpiSize:
-                break
-
-        self.domain = Brick(n0=NE*NX-1, n1=NE*NY-1, n2=NE*NZ-1, l0=1., l1=1., l2=1., d0=NX, d1=NY, d2=NZ)
-        self.order = 1
-
-    def tearDown(self):
-        del self.domain
-
-class Test_PoissonOnRipley(Test_Poisson):
-    RES_TOL=1.e-7
-    ABS_TOL=1.e-8
-    def setUp(self):
-        for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
-            NX=x
-            NY=mpiSize//x
-            if NX*NY == mpiSize:
-                break
-        self.domain=Rectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
-    def tearDown(self):
-        del self.domain
-
-
-if __name__ == '__main__':
-    run_tests(__name__, exit_on_failure=True)
-
+
+########################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# Earth Systems Science Computational Center (ESSCC)
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+########################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+Earth Systems Science Computational Center (ESSCC)
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for the linearPDE and pdetools on ripley
+
+:remark:
+
+:var __author__: name of author
+:var __licence__: licence agreement
+:var __url__: url entry point on documentation
+:var __version__: version
+:var __date__: date of the version
+"""
+
+__author__="Lutz Gross, l.gross at uq.edu.au"
+
+import os
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+from test_linearPDEs import Test_Poisson, Test_LinearPDE, Test_TransportPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping
+from test_assemblage import Test_assemblage_2Do1, Test_assemblage_3Do1
+from test_pdetools import Test_pdetools, Test_pdetools_noLumping
+from esys.escript import *
+from esys.ripley import Rectangle, Brick
+
+
+try:
+     RIPLEY_TEST_DATA=os.environ['RIPLEY_TEST_DATA']
+except KeyError:
+     RIPLEY_TEST_DATA='.'
+
+NE=8 # number of elements in each spatial direction (must be even)
+mpiSize=getMPISizeWorld()
+
+class Test_LinearPDEOnRipleyRect(Test_LinearPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping, Test_pdetools, Test_assemblage_2Do1, Test_TransportPDE):
+    RES_TOL=1.e-7
+    ABS_TOL=1.e-8
+    def setUp(self):
+        for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+            NX=x
+            NY=mpiSize//x
+            if NX*NY == mpiSize:
+                break
+        self.domain=Rectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
+        self.order = 1
+    def tearDown(self):
+        del self.domain
+
+class Test_LinearPDEOnRipleyBrick(Test_LinearPDE, Test_LameEquation, Test_Helmholtz, Test_LinearPDE_noLumping, Test_pdetools, Test_assemblage_3Do1, Test_TransportPDE):
+    RES_TOL=1.e-7
+    ABS_TOL=1.e-8
+    def setUp(self):
+        for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
+            NX=x[0]
+            NY=x[1]
+            NZ=mpiSize//(x[0]*x[1])
+            if NX*NY*NZ == mpiSize:
+                break
+
+        self.domain = Brick(n0=NE*NX-1, n1=NE*NY-1, n2=NE*NZ-1, l0=1., l1=1., l2=1., d0=NX, d1=NY, d2=NZ)
+        self.order = 1
+
+    def tearDown(self):
+        del self.domain
+
+class Test_PoissonOnRipley(Test_Poisson):
+    RES_TOL=1.e-7
+    ABS_TOL=1.e-8
+    def setUp(self):
+        for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+            NX=x
+            NY=mpiSize//x
+            if NX*NY == mpiSize:
+                break
+        self.domain=Rectangle(n0=NE*NX-1, n1=NE*NY-1, l0=1., l1=1., d0=NX, d1=NY)
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
+
diff --git a/ripley/test/python/run_pasoSolversOnMultiRes.py b/ripley/test/python/run_pasoSolversOnMultiRes.py
new file mode 100644
index 0000000..1bf890c
--- /dev/null
+++ b/ripley/test/python/run_pasoSolversOnMultiRes.py
@@ -0,0 +1,156 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on ripley multiresolution domains
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import getMPISizeWorld, hasFeature, sqrt
+from esys.ripley import MultiResolutionDomain
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_PASO = hasFeature('paso')
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+mpiSize=getMPISizeWorld()
+for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+    NX=x
+    NY=mpiSize//x
+    if NX*NY == mpiSize:
+        break
+
+for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
+    NXb=x[0]
+    NYb=x[1]
+    NZb=mpiSize//(x[0]*x[1])
+    if NXb*NYb*NZb == mpiSize:
+        break
+
+def Rectangle(**kwargs):
+    m = MultiResolutionDomain(2, **kwargs)
+    return m.getLevel(1)
+
+def Brick(**kwargs):
+    m = MultiResolutionDomain(3, **kwargs)
+    return m.getLevel(1)
+
+@unittest.skipIf(not HAVE_PASO, "PASO not available")
+class SimpleSolveOnPaso(SimpleSolveTestCase):
+    pass
+
+class Test_SimpleSolveMultires2D_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultires3D_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveMultires2D_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultires3D_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveMultires2D_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultires3D_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveMultires2D_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultires3D_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
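The Rectangle()/Brick() helpers above wrap MultiResolutionDomain and hand the solver tests the level-1 grid of the refinement hierarchy. A minimal usage sketch, assuming esys.ripley is built and the script runs on a single MPI rank (the domain sizes are illustrative):

    from esys.ripley import MultiResolutionDomain

    def Rectangle(**kwargs):
        # build the 2D multiresolution hierarchy and work on its level-1 grid
        return MultiResolutionDomain(2, **kwargs).getLevel(1)

    dom = Rectangle(n0=11, n1=11, d0=1, d1=1)   # as in setUp(), d0*d1 matches the MPI world size (1 here)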
diff --git a/ripley/test/python/run_pasoSolversOnRipley.py b/ripley/test/python/run_pasoSolversOnRipley.py
new file mode 100644
index 0000000..79318ef
--- /dev/null
+++ b/ripley/test/python/run_pasoSolversOnRipley.py
@@ -0,0 +1,145 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Apache License, version 2.0
+http://www.apache.org/licenses/LICENSE-2.0"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for Paso-based PDE solvers on ripley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import getMPISizeWorld, hasFeature, sqrt
+from esys.ripley import Rectangle, Brick
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_PASO = hasFeature('paso')
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+mpiSize=getMPISizeWorld()
+for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+    NX=x
+    NY=mpiSize//x
+    if NX*NY == mpiSize:
+        break
+
+for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
+    NXb=x[0]
+    NYb=x[1]
+    NZb=mpiSize//(x[0]*x[1])
+    if NXb*NYb*NZb == mpiSize:
+        break
+
+@unittest.skipIf(not HAVE_PASO, "PASO not available")
+class SimpleSolveOnPaso(SimpleSolveTestCase):
+    pass
+
+class Test_SimpleSolveRipley2D_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Paso_BICGSTAB_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley2D_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Paso_PCG_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley2D_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Paso_MINRES_Jacobi(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley2D_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Paso_TFQMR_RILU(SimpleSolveOnPaso):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.PASO
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+    run_tests(__name__, exit_on_failure=True)
+
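Each test class in the two files above only selects a (package, method, preconditioner) triple; the actual solve is driven by SimpleSolveTestCase in test_simplesolve, which is not part of this diff. As a hedged sketch based on the explicit tests removed further below, the options end up applied to a LinearPDE roughly like this (solve_with is an invented name):

    from esys.escript.linearPDEs import SolverOptions

    def solve_with(pde, package, method, preconditioner, tol=1.e-8):
        # 'pde' is an esys.escript LinearPDE; mirrors the option calls in the
        # removed run_simplesolveOnRipley.py further below
        opts = pde.getSolverOptions()
        opts.setPackage(package)                 # e.g. SolverOptions.PASO
        opts.setSolverMethod(method)             # e.g. SolverOptions.BICGSTAB
        opts.setPreconditioner(preconditioner)   # e.g. SolverOptions.JACOBI
        opts.setTolerance(tol)
        return pde.getSolution()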
diff --git a/ripley/test/python/run_readWriteOnMultiRes.py b/ripley/test/python/run_readWriteOnMultiRes.py
index be9d3c9..684e907 100644
--- a/ripley/test/python/run_readWriteOnMultiRes.py
+++ b/ripley/test/python/run_readWriteOnMultiRes.py
@@ -35,6 +35,7 @@ try:
 except KeyError:
      RIPLEY_WORKDIR='/tmp'
 
+HAVE_UNZIP = hasFeature('unzip')
 
 mpiSize = getMPISizeWorld()
 mpiRank = getMPIRankWorld()
@@ -92,7 +93,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
             data, ref = self.generateUniqueData(ftype)
             with self.assertRaises(RuntimeError):
                 result = self.writeThenRead(data, ftype, fcode)
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain)))
 
     def test_writeGrid3D(self):
@@ -102,7 +103,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
             data, ref = self.generateUniqueData(ftype)
             with self.assertRaises(RuntimeError):
                 result = self.writeThenRead(data, ftype, fcode)
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain)))
 
 class Test_writeBinaryGridRipley_LITTLE_FLOAT32(WriteBinaryGridTestBase):
@@ -241,7 +242,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                             ref = np.append(ref, extra, axis=1-d)
 
                 # step 4 - compare
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain)))
 
     @unittest.skipIf(mpiSize > 1, "3D Multiresolution domains don't support multiprocess yet")
@@ -287,7 +288,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                             ref = np.append(ref, extra, axis=2-d)
 
                 # step 4 - compare
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain)))
 
 
@@ -377,7 +378,7 @@ class Test_readBinaryGridZippedRipley(unittest.TestCase):
             return readBinaryGrid(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
 
-        if not hasattr(ripleycpp, "_readBinaryGridFromZipped"):
+        if not HAVE_UNZIP:
             raise unittest.SkipTest("unzip library not available (boost_iostreams)")
         return ripleycpp._readBinaryGridFromZipped(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
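The hasattr() probe on ripleycpp is replaced by an explicit hasFeature('unzip') check. A short sketch of the same feature-gating pattern applied at class level; the class and test names here are hypothetical:

    import esys.escriptcore.utestselect as unittest
    from esys.escript import hasFeature

    HAVE_UNZIP = hasFeature('unzip')   # True only if boost_iostreams/unzip support was built in

    @unittest.skipIf(not HAVE_UNZIP, "unzip library not available (boost_iostreams)")
    class Test_ZippedGridReads(unittest.TestCase):   # hypothetical example class
        def test_read(self):
            pass   # would call ripleycpp._readBinaryGridFromZipped(...) here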
diff --git a/ripley/test/python/run_readWriteOnRipley.py b/ripley/test/python/run_readWriteOnRipley.py
index 7279f16..ddeddfa 100644
--- a/ripley/test/python/run_readWriteOnRipley.py
+++ b/ripley/test/python/run_readWriteOnRipley.py
@@ -35,6 +35,8 @@ try:
 except KeyError:
      RIPLEY_WORKDIR='/tmp'
 
+HAVE_UNZIP = hasFeature('unzip')
+
 #NE=4 # number elements, must be even
 #for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
 #    NX=x
@@ -96,7 +98,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
         for ftype,fcode in [(ReducedFunction,'RF'), (ContinuousFunction,'CF'), (Solution, 'Sol')]:
             data, ref = self.generateUniqueData(ftype)
             result = self.writeThenRead(data, ftype, fcode)
-            self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ftype(self.domain)))
 
     def test_writeGrid3D(self):
@@ -105,7 +107,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
         for ftype,fcode in [(ReducedFunction,'RF'), (ContinuousFunction,'CF'), (Solution, 'Sol')]:
             data, ref = self.generateUniqueData(ftype)
             result = self.writeThenRead(data, ftype, fcode)
-            self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ftype(self.domain)))
 
 class Test_writeBinaryGridRipley_LITTLE_FLOAT32(WriteBinaryGridTestBase):
@@ -243,7 +245,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                         ref = np.append(ref, extra, axis=1-d)
 
             # step 4 - compare
-            self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ftype(self.domain)))
 
     def test_readGrid3D(self):
@@ -287,7 +289,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                         ref = np.append(ref, extra, axis=2-d)
 
             # step 4 - compare
-            self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ftype(self.domain)))
 
 
@@ -377,7 +379,7 @@ class Test_readBinaryGridZippedRipley(unittest.TestCase):
             return readBinaryGrid(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
 
-        if not hasattr(ripleycpp, "_readBinaryGridFromZipped"):
+        if not HAVE_UNZIP:
             raise unittest.SkipTest("unzip library not available (boost_iostreams)")
         return ripleycpp._readBinaryGridFromZipped(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
diff --git a/ripley/test/python/run_simplesolveOnMultiRes.py b/ripley/test/python/run_simplesolveOnMultiRes.py
deleted file mode 100644
index 602af27..0000000
--- a/ripley/test/python/run_simplesolveOnMultiRes.py
+++ /dev/null
@@ -1,404 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for PDE solvers on ripley
-"""
-
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from esys.escript import *
-from esys.ripley import MultiResolutionDomain
-from esys.escript.linearPDEs import LinearPDE, SolverOptions
-import numpy
-
-SOLVER_VERBOSE=False 
-SOLVER_TOL=1.e-8
-REL_TOL=1.e-6
-
-FAC_DIAG=1.
-FAC_OFFDIAG=-0.4
-
-# number of elements in the spatial directions
-NE0=12
-NE1=12
-NE2=8
-mpiSize=getMPISizeWorld()
-for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
-    NX=x
-    NY=mpiSize//x
-    if NX*NY == mpiSize:
-        break
-
-for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
-    NXb=x[0]
-    NYb=x[1]
-    NZb=mpiSize//(x[0]*x[1])
-    if NXb*NYb*NZb == mpiSize:
-        break
-
-def Rectangle(**kwargs):
-    m = MultiResolutionDomain(2, **kwargs)
-    return m.getLevel(1)
-
-def Brick(**kwargs):
-    m = MultiResolutionDomain(3, **kwargs)
-    return m.getLevel(1)
-
-class Test_SimpleSolveRipley2D_Single_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        # Tell about how many MPI CPUs and OpenMP threads
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_Single_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_System_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
-class Test_SimpleSolveRipley3D_Single_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
-class Test_SimpleSolveRipley3D_System_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_Single_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_System_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
-class Test_SimpleSolveRipley3D_Single_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
-class Test_SimpleSolveRipley3D_System_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-
-if __name__ == '__main__':
-   run_tests(__name__, exit_on_failure=True)
diff --git a/ripley/test/python/run_simplesolveOnRipley.py b/ripley/test/python/run_simplesolveOnRipley.py
deleted file mode 100644
index 623f9cc..0000000
--- a/ripley/test/python/run_simplesolveOnRipley.py
+++ /dev/null
@@ -1,392 +0,0 @@
-
-##############################################################################
-#
-# Copyright (c) 2003-2016 by The University of Queensland
-# http://www.uq.edu.au
-#
-# Primary Business: Queensland, Australia
-# Licensed under the Apache License, version 2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-# Development 2012-2013 by School of Earth Sciences
-# Development from 2014 by Centre for Geoscience Computing (GeoComp)
-#
-##############################################################################
-
-from __future__ import print_function, division
-
-__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
-http://www.uq.edu.au
-Primary Business: Queensland, Australia"""
-__license__="""Licensed under the Apache License, version 2.0
-http://www.apache.org/licenses/LICENSE-2.0"""
-__url__="https://launchpad.net/escript-finley"
-
-"""
-Test suite for PDE solvers on ripley
-"""
-
-import esys.escriptcore.utestselect as unittest
-from esys.escriptcore.testing import *
-from esys.escript import *
-from esys.ripley import Rectangle,Brick
-from esys.escript.linearPDEs import LinearPDE, SolverOptions
-import numpy
-
-SOLVER_VERBOSE=False 
-SOLVER_TOL=1.e-8
-REL_TOL=1.e-6
-
-FAC_DIAG=1.
-FAC_OFFDIAG=-0.4
-
-# number of elements in the spatial directions
-NE0=12
-NE1=12
-NE2=8
-mpiSize=getMPISizeWorld()
-for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
-    NX=x
-    NY=mpiSize//x
-    if NX*NY == mpiSize:
-        break
-
-for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
-    NXb=x[0]
-    NYb=x[1]
-    NZb=mpiSize//(x[0]*x[1])
-    if NXb*NYb*NZb == mpiSize:
-        break
-
-class Test_SimpleSolveRipley2D_Single_Paso_BICGSTAB_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        # Tell about how many MPI CPUs and OpenMP threads
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.BICGSTAB)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_Single_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_System_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley3D_Single_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley3D_System_Paso_PCG_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.PCG)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_Single_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Scalar(0,Solution(domain))
-        u_ex=1.+2.*x[0]+3.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        # -------- test gradient --------------------------------
-        g=grad(u_ex)
-        self.assertTrue(Lsup(g_ex-g)<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(2),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley2D_System_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]
-        u_ex[1]=-1.+3.*x[0]+2.*x[1]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(2,2),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[1,0]=3.
-        g_ex[1,1]=2.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=2)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(2,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(2)
-        A[1,:,1,:]=kronecker(2)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((2,2))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley3D_Single_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,),Solution(domain))
-        g_ex[0]=2.
-        g_ex[1]=3.
-        g_ex[2]=4.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=1)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask)
-        pde.setValue(A=kronecker(3),y=inner(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-
-class Test_SimpleSolveRipley3D_System_Paso_MINRES_Jacobi(unittest.TestCase):
-     def test_solve(self):
-        domain=Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
-        x=Solution(domain).getX()
-        # --- set exact solution ----
-        u_ex=Vector(0,Solution(domain))
-        u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
-        u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
-        u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
-        # --- set exact gradient -----------
-        g_ex=Data(0.,(3,3),Solution(domain))
-        g_ex[0,0]=2.
-        g_ex[0,1]=3.
-        g_ex[0,2]=4.
-        g_ex[1,0]=4.
-        g_ex[1,1]=1.
-        g_ex[1,2]=-2.
-        g_ex[2,0]=8.
-        g_ex[2,1]=4.
-        g_ex[2,2]=5.
-        # -------- test gradient --------------------------------
-        self.assertTrue(Lsup(g_ex-grad(u_ex))<REL_TOL*Lsup(g_ex))
-        # -------- set-up PDE ----------------------------------- 
-        pde=LinearPDE(domain,numEquations=3)
-        mask=whereZero(x[0])
-        pde.setValue(r=u_ex,q=mask*numpy.ones(3,))
-        A=Tensor4(0,Function(domain))
-        A[0,:,0,:]=kronecker(3)
-        A[1,:,1,:]=kronecker(3)
-        A[2,:,2,:]=kronecker(3)
-        Y=Vector(0.,Function(domain))
-        Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
-        Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
-        Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
-        pde.setValue(A=A,
-                     D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numpy.ones((3,3))*FAC_OFFDIAG,
-                     Y=Y,
-                     y=matrixmult(g_ex,domain.getNormal()))
-        # -------- get the solution ---------------------------
-        pde.getSolverOptions().setTolerance(SOLVER_TOL)
-        pde.getSolverOptions().setSolverMethod(SolverOptions.MINRES)
-        pde.getSolverOptions().setPreconditioner(SolverOptions.JACOBI)
-        pde.getSolverOptions().setPackage(SolverOptions.PASO)
-        pde.getSolverOptions().setVerbosity(SOLVER_VERBOSE)
-        u=pde.getSolution()
-        # -------- test the solution ---------------------------
-        error=Lsup(u-u_ex)
-        self.assertTrue(error<REL_TOL*Lsup(u_ex), "solution error %s is too big."%error)
-        
-
-if __name__ == '__main__':
-   run_tests(__name__, exit_on_failure=True)
diff --git a/ripley/test/python/run_splitworldOnRipley.py b/ripley/test/python/run_splitworldOnRipley.py
index 07e7f8a..1c28b51 100644
--- a/ripley/test/python/run_splitworldOnRipley.py
+++ b/ripley/test/python/run_splitworldOnRipley.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -16,7 +16,7 @@
 
 from __future__ import print_function, division
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
diff --git a/ripley/test/python/run_trilinosSolversOnMultiRes.py b/ripley/test/python/run_trilinosSolversOnMultiRes.py
new file mode 100644
index 0000000..f656d3b
--- /dev/null
+++ b/ripley/test/python/run_trilinosSolversOnMultiRes.py
@@ -0,0 +1,252 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Open Software License version 3.0
+http://www.opensource.org/licenses/osl-3.0.php"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for Trilinos-based PDE solvers on ripley multiresolution domains
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import getMPISizeWorld, hasFeature, sqrt
+from esys.ripley import MultiResolutionDomain
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_TRILINOS = hasFeature('trilinos')
+skip_muelu_long = False #hasFeature("longindex")
+
+# number of elements in the spatial directions
+NE0=10
+NE1=10
+NE2=9
+mpiSize=getMPISizeWorld()
+for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+    NX=x
+    NY=mpiSize//x
+    if NX*NY == mpiSize:
+        break
+
+for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
+    NXb=x[0]
+    NYb=x[1]
+    NZb=mpiSize//(x[0]*x[1])
+    if NXb*NYb*NZb == mpiSize:
+        break
+
+def Rectangle(**kwargs):
+    m = MultiResolutionDomain(2, **kwargs)
+    return m.getLevel(1)
+
+def Brick(**kwargs):
+    m = MultiResolutionDomain(3, **kwargs)
+    return m.getLevel(1)
+
+@unittest.skipIf(not HAVE_TRILINOS, "Trilinos not available")
+class SimpleSolveOnTrilinos(SimpleSolveTestCase):
+    pass
+
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveMultiRes2D_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### GMRES + Jacobi
+
+class Test_SimpleSolveMultiRes2D_Trilinos_GMRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.GMRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_GMRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.GMRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveMultiRes2D_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveMultiRes2D_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveMultiRes2D_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### LSQR + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveMultiRes2D_Trilinos_LSQR_AMG(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.LSQR
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveMultiRes2D_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveMultiRes3D_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + ILUT
+
+class Test_SimpleSolveMultiRes2D_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(mpiSize > 1, "3D Multiresolution domains require single process")
+class Test_SimpleSolveMultiRes3D_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+   run_tests(__name__, exit_on_failure=True)
+
diff --git a/ripley/test/python/run_trilinosSolversOnRipley.py b/ripley/test/python/run_trilinosSolversOnRipley.py
new file mode 100644
index 0000000..5bcf3c2
--- /dev/null
+++ b/ripley/test/python/run_trilinosSolversOnRipley.py
@@ -0,0 +1,247 @@
+
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+from __future__ import print_function, division
+
+__copyright__="""Copyright (c) 2003-2016 by The University of Queensland
+http://www.uq.edu.au
+Primary Business: Queensland, Australia"""
+__license__="""Licensed under the Open Software License version 3.0
+http://www.opensource.org/licenses/osl-3.0.php"""
+__url__="https://launchpad.net/escript-finley"
+
+"""
+Test suite for PDE solvers on ripley
+"""
+
+from test_simplesolve import SimpleSolveTestCase
+import esys.escriptcore.utestselect as unittest
+from esys.escriptcore.testing import *
+
+from esys.escript import getMPISizeWorld, hasFeature, sqrt
+from esys.ripley import Rectangle, Brick
+from esys.escript.linearPDEs import SolverOptions
+
+HAVE_TRILINOS = hasFeature('trilinos')
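+# set to hasFeature("longindex") to skip the MueLu AMG tests on long-index builds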
+skip_muelu_long = False #hasFeature("longindex")
+
+# number of elements in the spatial directions
+NE0=12
+NE1=12
+NE2=8
+mpiSize=getMPISizeWorld()
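+# choose a 2D process grid with NX*NY == mpiSize for domain splitting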
+for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
+    NX=x
+    NY=mpiSize//x
+    if NX*NY == mpiSize:
+        break
+
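+# choose a 3D process grid with NXb*NYb*NZb == mpiSize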
+for x in [(int(mpiSize**(1/3.)),int(mpiSize**(1/3.))),(2,3),(2,2),(1,2),(1,1)]:
+    NXb=x[0]
+    NYb=x[1]
+    NZb=mpiSize//(x[0]*x[1])
+    if NXb*NYb*NZb == mpiSize:
+        break
+
+@unittest.skipIf(not HAVE_TRILINOS, "Trilinos not available")
+class SimpleSolveOnTrilinos(SimpleSolveTestCase):
+    pass
+
+### BiCGStab + Jacobi
+
+class Test_SimpleSolveRipley2D_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_BICGSTAB_Jacobi(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.BICGSTAB
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### GMRES + Jacobi
+
+class Test_SimpleSolveRipley2D_Trilinos_GMRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.GMRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_GMRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.GMRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + Jacobi
+
+class Test_SimpleSolveRipley2D_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_PCG_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### MINRES + Jacobi
+
+class Test_SimpleSolveRipley2D_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_MINRES_Jacobi(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.MINRES
+        self.preconditioner = SolverOptions.JACOBI
+
+    def tearDown(self):
+        del self.domain
+
+### TFQMR + RILU
+
+class Test_SimpleSolveRipley2D_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_TFQMR_RILU(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.TFQMR
+        self.preconditioner = SolverOptions.RILU
+
+    def tearDown(self):
+        del self.domain
+
+### LSQR + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveRipley2D_Trilinos_LSQR_AMG(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.LSQR
+        self.preconditioner = SolverOptions.AMG
+
+    def _setSolverOptions(self, so):
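+        # tell the Trilinos/MueLu AMG setup how many PDE equations are coupled per node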
+        so.setTrilinosParameter("number of equations", 2)
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + AMG
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveRipley2D_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def _setSolverOptions(self, so):
+        so.setTrilinosParameter("number of equations", 2)
+
+    def tearDown(self):
+        del self.domain
+
+@unittest.skipIf(skip_muelu_long, "MueLu AMG incompatible with index type long")
+class Test_SimpleSolveRipley3D_Trilinos_PCG_AMG(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.AMG
+
+    def _setSolverOptions(self, so):
+        so.setTrilinosParameter("number of equations", 3)
+
+    def tearDown(self):
+        del self.domain
+
+### PCG + ILUT
+
+class Test_SimpleSolveRipley2D_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    def setUp(self):
+        self.domain = Rectangle(n0=NE0*NX-1, n1=NE1*NY-1, d0=NX, d1=NY)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+class Test_SimpleSolveRipley3D_Trilinos_PCG_ILUT(SimpleSolveOnTrilinos):
+    SOLVER_TOL = 1.e-9
+    def setUp(self):
+        self.domain = Brick(n0=NE0*NXb-1, n1=NE1*NYb-1, n2=NE2*NZb-1, d0=NXb, d1=NYb, d2=NZb)
+        self.package = SolverOptions.TRILINOS
+        self.method = SolverOptions.PCG
+        self.preconditioner = SolverOptions.ILUT
+
+    def tearDown(self):
+        del self.domain
+
+
+if __name__ == '__main__':
+   run_tests(__name__, exit_on_failure=True)
+
diff --git a/ripley/test/ripley_UnitTest.cpp b/ripley/test/ripley_UnitTest.cpp
index 16ec3f9..dd29acf 100644
--- a/ripley/test/ripley_UnitTest.cpp
+++ b/ripley/test/ripley_UnitTest.cpp
@@ -14,9 +14,7 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include <esysUtils/first.h>
-#include <esysUtils/Esys_MPI.h>
+#include <escript/Esys_MPI.h>
 
 #include "SystemMatrixTestCase.h"
 
diff --git a/run-escript.in b/run-escript.in
index 140fb6b..6953039 100644
--- a/run-escript.in
+++ b/run-escript.in
@@ -74,10 +74,6 @@ else
 fi
 ##### End finding ESCRIPT_ROOT ########
 
-PYTHON_MPI_NULL="$ESCRIPT_ROOT/lib/pythonMPI"
-PYTHON_MPI_REDIRECT="$ESCRIPT_ROOT/lib/pythonMPIredirect"
-PYTHON_CMD=python
-
 # if possible please express paths relative to $ESCRIPT_ROOT unless
 # they are in an unrelated location
 
@@ -111,6 +107,10 @@ get_buildvar () {
     echo $(grep "^$1=" "$BUILDINFO_FILE" |cut -d= -f2)
 }
 
+PYTHON_MPI_NULL="$ESCRIPT_ROOT/lib/pythonMPI"
+PYTHON_MPI_REDIRECT="$ESCRIPT_ROOT/lib/pythonMPIredirect"
+PYTHON_CMD=$(get_buildvar python)
+
 HELP_TEXT="
 Usage: run-escript [options] script.py [arguments...]
 	-n nn		number of nodes to use
@@ -471,7 +471,7 @@ if [ "$MPI_FLAVOUR" = "OPENMPI" ]
 then
     if [ -z `which rsh``which ssh` ]
     then
-        AGENTOVERRIDE="--gmca orte_rsh_agent /bin/false"
+        AGENTOVERRIDE="--gmca plm_rsh_agent /bin/false"
     fi
 fi 
 
diff --git a/scons/__init__.py b/scons/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scons/templates/README_FIRST b/scons/templates/README_FIRST
new file mode 100644
index 0000000..6bef12b
--- /dev/null
+++ b/scons/templates/README_FIRST
@@ -0,0 +1,320 @@
+
+This directory contains template configuration files for building escript on
+a number of common targets.
+
+Configuring your build
+======================
+
+To build escript on your machine, create a file named
+<sourcedir>/scons/<hostname>_options.py
+where <sourcedir> is the escript source directory and <hostname> is your
+machine's short hostname.
+If you find a template file whose name matches what you are running, you can
+import that file from your new options file and then customize it to your
+needs.
+For example, if you are running Ubuntu Vivid and would like to build with MPI
+support, you can insert the following in your options file:
+
+from templates.vivid_mpi_options import *
+
+If you can't find a matching template file you can either import one that
+comes close or start from scratch and set the options as required. All
+recognised options are explained below.
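+
+Putting this together, a minimal options file built on top of a template
+could look like the following (the settings here are purely illustrative):
+
+# saved as <sourcedir>/scons/<hostname>_options.py
+from templates.vivid_mpi_options import *
+verbose = True
+umfpack = True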
+
+Prefixes
+========
+
+There are two ways to specify where to find dependency headers and libraries
+(via the <dependency>_prefix option):
+1) If your installation follows the general scheme where headers are located
+   in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
+   it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
+2) Otherwise provide a list with two elements, where the first one is the
+   include path, and the second the library path, e.g.
+   boost_prefix=['/usr/include/boost1_48', '/usr/lib']
+All <dependency>_prefix settings default to '/usr'
+
+Options
+=======
+
+The following is an exhaustive list of escript options you can set.
+Each option is followed by a brief explanation.
+
+escript_opts_version = 203
+  The options file version. SCons will refuse to build if there have been
+  changes to the set of variables and your file has not been updated.
+  This setting is mandatory.
+
+prefix = '/usr/local'
+  Installation prefix - files will be installed in subdirectories underneath
+  this path. DEFAULT: '<sourcedir>' (source directory)
+
+build_dir = '/tmp/escriptbuild'
+  Top-level directory for intermediate build and test files.
+  DEFAULT: '<sourcedir>/build'
+
+verbose = True
+  Set to True to print the full compiler/linker command line.  DEFAULT: False
+
+cxx = 'g++'
+  C++ compiler command name or full path. DEFAULT: auto-detected
+
+cc_flags = ''
+  Flags to use with the C++ compiler. Do not set this unless you know
+  what you are doing - use cxx_extra to specify additional flags.
+  DEFAULT: compiler-dependent
+
+cc_optim = '-O3 -march=native'
+  Additional compiler (optimization) flags, only applied for non-debug builds
+  DEFAULT: compiler-dependent
+
+cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
+  Additional compiler flags only applied for debug builds
+  DEFAULT: compiler-dependent
+
+cxx_extra = '-Wextra -Wno-unused-parameter -I/opt/local/include'
+  Additional flags to add to the C++ compiler. DEFAULT: '' (empty)
+
+ld_extra = ''
+  Additional flags to add to the linker. DEFAULT: '' (empty)
+
+nvcc = '/usr/local/bin/nvcc'
+  Path to CUDA compiler [new in 202]. DEFAULT: auto-detected
+
+nvccflags = '-arch=sm_35 -DBOOST_NOINLINE="__attribute__((noinline))"'
+  Flags for CUDA compiler [new in 202].  DEFAULT: '' (empty)
+
+werror = False
+  Whether to treat compiler warnings as errors. DEFAULT: True
+
+debug = True
+  Whether to build a debug version (applying cc_debug flags)
+  DEFAULT: False
+
+openmp = True
+  Set to True to add flags that enable OpenMP parallelization
+  DEFAULT: False
+
+omp_flags = '-fopenmp'
+  Additional compiler flags for OpenMP builds. DEFAULT: compiler-dependent
+
+omp_ldflags = '-fopenmp'
+  Additional linker flags for OpenMP builds. DEFAULT: compiler-dependent
+
+boost_prefix = '/usr/local'
+  Prefix or paths to boost headers and libraries. See note above.
+
+boost_libs = ['boost_python-py27']
+  boost-python library/libraries to link against
+
+cppunit_prefix = '/usr/local'
+  Prefix or paths to CppUnit headers and libraries. See note above.
+  Only required if you want to run the C++ unit tests.
+
+cppunit_libs = ['cppunit']
+  CppUnit library/libraries to link against. Only required if you want to run
+  the C++ unit tests.
+
+mpi = 'OPENMPI'
+  Flavour of MPI implementation
+  Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
+  DEFAULT: 'none' (disable MPI)
+
+mpi_prefix = '/usr/lib/openmpi'
+  Prefix or paths to MPI headers and libraries. See note above about prefixes.
+
+mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
+  MPI libraries to link against.
+
+cuda = True
+  Whether to add support for GPU-based ripley system matrix (requires nvcc
+  and thrust headers, experimental) [new in 202].  DEFAULT: False
+
+cuda_prefix = '/usr/local'
+  Prefix or paths to NVidia CUDA installation. See note above. [new in 202]
+
+netcdf = True
+  Whether to use the netCDF library for dump file support and netCDF-based
+  downunder data import. Requires the legacy netCDF C++ libraries and headers.
+  DEFAULT: False
+
+netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
+  Prefix or paths to netCDF headers and libraries. See note above.
+
+netcdf_libs = ['netcdf_c++', 'netcdf']
+  netCDF library/libraries to link against
+
+parmetis = True
+  Whether to use the parMETIS library (only relevant if building dudley and/or
+  finley with MPI). DEFAULT: False
+
+parmetis_prefix = '/usr/local'
+  Prefix or paths to parMETIS headers and libraries. See note above.
+
+parmetis_libs = ['parmetis', 'metis']
+  parMETIS library/libraries to link against
+
+mkl = True
+  Whether to add support for the Intel MKL (Math Kernel Library) direct solver
+  DEFAULT: False
+
+mkl_prefix = ['/opt/intel/mkl/include', '/opt/intel/mkl/lib/intel64']
+  Prefix or paths to MKL headers and libraries. See note above.
+
+mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
+  MKL library/libraries to link against
+
+umfpack = True
+  Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
+  DEFAULT: False
+
+umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
+  Prefix or paths to UMFPACK headers and libraries. See note above.
+
+umfpack_libs = ['umfpack', 'blas', 'amd']
+  UMFPACK library/libraries to link against
+
+boomeramg = True
+  Whether to use BoomerAMG (requires MPI). DEFAULT: False
+
+boomeramg_prefix = '/usr/local'
+  Prefix or paths to BoomerAMG headers and libraries. See note above.
+
+boomeramg_libs = ['HYPRE']
+  BoomerAMG library/libraries to link against
+
+lapack = True
+  Whether to use BLAS/LAPACK. Note that LAPACK is incompatible with long
+  indices.
+  DEFAULT: 'auto' (try to detect MKL lapack or ATLAS)
+
+lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
+  Prefix or paths to LAPACK headers and libraries. See note above.
+
+lapack_libs = ['lapack_atlas']
+  LAPACK library/libraries to link against
+
+silo = True
+  Whether to use LLNL's SILO library for Silo output file support in weipa
+  DEFAULT: False
+
+silo_prefix = '/usr/local'
+  Prefix or paths to SILO headers and libraries. See note above.
+
+silo_libs = ['siloh5', 'hdf5']
+  SILO library/libraries to link against
+
+trilinos = True
+  Whether to enable support for the Trilinos solver stack. [new in 203]
+  DEFAULT: False
+
+trilinos_prefix = '/usr/local'
+  Prefix or paths to Trilinos headers and libraries. See note above.
+
+trilinos_libs = []
+  Trilinos libraries to link against. DEFAULT: auto-detected
+
+visit = True
+  Whether to use LLNL's VisIt simulation interface (only version 2 supported)
+  DEFAULT: False
+
+visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
+  Prefix or paths to VisIt's sim2 headers and libraries. See note above.
+
+visit_libs = ['simV2']
+  Sim2 library/libraries to link against
+
+domains = ['finley', 'ripley']
+  List of domain families to build [new in 202].
+  DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
+
+paso = True
+  Whether to build the Paso solver library. Setting this to False only makes
+  sense if you have Trilinos enabled. DEFAULT: True
+
+weipa = True
+  Whether to build the weipa data export library. DEFAULT: True
+
+Advanced Options
+================
+
+Setting the following options may break your build.
+
+dudley_assemble_flags = '-funroll-loops'
+  Compiler flags for some optimisations in dudley
+
+prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
+launcher = "mpirun -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
+postlaunch = ""
+  With these three options you can define how to launch programs in your
+  environment. This is relevant for MPI builds and/or where a batch system
+  or job scheduler is in use. 
+  The content of these options is literally copied into the escript launcher
+  after applying the following substitutions:
+  %b = executable, %n = number of nodes, %p = number of processes,
+  %N = total number of processes, %t = number of threads,
+  %f = name of hostfile, %h = comma-separated list of hosts,
+  %e = comma-separated list of environment variables to export
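+
+  As an illustration only (hosts and counts below are made up), with %e set
+  to OMP_NUM_THREADS, %h to node1,node2 and %N to 4, the prelaunch and
+  launcher lines above would expand to roughly:
+
+  EE=$(echo OMP_NUM_THREADS|sed -e 's/,/ -x /g')
+  mpirun -x ${EE} --bynode --bind-to-none --host node1,node2 -np 4 <executable>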
+
+iknowwhatimdoing = True
+  Enables code that is non-standard and not recommended for general use.
+
+tools_names = ['intelc']
+  Compiler toolset to use. DEFAULT: auto-detected
+
+env_export = []
+  Additional environment variables to export to the tools
+
+forcelazy = 'on'
+  For testing use only, sets the default value for autolazy.
+  DEFAULT: 'leave_alone'
+
+forcecollres = 'on'
+  For testing use only, sets the default value for force resolving collective
+  operations.  DEFAULT: 'leave_alone'
+
+sys_libs = []
+  Extra libraries to link with
+
+pythoncmd = '/usr/bin/python3'
+  Python executable to use for compiling. Must be compatible with the
+  boost python library
+  DEFAULT: auto-detected (interpreter executing scons)
+
+pythonlibname = 'python3.5m'
+  Name of the Python library. DEFAULT: auto-detected.
+
+pythonlibpath = '/usr/lib'
+  Path to Python library. DEFAULT: auto-detected.
+
+pythonincpath = '/usr/include/python3.5'
+  Path to Python include files. DEFAULT: auto-detected.
+
+longindices = True
+  Whether to map index_t to long (for very large local matrices) [new in 202]
+  DEFAULT: False
+
+compressed_files = False
+  Whether to enable reading compressed binary grids in ripley (requires boost
+  iostreams)
+  DEFAULT: True
+
+compression_libs = 'boost_iostreams-mt'
+  Compression libraries to link with. DEFAULT: 'boost_iostreams'
+
+osx_dependency_fix = True
+  Whether to apply a dependency fix to libraries (only relevant on OS X).
+  DEFAULT: False
+
+papi = True
+  Whether to use the PAPI (Performance API) library (currently broken).
+  DEFAULT: False
+
+papi_prefix = '/usr/local'
+  Prefix or paths to PAPI headers and libraries. See note above.
+
+papi_libs = ['papi']
+  PAPI library/libraries to link against
+
+papi_instrument_solver = True
+  Whether to use PAPI to instrument solver iterations. DEFAULT: False
+
+# vim: syntax=rst
+
diff --git a/scons/templates/centos7_0_options.py b/scons/templates/centos7_0_options.py
index a7fdf34..66ccc84 100644
--- a/scons/templates/centos7_0_options.py
+++ b/scons/templates/centos7_0_options.py
@@ -15,293 +15,10 @@
 ##############################################################################
 
 # This is a template configuration file for escript on CentOS.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.centos6_5_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True (not supported on CentOS 6.5 due to boost version)
 compressed_files = False
 
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/fedora21_5_options.py b/scons/templates/fedora21_5_options.py
index e5a2155..10075f3 100644
--- a/scons/templates/fedora21_5_options.py
+++ b/scons/templates/fedora21_5_options.py
@@ -15,293 +15,10 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Fedora Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.fedora20_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
 
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/freebsd10_0_options.py b/scons/templates/freebsd10_0_options.py
index 21190ee..6f545b0 100644
--- a/scons/templates/freebsd10_0_options.py
+++ b/scons/templates/freebsd10_0_options.py
@@ -15,293 +15,17 @@
 ##############################################################################
 
 # This is a template configuration file for escript on FreeBSD.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.freebsd10_0_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
+escript_opts_version = 203
 cxx_extra = '-I/usr/local/lib/python2.7/site-packages/numpy/core/include'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
 boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
 cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/usr/local'
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
 silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
 silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
 silo_libs = ['silo']
 
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/homebrew_10.10_options.py b/scons/templates/homebrew_10.10_options.py
index 74e463a..834b099 100644
--- a/scons/templates/homebrew_10.10_options.py
+++ b/scons/templates/homebrew_10.10_options.py
@@ -14,258 +14,18 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript/finley on Linux.
-# Copy this file to <hostname>_options.py, where <hostname> is your machine's
-# short hostname, then customize to your needs.
+# This is a template configuration file for escript on OS X (Homebrew).
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_44', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '.' (current directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: 'build'
-#build_dir = 'build'
-
-# C compiler command name or full path.
-# DEFAULT: auto-detected
-#cc = 'gcc'
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with both C and C++ compilers. Do not set unless you know
-# what you are doing - use cc_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-cc_flags     = "-pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing"
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -mmmx -msse'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g'
-
-# Additional flags to add to the C compiler only
-# DEFAULT: '' (empty)
-#cc_extra = ''
-
-# Additional flags to add to the C++ compiler only
-# DEFAULT: '' (empty)
+escript_opts_version = 203
+cc_flags = "-pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing"
 cxx_extra = '-Wno-c99-extensions'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version
-# DEFAULT: False
-#debug = True
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
-#openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/local'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Prefix or paths to boost-python headers and libraries. See note above.
 boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-#if this is not 'python' the command will be used instead of the python
-#version scons is running on
-#pythoncmd='python'
-
-#Set to true to build with python3 [You will need to set pythoncmd as well]
-#usepython3=False
-
-#name of the python library to link against.  For Python2 you should not need
-#to set this
-#pythonlibname=''
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
 cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Whether to use the netCDF library for dump file support
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/usr/local'
-
-# netCDF library/libraries to link against
 netcdf_libs = ['netcdf_c++', 'netcdf']
 
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to use the Intel PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
-# Whether to use Intel MKL (Math Kernel Library)
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = '/usr'
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_solver', 'mkl_em64t', 'mkl_core', 'guide', 'pthread']
-
-# Whether to use UMFPACK (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = '/usr/local'
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-# Build dynamic libraries only
-#DEFAULT: False
-#build_shared = True
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# Use intel's VSL library for random data
-# DEFAULT: False
-#vsl_random = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-#tools_names = ['default']
-
-#iknowwhatimdoing = False
-
-#forcelazy = 'leave_alone'
-
-#forcecollres = 'leave_alone'
-
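As described in the removed PREFIXES note, a <dependency>_prefix can be given in either of two ways, and both remain plain Python values in the slimmed-down templates. The boost paths below are only the examples given in those comments:

    # Form 1: single prefix; headers are looked up under <prefix>/include[32,64]
    # and libraries under <prefix>/lib[32,64]
    boost_prefix = '/usr'

    # Form 2: explicit [include_path, library_path] pair
    #boost_prefix = ['/usr/include/boost1_44', '/usr/lib']
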
diff --git a/scons/templates/homebrew_11_options.py b/scons/templates/homebrew_11_options.py
index ecb4c36..5d60509 100644
--- a/scons/templates/homebrew_11_options.py
+++ b/scons/templates/homebrew_11_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -14,262 +14,19 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript/finley on Linux.
-# Copy this file to <hostname>_options.py, where <hostname> is your machine's
-# short hostname, then customize to your needs.
+# This is a template configuration file for escript on OS X homebrew 11.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_44', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Do we need to modify shared libraries to list dependencies with
-# absolute paths (For OSX.11)
+escript_opts_version = 203
 osx_dependency_fix = True
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '.' (current directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: 'build'
-#build_dir = 'build'
-
-# C compiler command name or full path.
-# DEFAULT: auto-detected
-#cc = 'gcc'
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with both C and C++ compilers. Do not set unless you know
-# what you are doing - use cc_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-cc_flags     = "-pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing"
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -mmmx -msse'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g'
-
-# Additional flags to add to the C compiler only
-# DEFAULT: '' (empty)
-#cc_extra = ''
-
-# Additional flags to add to the C++ compiler only
-# DEFAULT: '' (empty)
+cc_flags = "-pedantic -Wall -fPIC -Wno-unknown-pragmas -Wno-sign-compare -Wno-system-headers -Wno-long-long -Wno-strict-aliasing"
 cxx_extra = '-Wno-c99-extensions'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version
-# DEFAULT: False
-#debug = True
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
-#openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/local'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Prefix or paths to boost-python headers and libraries. See note above.
 boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-#if this is not 'python' the command will be used instead of the python
-#version scons is running on
-#pythoncmd='python'
-
-#Set to true to build with python3 [You will need to set pythoncmd as well]
-#usepython3=False
-
-#name of the python library to link against.  For Python2 you should not need
-#to set this
-#pythonlibname=''
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
 cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Whether to use the netCDF library for dump file support
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/usr/local'
-
-# netCDF library/libraries to link against
 netcdf_libs = ['netcdf_c++', 'netcdf']
 
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to use the Intel PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
-# Whether to use Intel MKL (Math Kernel Library)
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = '/usr'
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_solver', 'mkl_em64t', 'mkl_core', 'guide', 'pthread']
-
-# Whether to use UMFPACK (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = '/usr/local'
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-# Build dynamic libraries only
-#DEFAULT: False
-#build_shared = True
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# Use intel's VSL library for random data
-# DEFAULT: False
-#vsl_random = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-#tools_names = ['default']
-
-#iknowwhatimdoing = False
-
-#forcelazy = 'leave_alone'
-
-#forcecollres = 'leave_alone'
-
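For MPI and batch-system runs, the launcher, prelaunch and postlaunch strings use the %-substitutions listed in the removed comments (%b executable, %h host list, %N total number of processes, %e exported environment variables, and so on). Uncommenting the documented defaults gives a complete Open MPI override in an options file:

    prelaunch  = "EE=$(echo %e|sed -e 's/,/ -x /g')"
    launcher   = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
    postlaunch = ""
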
diff --git a/scons/templates/homebrew_options.py b/scons/templates/homebrew_options.py
index 69f4dde..d05723a 100644
--- a/scons/templates/homebrew_options.py
+++ b/scons/templates/homebrew_options.py
@@ -14,257 +14,16 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript/finley on Linux.
-# Copy this file to <hostname>_options.py, where <hostname> is your machine's
-# short hostname, then customize to your needs.
+# This is a template configuration file for escript on OS X homebrew.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_44', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 201
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '.' (current directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: 'build'
-#build_dir = 'build'
-
-# C compiler command name or full path.
-# DEFAULT: auto-detected
-#cc = 'gcc'
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with both C and C++ compilers. Do not set unless you know
-# what you are doing - use cc_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -mmmx -msse'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g'
-
-# Additional flags to add to the C compiler only
-# DEFAULT: '' (empty)
-#cc_extra = ''
-
-# Additional flags to add to the C++ compiler only
-# DEFAULT: '' (empty)
-#cxx_extra = ''
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version
-# DEFAULT: False
-#debug = True
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
-#openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
+escript_opts_version = 203
 mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/local'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Prefix or paths to boost-python headers and libraries. See note above.
 boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-#if this is not 'python' the command will be used instead of the python
-#version scons is running on
-#pythoncmd='python'
-
-#Set to true to build with python3 [You will need to set pythoncmd as well]
-#usepython3=False
-
-#name of the python library to link against.  For Python2 you should not need
-#to set this
-#pythonlibname=''
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
 cppunit_prefix = '/opt/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Whether to use the netCDF library for dump file support
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/usr/local'
-
-# netCDF library/libraries to link against
 netcdf_libs = ['netcdf_c++', 'netcdf']
 
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to use the Intel PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
-# Whether to use Intel MKL (Math Kernel Library)
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = '/usr'
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_solver', 'mkl_em64t', 'mkl_core', 'guide', 'pthread']
-
-# Whether to use UMFPACK (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = '/usr/local'
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-# Build dynamic libraries only
-#DEFAULT: False
-#build_shared = True
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# Use intel's VSL library for random data
-# DEFAULT: False
-#vsl_random = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-#tools_names = ['default']
-
-#iknowwhatimdoing = False
-
-#forcelazy = 'leave_alone'
-
-#forcecollres = 'leave_alone'
-
diff --git a/scons/templates/jessie_mpi_options.py b/scons/templates/jessie_mpi_options.py
index cae5ae6..1cb53f9 100644
--- a/scons/templates/jessie_mpi_options.py
+++ b/scons/templates/jessie_mpi_options.py
@@ -17,3 +17,4 @@
 from .jessie_options import *
 
 mpi='OPENMPI'
+
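jessie_mpi_options.py is itself an example of the recommended usage: import a base template and override only what differs. A per-host configuration follows the same pattern; a minimal, purely illustrative scons/<hostname>_options.py would be:

    # scons/myhost_options.py  (hypothetical host name)
    from templates.jessie_options import *

    # local overrides on top of the template
    mpi = 'OPENMPI'
    verbose = True
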
diff --git a/scons/templates/jessie_options.py b/scons/templates/jessie_options.py
index f9f98a0..c6494a7 100644
--- a/scons/templates/jessie_options.py
+++ b/scons/templates/jessie_options.py
@@ -15,302 +15,27 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Debian/GNU Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.wheezy_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py27']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
 #umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
 umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
 umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
 lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
 #silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
 silo_libs = ['siloh5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/current/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo -x %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-#      
-# Compiler flags for some optimisations in dudley
 dudley_assemble_flags = '-funroll-loops'
 
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
-
 from site_init import getdebbuildflags
 # Now we add the debian build flags
-debstuff=getdebbuildflags()
-if (len(debstuff)>0):
+debstuff = getdebbuildflags()
+if len(debstuff) > 0:
   print("Building with the following additional flags from debian: "+str(debstuff))
 for i in debstuff:
   k=i[0]
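getdebbuildflags() lives in site_init and is not part of this hunk, so the following is only an assumption-laden illustration of the general idea rather than the escript implementation: Debian's build/hardening flags can be queried with dpkg-buildflags and returned as (option, value) pairs for an options file to append.

    # Hypothetical sketch only -- not the site_init.getdebbuildflags() code.
    import subprocess

    def get_deb_build_flags():
        """Return (option_name, flag_string) pairs taken from dpkg-buildflags."""
        pairs = []
        for opt, var in (('cxx_extra', 'CXXFLAGS'), ('ld_extra', 'LDFLAGS')):
            value = subprocess.check_output(
                ['dpkg-buildflags', '--get', var]).decode().strip()
            if value:
                pairs.append((opt, value))
        return pairs
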
diff --git a/scons/templates/jessie_py3_mpi_options.py b/scons/templates/jessie_py3_mpi_options.py
index f47a8e2..4d6aca6 100644
--- a/scons/templates/jessie_py3_mpi_options.py
+++ b/scons/templates/jessie_py3_mpi_options.py
@@ -14,13 +14,10 @@
 #
 ##############################################################################
 
-from .jessie_options import *
+# This is a template configuration file for escript on Debian/GNU Linux.
+# Refer to README_FIRST for usage instructions.
 
-# boost-python library/libraries to link against
-boost_libs = ['boost_python-py34']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.4m'
-pythonincpath='/usr/include/python3.4'
+from .jessie_py3_options import *
+
+mpi = 'OPENMPI'
 
-mpi='OPENMPI'
diff --git a/scons/templates/jessie_py3_options.py b/scons/templates/jessie_py3_options.py
index 87e0760..5cd152a 100644
--- a/scons/templates/jessie_py3_options.py
+++ b/scons/templates/jessie_py3_options.py
@@ -14,12 +14,13 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Debian/GNU Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .jessie_options import *
 
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py34']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.4m'
-pythonincpath='/usr/include/python3.4'
+pythoncmd = 'python3'
+#pythonlibname = 'python3.4m'
+#pythonincpath = '/usr/include/python3.4'
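If auto-detection of the Python 3 library fails, the two commented settings directly above can be enabled with the values shown, which match a Debian jessie Python 3.4 installation:

    pythonlibname = 'python3.4m'
    pythonincpath = '/usr/include/python3.4'
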
 
diff --git a/scons/templates/macports_10.10_options.py b/scons/templates/macports_10.10_options.py
index 35a22cc..c4a7e75 100644
--- a/scons/templates/macports_10.10_options.py
+++ b/scons/templates/macports_10.10_options.py
@@ -14,294 +14,17 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript on MAC using ports.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.macports_options import *
-# then customize to your needs.
+# This is a template configuration file for escript on Mac OS X using ports.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-I/usr/local/lib/python2.7/site-packages/numpy/core/include'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
 boost_prefix = '/opt/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
 cppunit_prefix = '/opt/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/opt/local'
-
-# netCDF library/libraries to link against
-netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
 silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
 silo_prefix = '/opt/local'
-
-# SILO library/libraries to link against
 silo_libs = ['siloh5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
 tools_names = ['clang']
 
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
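The commented cxx_extra example above hard-codes the numpy header location. Because options files are plain Python, the same effect can be achieved by asking numpy itself; this is an optional sketch and assumes numpy is importable by the interpreter running scons:

    # Hypothetical alternative to hard-coding the numpy include path
    import numpy
    cxx_extra = '-I' + numpy.get_include()
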
diff --git a/scons/templates/macports_options.py b/scons/templates/macports_options.py
index 3fd8786..d196a76 100644
--- a/scons/templates/macports_options.py
+++ b/scons/templates/macports_options.py
@@ -14,294 +14,17 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript on MAC using ports.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.macports_options import *
-# then customize to your needs.
+# This is a template configuration file for escript on Mac OS X using ports.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-I/usr/local/lib/python2.7/site-packages/numpy/core/include'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
 boost_prefix = '/opt/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
 cppunit_prefix = '/opt/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
 netcdf_prefix = '/opt/local'
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
 silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
 silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
 silo_libs = ['silo']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
 tools_names = ['clang']
 
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/opensuse13_2_options.py b/scons/templates/opensuse13_2_options.py
index cb9fd88..c755259 100644
--- a/scons/templates/opensuse13_2_options.py
+++ b/scons/templates/opensuse13_2_options.py
@@ -15,293 +15,8 @@
 ##############################################################################
 
 # This is a template configuration file for escript on OpenSUSE.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.opensuse12_3_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
 
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
-#boost_libs = ['boost_python-mt']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-#mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-#mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
-#netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-#umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-#umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-#lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-#lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
-#dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
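[Note for readers of this changelog: the templates touched above now defer their usage instructions to README_FIRST. The usual workflow those removed comments described is a small per-host options file that imports a template and overrides a few settings. The sketch below is purely illustrative -- the hostname, the import target and the paths are assumptions, not part of this commit -- but each option it sets is one documented in the template above.]

# scons/myhost_options.py -- hypothetical per-host options file
from templates.opensuse13_2_options import *   # start from the distro defaults

verbose = True                                  # print the full compiler/linker command lines
# A dependency prefix can be a single root (headers in <prefix>/include, libs in <prefix>/lib)...
boost_prefix = '/usr'
# ...or an explicit [include_path, library_path] pair when the layout differs.
netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
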
diff --git a/scons/templates/sid_mpi_options.py b/scons/templates/sid_mpi_options.py
index f1ba262..9957992 100644
--- a/scons/templates/sid_mpi_options.py
+++ b/scons/templates/sid_mpi_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -14,6 +14,10 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Debian GNU/Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .sid_options import *
 
-mpi='OPENMPI'
+mpi = 'OPENMPI'
+
diff --git a/scons/templates/sid_options.py b/scons/templates/sid_options.py
index 98c0e57..205b130 100644
--- a/scons/templates/sid_options.py
+++ b/scons/templates/sid_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -15,326 +15,54 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Debian/GNU Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.wheezy_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
+#cxx_extra = '-Wno-literal-suffix'
 openmp = True
+#mpi = 'OPENMPI'
+mpi_prefix = '/usr/lib/openmpi'
+mpi_libs = ['mpi_cxx', 'mpi']
+netcdf = True
+#umfpack = True
+umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
+umfpack_libs = ['umfpack', 'blas', 'amd']
+lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
+#silo = True
+silo_libs = ['siloh5', 'hdf5_openmpi']
+dudley_assemble_flags = '-funroll-loops'
 
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 
 import subprocess
 import os
-
-p=subprocess.Popen(["ld","--verbose"], stdout=subprocess.PIPE)
-out,err=p.communicate()
-spath=[x[13:-3] for x in out.split() if 'SEARCH_DIR' in x]
-p2name=''
-p3name=''
+p = subprocess.Popen(["ld","--verbose"], stdout=subprocess.PIPE)
+out,err = p.communicate()
+spath = [x[13:-3] for x in out.split() if 'SEARCH_DIR' in x]
+p2name = ''
+p3name = ''
 for name in spath:
   try:
     l=os.listdir(name)
     p2res=[x for x in l if x.startswith('libboost_python-py2') and x.endswith('.so')]
     p3res=[x for x in l if x.startswith('libboost_python-py3') and x.endswith('.so')]
     if len(p2name)==0 and len(p2res)>0:
-      p2name=p2res[0]
+      p2name=p2res[-1]
     if len(p3name)==0 and len(p3res)>0:
-      p3name=p3res[0]
+      p3name=p3res[-1]
   except OSError:
     pass
 
 # boost-python library/libraries to link against
 boost_libs = [p2name[3:-3]]
 
-
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
-#mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
-mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
-mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
-netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
-#umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
-umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
-umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
-lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-silo_libs = ['siloh5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/current/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo -x %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-#      
-# Compiler flags for some optimisations in dudley
-dudley_assemble_flags = '-funroll-loops'
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
+# this can be used by options files importing us
+boost_py2_libs = [p2name[3:-3]]
+boost_py3_libs = [p3name[3:-3]]
 
 from site_init import getdebbuildflags
 # Now we add the debian build flags
-debstuff=getdebbuildflags()
-if (len(debstuff)>0):
+debstuff = getdebbuildflags()
+if len(debstuff) > 0:
   print("Building with the following additional flags from debian: "+str(debstuff))
 for i in debstuff:
   k=i[0]
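[Note: the detection code kept in this template walks the linker's default SEARCH_DIRs to find a libboost_python shared object; the change above from the first match to the last prefers the highest-versioned name. Under Python 3, Popen.communicate() returns bytes rather than str, so a bytes-safe rendering of the same idea needs a decode step. A sketch follows; the helper name and the hard-coded pattern are my own choices, not anything in the template.]

import os
import subprocess

def find_boost_python(tag='libboost_python-py3'):
    # Same approach as the template: list the linker search dirs via 'ld --verbose'.
    out, _ = subprocess.Popen(['ld', '--verbose'], stdout=subprocess.PIPE).communicate()
    dirs = [tok[13:-3] for tok in out.decode().split() if 'SEARCH_DIR' in tok]
    for d in dirs:
        try:
            hits = sorted(x for x in os.listdir(d)
                          if x.startswith(tag) and x.endswith('.so'))
        except OSError:
            continue
        if hits:
            return hits[-1][3:-3]   # 'libboost_python-py3X.so' -> 'boost_python-py3X'
    return ''
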
diff --git a/scons/templates/sid_py3_mpi_options.py b/scons/templates/sid_py3_mpi_options.py
index f414fd5..b1b3748 100644
--- a/scons/templates/sid_py3_mpi_options.py
+++ b/scons/templates/sid_py3_mpi_options.py
@@ -14,48 +14,10 @@
 #
 ##############################################################################
 
-from .sid_options import *
+# This is a template configuration file for escript on Debian/GNU Linux.
+# Refer to README_FIRST for usage instructions.
 
-mpi='OPENMPI'
-usepython3=True
-pythoncmd='python3'
+from scons.templates.sid_py3_options import *
 
-import subprocess
-import sysconfig
+mpi = 'OPENMPI'
 
-#pythonlibname='python3.4m'
-#pythonlibname=sysconfig.get_config_var("LDLIBRARY")
-
-p=subprocess.Popen([pythoncmd, '-c', 'import sysconfig\nprint(sysconfig.get_config_var("LDLIBRARY"))'], stdout=subprocess.PIPE)
-pythonlibname=p.stdout.readline().encode().strip()
-p.wait()
-
-#pythonincpath='/usr/include/python3.4'
-#pythonincpath=sysconfig.get_config_var("INCLUDEPY")
-
-p=subprocess.Popen([pythoncmd, '-c', 'import sysconfig\nprint(sysconfig.get_config_var("INCLUDEPY"))'], stdout=subprocess.PIPE)
-pythonincpath=p.stdout.readline().encode().strip()
-p.wait()
-
-
-import os
-
-p=subprocess.Popen(["ld","--verbose"], stdout=subprocess.PIPE)
-out,err=p.communicate()
-spath=[x[13:-3] for x in out.split() if 'SEARCH_DIR' in x]
-p2name=''
-p3name=''
-for name in spath:
-  try:
-    l=os.listdir(name)
-    p2res=[x for x in l if x.startswith('libboost_python-py2') and x.endswith('.so')]
-    p3res=[x for x in l if x.startswith('libboost_python-py3') and x.endswith('.so')]
-    if len(p2name)==0 and len(p2res)>0:
-      p2name=p2res[0]
-    if len(p3name)==0 and len(p3res)>0:
-      p3name=p3res[0]
-  except OSError:
-    pass
-
-# boost-python library/libraries to link against
-boost_libs = [p3name[3:-3]]
diff --git a/scons/templates/sid_py3_options.py b/scons/templates/sid_py3_options.py
index 3b9f21f..6c42c10 100644
--- a/scons/templates/sid_py3_options.py
+++ b/scons/templates/sid_py3_options.py
@@ -14,47 +14,12 @@
 #
 ##############################################################################
 
-from .sid_options import *
+# This is a template configuration file for escript on Debian/GNU Linux.
+# Refer to README_FIRST for usage instructions.
 
-usepython3=True
-pythoncmd='python3'
+from scons.templates.sid_options import *
 
-import subprocess
-import sysconfig
+pythoncmd = 'python3'
 
-#pythonlibname='python3.4m'
-#pythonlibname=sysconfig.get_config_var("LDLIBRARY")
+boost_libs = boost_py3_libs
 
-p=subprocess.Popen([pythoncmd, '-c', 'import sysconfig\nprint(sysconfig.get_config_var("LDLIBRARY"))'], stdout=subprocess.PIPE)
-pythonlibname=p.stdout.readline().encode().strip()
-p.wait()
-
-#pythonincpath='/usr/include/python3.4'
-#pythonincpath=sysconfig.get_config_var("INCLUDEPY")
-
-p=subprocess.Popen([pythoncmd, '-c', 'import sysconfig\nprint(sysconfig.get_config_var("INCLUDEPY"))'], stdout=subprocess.PIPE)
-pythonincpath=p.stdout.readline().encode().strip()
-p.wait()
-
-
-import os
-
-p=subprocess.Popen(["ld","--verbose"], stdout=subprocess.PIPE)
-out,err=p.communicate()
-spath=[x[13:-3] for x in out.split() if 'SEARCH_DIR' in x]
-p2name=''
-p3name=''
-for name in spath:
-  try:
-    l=os.listdir(name)
-    p2res=[x for x in l if x.startswith('libboost_python-py2') and x.endswith('.so')]
-    p3res=[x for x in l if x.startswith('libboost_python-py3') and x.endswith('.so')]
-    if len(p2name)==0 and len(p2res)>0:
-      p2name=p2res[0]
-    if len(p3name)==0 and len(p3res)>0:
-      p3name=p3res[0]
-  except OSError:
-    pass
-
-# boost-python library/libraries to link against
-boost_libs = [p3name[3:-3]]
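[Note: the Python 3 templates previously asked the target interpreter for its sysconfig values (LDLIBRARY, INCLUDEPY) instead of trusting the interpreter running scons; after the consolidation above those settings are simply left to auto-detection. For reference, a sketch of that kind of query, using check_output and decode() as my substitutions for the removed pipe-reading code:]

import subprocess

def target_config_var(pythoncmd, name):
    # Ask the interpreter named by 'pythoncmd' (which may not be the one running
    # scons) for a sysconfig variable such as 'LDLIBRARY' or 'INCLUDEPY'.
    cmd = [pythoncmd, '-c',
           'import sysconfig; print(sysconfig.get_config_var(%r))' % name]
    return subprocess.check_output(cmd).decode().strip()

# Illustrative use only:
# pythonlibname = target_config_var('python3', 'LDLIBRARY')
# pythonincpath = target_config_var('python3', 'INCLUDEPY')
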
diff --git a/scons/templates/trusty_options.py b/scons/templates/trusty_options.py
index 8808e84..291d4e1 100644
--- a/scons/templates/trusty_options.py
+++ b/scons/templates/trusty_options.py
@@ -15,293 +15,18 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Ubuntu Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.trusty_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py27']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
 #umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
 umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
 umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
 lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
 dudley_assemble_flags = '-funroll-loops'
 
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/utopic_options.py b/scons/templates/utopic_options.py
index 8808e84..291d4e1 100644
--- a/scons/templates/utopic_options.py
+++ b/scons/templates/utopic_options.py
@@ -15,293 +15,18 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Ubuntu Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.trusty_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py27']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
 #umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
 umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
 umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
 lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
 dudley_assemble_flags = '-funroll-loops'
 
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/vivid_mpi_options.py b/scons/templates/vivid_mpi_options.py
index 779e554..499532d 100644
--- a/scons/templates/vivid_mpi_options.py
+++ b/scons/templates/vivid_mpi_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -14,6 +14,10 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Ubuntu Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .vivid_options import *
 
 mpi='OPENMPI'
+
diff --git a/scons/templates/vivid_options.py b/scons/templates/vivid_options.py
index 24baa0d..6398e8f 100644
--- a/scons/templates/vivid_options.py
+++ b/scons/templates/vivid_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -15,293 +15,17 @@
 ##############################################################################
 
 # This is a template configuration file for escript on Ubuntu Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.vivid_options import *
-# then customize to your needs.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py27']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
 #umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
 umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
 umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
 lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
-#silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
-#silo_libs = ['siloh5', 'hdf5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/2.1.0/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = ['finley', 'ripley']
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# Compiler flags for some optimisations in dudley
 dudley_assemble_flags = '-funroll-loops'
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 -x ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
diff --git a/scons/templates/vivid_py3_mpi_options.py b/scons/templates/vivid_py3_mpi_options.py
index 9d88bdd..51258e5 100644
--- a/scons/templates/vivid_py3_mpi_options.py
+++ b/scons/templates/vivid_py3_mpi_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -14,13 +14,10 @@
 #
 ##############################################################################
 
-from .vivid_options import *
+# This is a template configuration file for escript on Ubuntu Linux.
+# Refer to README_FIRST for usage instructions.
 
-# boost-python library/libraries to link against
-boost_libs = ['boost_python-py34']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.4m'
-pythonincpath='/usr/include/python3.4'
+from .vivid_py3_options import *
+
+mpi = 'OPENMPI'
 
-mpi='OPENMPI'
diff --git a/scons/templates/vivid_py3_options.py b/scons/templates/vivid_py3_options.py
index a0ce67b..1affd8b 100644
--- a/scons/templates/vivid_py3_options.py
+++ b/scons/templates/vivid_py3_options.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2003-2016 by The University of Queensland
+# Copyright (c) 2003-2015 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -14,12 +14,13 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Ubuntu Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .vivid_options import *
 
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py34']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.4m'
-pythonincpath='/usr/include/python3.4'
+pythoncmd = 'python3'
+#pythonlibname = 'python3.4m'
+#pythonincpath = '/usr/include/python3.4'
 
diff --git a/scons/templates/wheezy_mpi_options.py b/scons/templates/wheezy_mpi_options.py
index b9fd6a9..f8bcaa1 100644
--- a/scons/templates/wheezy_mpi_options.py
+++ b/scons/templates/wheezy_mpi_options.py
@@ -14,6 +14,10 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Debian GNU/Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .wheezy_options import *
 
-mpi='OPENMPI'
+mpi = 'OPENMPI'
+
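[Note: the *_mpi_options.py files in this commit are all thin wrappers -- import the base template, switch mpi on. The flavours documented in the removed comments are 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI' and 'INTELMPI'. A hypothetical wrapper for another flavour would follow the same shape; the file name, prefix and library names below are illustrative only, not part of this commit.]

# scons/templates/wheezy_mpich2_options.py (hypothetical)
from .wheezy_options import *

mpi = 'MPICH2'
mpi_prefix = '/usr/lib/mpich2'      # wherever the MPICH2 headers and libraries live
mpi_libs = ['mpichcxx', 'mpich']
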
diff --git a/scons/templates/wheezy_options.py b/scons/templates/wheezy_options.py
index e572fdd..4030e7b 100644
--- a/scons/templates/wheezy_options.py
+++ b/scons/templates/wheezy_options.py
@@ -14,299 +14,24 @@
 #
 ##############################################################################
 
-# This is a template configuration file for escript on Debian/GNU Linux.
-# Create a file named <sourcedir>/scons/<hostname>_options.py, where
-# <sourcedir> is the escript source directory and <hostname> is your machine's
-# short hostname, add the line
-# from templates.wheezy_options import *
-# then customize to your needs.
+# This is a template configuration file for escript on Debian GNU/Linux.
+# Refer to README_FIRST for usage instructions.
 
-# PREFIXES:
-# There are two ways to specify where to find dependent headers and libraries
-# (via the <dependency>_prefix):
-# 1) If your installation follows the general scheme where headers are located
-#    in <prefix>/include[32,64], and libraries in <prefix>/lib[32,64] then
-#    it is sufficient to specify this prefix, e.g. boost_prefix='/usr'
-# 2) Otherwise provide a list with two elements, where the first one is the
-#    include path, and the second the library path, e.g.
-#    boost_prefix=['/usr/include/boost1_48', '/usr/lib']
-# All <dependency>_prefix settings default to '/usr'
-
-# The options file version. SCons will refuse to build if there have been
-# changes to the set of variables and your file has not been updated.
-# This setting is mandatory.
-escript_opts_version = 202
-
-# Installation prefix. Files will be installed in subdirectories underneath.
-# DEFAULT: '<sourcedir>' (source directory)
-#prefix = '/usr/local'
-
-# Top-level directory for intermediate build and test files.
-# DEFAULT: '<sourcedir>/build'
-#build_dir = '/tmp/escriptbuild'
-
-# Set to True to print the full compiler/linker command line
-# DEFAULT: False
-#verbose = True
-
-# C++ compiler command name or full path.
-# DEFAULT: auto-detected
-#cxx = 'g++'
-
-# Flags to use with the C++ compiler. Do not set unless you know
-# what you are doing - use cxx_extra to specify additional flags!
-# DEFAULT: compiler-dependent
-#cc_flags = ''
-
-# Additional compiler (optimization) flags for non-debug builds
-# DEFAULT: compiler-dependent
-#cc_optim = '-O3 -march=native'
-
-# Additional compiler flags for debug builds
-# DEFAULT: compiler-dependent
-#cc_debug = '-g3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG'
-
-# Additional flags to add to the C++ compiler
-# DEFAULT: '' (empty)
-#cxx_extra = '-Wextra -Wno-unused-parameter'
-
-# Additional flags to add to the linker
-# DEFAULT: '' (empty)
-#ld_extra = ''
-
-# Path to CUDA compiler [new in 202]
-# DEFAULT: auto-detected
-#nvcc = '/usr/local/bin/nvcc'
-
-# Flags for CUDA compiler [new in 202]
-# DEFAULT: '' (empty)
-#nvccflags = '-arch=sm_30 -DBOOST_NOINLINE="__attribute__((noinline))"'
-
-# Whether to treat compiler warnings as errors
-# DEFAULT: True
-#werror = False
-
-# Whether to build a debug version (applying cc_debug flags)
-# DEFAULT: False
-#debug = True
-
-# Set to True to add flags that enable OpenMP parallelization
-# DEFAULT: False
+escript_opts_version = 203
 openmp = True
-
-# Additional compiler flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_flags = '-fopenmp'
-
-# Additional linker flags for OpenMP builds
-# DEFAULT: compiler-dependent
-#omp_ldflags = '-fopenmp'
-
-# Prefix or paths to boost headers and libraries. See note above.
-#boost_prefix = '/usr/local'
-
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-mt-py27']
-
-# Prefix or paths to CppUnit headers and libraries. See note above.
-# Only required for C++ unit tests.
-#cppunit_prefix = '/usr/local'
-
-# CppUnit library/libraries to link against
-#cppunit_libs = ['cppunit']
-
-# Flavour of MPI implementation
-# Recognized values: 'none', 'MPT', 'MPICH', 'MPICH2', 'OPENMPI', 'INTELMPI'
-# DEFAULT: 'none' (disable MPI)
 #mpi = 'OPENMPI'
-
-# Prefix or paths to MPI headers and libraries. See note above about prefixes.
 mpi_prefix = '/usr/lib/openmpi'
-
-# MPI libraries to link against
 mpi_libs = ['mpi_cxx', 'mpi', 'open-rte', 'open-pal']
-
-# Whether to add support for GPU-based ripley system matrix (requires nvcc
-# and thrust headers) [new in 202]
-# DEFAULT: False
-#cuda = True
-
-# Prefix or paths to NVidia thrust installation. See note above. [new in 202]
-#thrust_prefix = '/usr/local'
-
-# Whether to use the netCDF library for dump file support and netCDF-based
-# downunder data import
-# DEFAULT: False
 netcdf = True
-
-# Prefix or paths to netCDF headers and libraries. See note above.
-#netcdf_prefix = ['/usr/include/netcdf-3', '/usr/lib']
-
-# netCDF library/libraries to link against
-#netcdf_libs = ['netcdf_c++', 'netcdf']
-
-# Whether to use the parMETIS library (only in conjunction with MPI)
-# DEFAULT: False
-#parmetis = True
-
-# Prefix or paths to parMETIS headers and libraries. See note above.
-#parmetis_prefix = '/usr/local'
-
-# parMETIS library/libraries to link against
-#parmetis_libs = ['parmetis', 'metis']
-
-# Whether to add support for the Intel MKL (Math Kernel Library) direct solver
-# DEFAULT: False
-#mkl = True
-
-# Prefix or paths to MKL headers and libraries. See note above.
-#mkl_prefix = ['/opt/intel/composer_xe_2015/mkl/include', '/opt/intel/composer_xe_2015/mkl/lib/intel64']
-
-# MKL library/libraries to link against
-#mkl_libs = ['mkl_intel_lp64', 'mkl_intel_thread', 'mkl_core', 'pthread']
-
-# Whether to add support for the UMFPACK direct solver (requires AMD and BLAS)
-# DEFAULT: False
 #umfpack = True
-
-# Prefix or paths to UMFPACK headers and libraries. See note above.
 umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
-
-# UMFPACK library/libraries to link against
 umfpack_libs = ['umfpack', 'blas', 'amd']
-
-# Whether to use BoomerAMG (requires MPI)
-# DEFAULT: False
-#boomeramg = True
-
-# Prefix or paths to BoomerAMG headers and libraries. See note above.
-#boomeramg_prefix = '/usr/local'
-
-# BoomerAMG library/libraries to link against
-#boomeramg_libs = ['HYPRE']
-
-# Flavour of LAPACK implementation
-# Recognized values: 'none', 'clapack', 'mkl'
-# DEFAULT: 'none' (do not use LAPACK)
-#lapack = 'clapack'
-
-# Prefix or paths to LAPACK headers and libraries. See note above.
 lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
-
-# LAPACK library/libraries to link against
-lapack_libs = ['lapack_atlas']
-
-# Whether to use LLNL's SILO library for Silo output file support in weipa
-# DEFAULT: False
 #silo = True
-
-# Prefix or paths to SILO headers and libraries. See note above.
-#silo_prefix = '/usr/local'
-
-# SILO library/libraries to link against
 silo_libs = ['siloh5']
-
-# Whether to use LLNL's VisIt simulation interface (only version 2 supported)
-# DEFAULT: False
-#visit = True
-
-# Prefix or paths to VisIt's sim2 headers and libraries. See note above.
-#visit_prefix = '/opt/visit/current/linux-intel/libsim/V2'
-
-# Sim2 library/libraries to link against
-#visit_libs = ['simV2']
-
-# List of domain families to build [new in 202]
-# DEFAULT: 'all' (i.e. dudley, finley, ripley, speckley)
-#domains = 'finley,ripley'
-
-
-### ADVANCED OPTIONS ###
-# Do not change the following options unless you know what they do
-
-# launcher, prelaunch, postlaunch: for MPI builds/batch system runs
-# the following substitutions are applied to all three:
-# %b = executable, %n = number of nodes, %p = number of processes,
-# %N = total number of processes, # %t = number of threads,
-# %f = name of hostfile, %h = comma-separated list of hosts,
-# %e = comma-separated list of environment variables to export
-#prelaunch = "EE=$(echo -x %e|sed -e 's/,/ -x /g')"
-#launcher = "mpirun --gmca mpi_warn_on_fork 0 ${EE} --bynode --bind-to-none --host %h -np %N %b"
-#postlaunch = ""
-
-#      
-# Compiler flags for some optimisations in dudley
 dudley_assemble_flags = '-funroll-loops'
 
-# enables code that is non-standard
-#iknowwhatimdoing = True
-
-# compiler toolset to use
-#tools_names = ['intelc']
-
-# Additional environmental variables to export to the tools
-#env_export = []
-
-# For testing use only, sets the default value for autolazy
-# DEFAULT: 'leave_alone'
-#forcelazy = 'on'
-
-# For testing use only, sets the default value for force resolving collective
-# operations
-# DEFAULT: 'leave_alone'
-#forcecollres = 'on'
-
-# Whether to create dynamic libraries for esysUtils and paso
-# DEFAULT: False
-build_shared = True
-
-# Extra libraries to link with
-#sys_libs = []
-
-# Python executable to use for compiling. Must be compatible with the
-# boost python library
-# DEFAULT: auto-detected (interpreter executing scons)
-#pythoncmd = '/usr/bin/python3'
-
-# Whether this is a Python 3 build
-# DEFAULT: False
-#usepython3 = True
-
-# Name of the python library
-# DEFAULT: auto-detected for python 2.x
-#pythonlibname = 'python3.4m'
-
-# Path to Python include files
-# DEFAULT: auto-detected for python 2.x
-#pythonincpath = '/usr/include/python3.4'
-
-# Whether to map index_t to long (for very large matrices) [new in 202]
-# DEFAULT: False
-#longindices = True
-
-# Enable reading compressed binary grids in ripley? (requires boost iostreams)
-# DEFAULT: True
-#compressed_files = False
-
-# Compression libraries to link with
-# DEFAULT: 'boost_iostreams'
-#compression_libs = 'boost_iostreams-mt'
-
-# Whether to use the PAPI (Performance API) library
-# DEFAULT: False
-#papi = True
-
-# Prefix or paths to PAPI headers and libraries. See note above.
-#papi_prefix = '/usr/local'
-
-# PAPI library/libraries to link against
-#papi_libs = ['papi']
-
-# Whether to use PAPI to instrument solver iterations
-# DEFAULT: False
-#papi_instrument_solver = True
-
-
 from site_init import getdebbuildflags
 # Now we add the debian build flags
 debstuff=getdebbuildflags()
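Although the long explanatory comments are dropped from wheezy_options.py above, the <dependency>_prefix convention they documented still applies to the settings that remain. A condensed reminder, using paths taken from the old comments and the retained values:

    # Form 1: a single prefix; headers are searched under <prefix>/include[32,64]
    # and libraries under <prefix>/lib[32,64].
    boost_prefix = '/usr'

    # Form 2: an explicit [include_path, library_path] pair, useful when headers
    # live in a versioned or otherwise non-standard directory.
    umfpack_prefix = ['/usr/include/suitesparse', '/usr/lib']
    lapack_prefix = ['/usr/include/atlas', '/usr/lib/atlas-base']
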
diff --git a/scons/templates/wheezy_py3_mpi_options.py b/scons/templates/wheezy_py3_mpi_options.py
index f7007e5..369d691 100644
--- a/scons/templates/wheezy_py3_mpi_options.py
+++ b/scons/templates/wheezy_py3_mpi_options.py
@@ -14,13 +14,10 @@
 #
 ##############################################################################
 
-from .wheezy_options import *
+# This is a template configuration file for escript on Debian GNU/Linux.
+# Refer to README_FIRST for usage instructions.
 
-# boost-python library/libraries to link against
-boost_libs = ['boost_python-py32']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.2mu'
-pythonincpath='/usr/include/python3.2'
+from .wheezy_py3_options import *
 
 mpi='OPENMPI'
+
diff --git a/scons/templates/wheezy_py3_options.py b/scons/templates/wheezy_py3_options.py
index 51c7dfa..c1cd22a 100644
--- a/scons/templates/wheezy_py3_options.py
+++ b/scons/templates/wheezy_py3_options.py
@@ -14,12 +14,13 @@
 #
 ##############################################################################
 
+# This is a template configuration file for escript on Debian GNU/Linux.
+# Refer to README_FIRST for usage instructions.
+
 from .wheezy_options import *
 
-# boost-python library/libraries to link against
 boost_libs = ['boost_python-py32']
-usepython3=True
-pythoncmd='python3'
-pythonlibname='python3.2mu'
-pythonincpath='/usr/include/python3.2'
+pythoncmd = 'python3'
+#pythonlibname = 'python3.2mu'
+#pythonincpath = '/usr/include/python3.2'
 
diff --git a/scons/templates/windows_options.py b/scons/templates/windows_options.py
index f182085..fe9f8c6 100644
--- a/scons/templates/windows_options.py
+++ b/scons/templates/windows_options.py
@@ -255,7 +255,7 @@ escript_opts_version = 202
 
 #iknowwhatimdoing = False
 
-#forcelazy = 'leave_alone'
+#forcelazy = 'auto'
 
-#forcecollres = 'leave_alone'
+#forcecollres = 'auto'
 
diff --git a/scripts/extracttests.sh b/scripts/extracttests.sh
index eb92ad7..cc724a8 100755
--- a/scripts/extracttests.sh
+++ b/scripts/extracttests.sh
@@ -3,21 +3,31 @@
 
 # To be run from an esys13 root directory which has had a full build done in it.
 #This will make a directory of files which can be shipped elsewhere to test an install
+# The script now accounts for a build directory being somewhere other than ./build
 
 
-if [ $# -lt 1 ]
+if [ $# -lt 2 ]
 then
-   echo "Usage: $0 targetdirectory"
+   echo "Usage: $0 build_directory targetdirectory"
    exit 1
 fi
 
-if [ -f $1 ]
+if [ -f $2 ]
 then
    echo "Target exists and is not a directory"
    exit 2
 fi
 
-if [ "$1" == ".." ]
+if [ ! -d $1 ]
+then
+    echo "Build dir either does not exist or is not a directory"
+    exit 2
+fi
+
+bdir=$1
+targetdir=$2
+
+if [ "$dest" == ".." ]
 then
    # coz if you call this from inside a directory called src, you
    # wipe out your working copy
@@ -31,28 +41,26 @@ then
    exit 3
 fi
 
-if [ ! -d $1 ]
+if [ ! -d $targetdir/build ]
 then
-   mkdir $1
+   mkdir -p $targetdir/build
 fi
 
-targetdir=$1
+cp itest.sh $targetdir
+find . -maxdepth 1 -type d -not -name '*debian' -not -name '.' -not -name build -not -name esys -not -name bin -not -name lib -not -name '.?*' -print0 | xargs -0 -I'{}' cp -r '{}' $targetdir
 
-cp -r * $targetdir
+cp -r $bdir/* $targetdir/build
 cd $targetdir || exit 4
+    
+find build -name '*.o' -print0 | xargs -0 rm -f
+find build -name '*.os' -print0 | xargs -0 rm -f
+find build -name '*.so' -print0 | xargs -0 rm -f 
+find build -name '*.a' -print0 | xargs -0 rm -f
+find build -name '*.pyc' -print0 | xargs -0 rm -f
+find build -name '*.passed' -print0 | xargs -0 rm -f
+find build -name '*.skipped' -print0 | xargs -0 rm -f
 
-rm -rf esys
-rm -rf bin
-rm -rf lib
-rm -rf utest.sh
-rm -rf 
-find build -name '*.o' | xargs rm
-find build -name '*.os' | xargs rm
-find build -name '*.so' | xargs rm
-find build -name '*.a' | xargs rm
-find build -name '*.pyc' | xargs rm
-find . -name 'src' | xargs rm -r
-rm -r scons
-rm -r doc/user doc/cookbook 
-find doc -name '*.tex' | xargs rm
-rm -rf localdebian
+find . -name 'src' -print0 | xargs -0 rm -rf
+rm -rf scons
+rm -rf doc/user doc/cookbook 
+find doc -name '*.tex' -print0 | xargs -0 rm -f
diff --git a/scripts/makesrc.sh b/scripts/makesrc.sh
index 53b7057..ee7e4dd 100755
--- a/scripts/makesrc.sh
+++ b/scripts/makesrc.sh
@@ -2,7 +2,7 @@
 #Make the source tarball for debian release
 #Run this from a clean checkout
 
-SRCVERSION=`head -1 debian/changelog | tr -d '()' | tr -s '-' ' '| cut -d\  -f3`
+SRCVERSION=`head -1 localdebian/changelog | tr -d '()' | tr -s '-' ' '| cut -d\  -f3`
 
 svnversion | grep -q :
 if [ $? == 0 ]
@@ -11,6 +11,7 @@ then
     echo "Exiting"
     exit 1
 fi
+svnversion > svn_version
 
 ls scons/*options.py > toexclude
 
diff --git a/site_scons/dependencies.py b/site_scons/dependencies.py
index 4b87a6b..b2f54f3 100644
--- a/site_scons/dependencies.py
+++ b/site_scons/dependencies.py
@@ -31,8 +31,21 @@ from site_init import findLibWithHeader, detectModule
 
 REQUIRED_BOOST = (1, 46)
 
+def CheckComplexAcos(context):
+    context.Message('Checking for working complex std::acos()... ')
+    result = context.TryRun("""
+#include <complex>
+int main() { std::complex<double> x(0,3.14159265359), y(1.5707963,-1.8622957);
+return std::abs(std::acos(x)-y) < 1e-6 ? 0:-1;}
+""", '.cpp')
+    # scons < 2.4 fix:
+    if type(result)==tuple:
+        result = result[0]
+    context.Result(result)
+    return result
+
 def checkCompiler(env):
-    conf = Configure(env.Clone())
+    conf = Configure(env.Clone(), custom_tests = {'CheckComplexAcos': CheckComplexAcos})
     if 'CheckCXX' in dir(conf): # exists since scons 1.1.0
         if not conf.CheckCXX():
             print("Cannot run C++ compiler '%s' (check config.log)" % (env['CXX']))
@@ -57,6 +70,9 @@ def checkCompiler(env):
     if conf.CheckCXXHeader('libkern/OSByteOrder.h'):
         conf.env.Append(CPPDEFINES = ['HAVE_OSBYTEORDER_H'])
 
+    if not conf.CheckComplexAcos():
+        conf.env.Append(CPPDEFINES = ['ESYS_USE_BOOST_ACOS'])
+    
     return conf.Finish()
 
 def checkPython(env):
@@ -65,7 +81,7 @@ def checkPython(env):
     # but we need to deal with the case where python is not in its INSTALL
     # directory.
     # Use the python scons is running
-    if env['pythoncmd']=='python':
+    if env['pythoncmd'] == sys.executable:
         python_inc_path=sysconfig.get_python_inc()
         if env['IS_WINDOWS']:
             python_lib_path=os.path.join(sysconfig.get_config_var('prefix'), 'libs')
@@ -74,67 +90,70 @@ def checkPython(env):
         else:
             python_lib_path=sysconfig.get_config_var('LIBDIR')
 
-        #python_libs=[sysconfig.get_config_var('LDLIBRARY')] # only on linux
         if env['IS_WINDOWS']:
             python_libs=['python%s%s'%(sys.version_info[0], sys.version_info[1])]
         else:
-            python_libs=['python'+sysconfig.get_python_version()]
+            python_libs = sysconfig.get_config_var('LDLIBRARY')
+            if python_libs[:3] == 'lib':
+                python_libs = [python_libs[3:-3]]
+            else:
+                python_libs = ['python'+sysconfig.get_python_version()]
 
-        env['buildvars']['python']=sys.executable
-        env['buildvars']['python_version']=str(sys.version_info[0])+"."+str(sys.version_info[1])+"."+str(sys.version_info[2])
+        verstring=".".join([str(i) for i in sys.version_info[:3]])
 
-    #if we want to use a python other than the one scons is running
+    # if we want to use a python other than the one scons is running
+    # Note: we assume scons is running python 2 in the following.
     else:
+        p = Popen([env['pythoncmd'], '-c', 'from __future__ import print_function;import sys;print(str(sys.version_info[0])+"."+str(sys.version_info[1])+"."+str(sys.version_info[2]))'], stdout=PIPE)
+        verstring, _ = p.communicate()
+        verstring = verstring.strip()
+        ispython3 = (verstring[0] == '3')
         initstring='from __future__ import print_function;from distutils import sysconfig;'
-        if env['pythonlibname']!='':
-            python_libs=env['pythonlibname']
-        else:   # work it out by calling python
-            if ['IS_WINDOWS']:
-                cmd='print("python%s%s"%(sys.version_info[0], sys.version_info[1]))'
+        if env['pythonlibname'] != '':
+            python_libs = env['pythonlibname']
+        else: # work it out by calling python
+            cmd = 'print(sysconfig.get_config_var("LDLIBRARY"))'
+            p = Popen([env['pythoncmd'], '-c', initstring+cmd], stdout=PIPE)
+            python_libs, _ = p.communicate()
+            if python_libs[:3] == 'lib':
+                python_libs = [python_libs.strip()[3:-3]]
             else:
-                cmd='print("python"+sysconfig.get_python_version())'
-            p=Popen([env['pythoncmd'], '-c', initstring+cmd], stdout=PIPE)
-            python_libs=p.stdout.readline()
-            if env['usepython3']:       # This is to convert unicode str into py2 string
-                python_libs=python_libs.encode() # If scons runs on py3 then this must be rethought
-            p.wait()
-            python_libs=python_libs.strip()
-
-        # Now we know whether we are using python3 or not
-        p=Popen([env['pythoncmd'], '-c',  initstring+'print(sysconfig.get_python_inc())'], stdout=PIPE)
-        python_inc_path=p.stdout.readline()
-        if env['usepython3']:
-             python_inc_path=python_inc_path.encode()
-        p.wait()
-        python_inc_path=python_inc_path.strip()
-        if env['IS_WINDOWS']:
-            cmd="import os;os.path.join(sysconfig.get_config_var('prefix'), 'libs')"
-        elif env['PLATFORM']=='darwin':
+                cmd = 'print("python"+sysconfig.get_python_version())'
+                p = Popen([env['pythoncmd'], '-c', initstring+cmd], stdout=PIPE)
+                python_libs, _ = p.communicate()
+                python_libs = [python_libs.strip()]
+
+        p = Popen([env['pythoncmd'], '-c',  initstring+'print(sysconfig.get_python_inc())'], stdout=PIPE)
+        python_inc_path, _ = p.communicate()
+        if ispython3:
+            python_inc_path = python_inc_path.encode()
+        python_inc_path = python_inc_path.strip()
+        if env['PLATFORM'] == 'darwin':
             cmd="sysconfig.get_config_var(\"LIBPL\")"
         else:
             cmd="sysconfig.get_config_var(\"LIBDIR\")"
 
-        p=Popen([env['pythoncmd'], '-c', initstring+'print('+cmd+')'], stdout=PIPE)
-        python_lib_path=p.stdout.readline()
-        if env['usepython3']:
-            python_lib_path=python_lib_path.decode()
-        p.wait()
-        python_lib_path=python_lib_path.strip()
+        p = Popen([env['pythoncmd'], '-c', initstring+'print('+cmd+')'], stdout=PIPE)
+        python_lib_path, _ = p.communicate()
+        if ispython3:
+            python_lib_path = python_lib_path.decode()
+        python_lib_path = python_lib_path.strip()
 
-        env['buildvars']['python']=env['pythoncmd']
-        p=Popen([env['pythoncmd'], '-c', 'from __future__ import print_function;import sys;print(str(sys.version_info[0])+"."+str(sys.version_info[1])+"."+str(sys.version_info[2]))'], stdout=PIPE)
-        verstring=p.stdout.readline().strip()
-        p.wait()
-        env['buildvars']['python_version']=verstring
 
+    env['python_version'] = verstring
+    ispython3 = (verstring[0] == '3')
+    if ispython3:
+        env.Append(CPPDEFINES=['ESPYTHON3'])
+    env['buildvars']['python_version'] = verstring
+    env['buildvars']['python'] = env['pythoncmd']
     # Check for an override from the config file.
     # Ideally, this should be automatic but we need to deal with the case
     # where python is not in its INSTALL directory
-    if env['pythonlibpath']!='':
-        python_lib_path=env['pythonlibpath']
+    if env['pythonlibpath'] != '':
+        python_lib_path = env['pythonlibpath']
 
-    if env['pythonincpath']!='':
-        python_inc_path=env['pythonincpath']
+    if env['pythonincpath'] != '':
+        python_inc_path = env['pythonincpath']
 
     conf = Configure(env.Clone())
 
@@ -159,14 +178,13 @@ def checkPython(env):
 
 def checkCudaVersion(env):
     # NVCC availability is already checked in the Tool file
-    p=Popen([env['NVCC'], '-V'], stdout=PIPE)
-    out=p.stdout.readlines()
-    env['nvcc_version']='(unknown version)'
-    p.wait()
-    for line in out:
+    p = Popen([env['NVCC'], '-V'], stdout=PIPE)
+    out,_ = p.communicate()
+    env['nvcc_version'] = '(unknown version)'
+    for line in out.split('\n'):
         if 'release' in line:
-            version=line[line.find('release'):].strip()
-            env['nvcc_version']=version
+            version = line[line.find('release'):].strip()
+            env['nvcc_version'] = version
             break
     env['buildvars']['nvcc']=env['NVCC']
     return env
@@ -215,15 +233,11 @@ def checkNumpy(env):
 
     ## check for numpy header (optional)
     conf = Configure(env.Clone())
-    if env['usepython3']:
-        # FIXME: This is until we can work out how to make the checks in python 3
-        conf.env['numpy_h']=False
+    if conf.CheckCXXHeader(['Python.h','numpy/ndarrayobject.h']):
+        conf.env.Append(CPPDEFINES = ['ESYS_HAVE_NUMPY_H'])
+        conf.env['numpy_h']=True
     else:
-        if conf.CheckCXXHeader(['Python.h','numpy/ndarrayobject.h']):
-            conf.env.Append(CPPDEFINES = ['HAVE_NUMPY_H'])
-            conf.env['numpy_h']=True
-        else:
-            conf.env['numpy_h']=False
+        conf.env['numpy_h']=False
 
     return conf.Finish()
 
@@ -233,6 +247,7 @@ def checkCUDA(env):
         env.AppendUnique(CPPPATH = [cuda_inc_path])
         env.AppendUnique(LIBPATH = [cuda_lib_path])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], cuda_lib_path)
+        env.Append(CPPDEFINES = ['ESYS_HAVE_CUDA'])
         env['cuda']=True
     except:
         env['cuda']=False
@@ -278,6 +293,44 @@ def checkOptionalModules(env):
 
     return env
 
+def checkForTrilinos(env):
+    trilinos_inc_path=''
+    trilinos_lib_path=''
+    if env['trilinos']:
+        havelibs = (len(env['trilinos_libs']) > 0)
+        trilinos_inc_path,trilinos_lib_path=findLibWithHeader(env,
+                env['trilinos_libs'], 'Tpetra_CrsMatrix.hpp',
+                env['trilinos_prefix'], lang='c++', try_link=havelibs)
+        if not havelibs:
+            packages=['Tpetra','Kokkos','Belos','Amesos2','Ifpack2','MueLu']
+            libs = []
+            for pk in packages:
+                # find out what libraries to link with...
+                makefile = os.path.join(trilinos_inc_path, 'Makefile.export.%s'%pk)
+                try:
+                    for l in open(makefile, 'r').readlines():
+                        if l.startswith("%s_LIBRARIES"%pk): # or l.startswith("Trilinos_TPL_LIBRARIES"):
+                            lst = l.split('=')[1].strip().split()
+                            lst = [e.replace('-l','',1) for e in lst]
+                            libs += lst
+                        elif l.startswith("%s_TPL_INCLUDE_DIRS"%pk):
+                            lst = l.split('=')[1].strip().split()
+                            lst = [e.replace('-I','',1) for e in lst]
+                            env.AppendUnique(CPPPATH = lst)
+
+                except Exception as e:
+                    raise RuntimeError('Error reading Trilinos export Makefile\n%s'%(e))
+            env['trilinos_libs'] = libs
+
+        env.AppendUnique(CPPPATH = [trilinos_inc_path])
+        env.AppendUnique(LIBPATH = [trilinos_lib_path])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_TRILINOS'])
+        env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], trilinos_lib_path)
+        env['buildvars']['trilinos_inc_path']=trilinos_inc_path
+        env['buildvars']['trilinos_lib_path']=trilinos_lib_path
+    env['buildvars']['trilinos']=int(env['trilinos'])
+    return env
+
 def checkOptionalLibraries(env):
     ######## netCDF
     netcdf_inc_path=''
@@ -286,9 +339,8 @@ def checkOptionalLibraries(env):
         netcdf_inc_path,netcdf_lib_path=findLibWithHeader(env, env['netcdf_libs'], 'netcdf.h', env['netcdf_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [netcdf_inc_path])
         env.AppendUnique(LIBPATH = [netcdf_lib_path])
-        env.AppendUnique(LIBS = env['netcdf_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], netcdf_lib_path)
-        env.Append(CPPDEFINES = ['USE_NETCDF'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_NETCDF'])
         env['buildvars']['netcdf_inc_path']=netcdf_inc_path
         env['buildvars']['netcdf_lib_path']=netcdf_lib_path
     env['buildvars']['netcdf']=int(env['netcdf'])
@@ -300,9 +352,8 @@ def checkOptionalLibraries(env):
         papi_inc_path,papi_lib_path=findLibWithHeader(env, env['papi_libs'], 'papi.h', env['papi_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [papi_inc_path])
         env.AppendUnique(LIBPATH = [papi_lib_path])
-        env.AppendUnique(LIBS = env['papi_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], papi_lib_path)
-        env.Append(CPPDEFINES = ['PAPI'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_PAPI'])
         env['buildvars']['papi_inc_path']=papi_inc_path
         env['buildvars']['papi_lib_path']=papi_lib_path
     env['buildvars']['papi']=int(env['papi'])
@@ -314,9 +365,8 @@ def checkOptionalLibraries(env):
         mkl_inc_path,mkl_lib_path=findLibWithHeader(env, env['mkl_libs'], 'mkl_pardiso.h', env['mkl_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [mkl_inc_path])
         env.AppendUnique(LIBPATH = [mkl_lib_path])
-        env.AppendUnique(LIBS = env['mkl_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], mkl_lib_path)
-        env.Append(CPPDEFINES = ['MKL'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_MKL'])
         env['buildvars']['mkl_inc_path']=mkl_inc_path
         env['buildvars']['mkl_lib_path']=mkl_lib_path
     env['buildvars']['mkl']=int(env['mkl'])
@@ -328,37 +378,63 @@ def checkOptionalLibraries(env):
         umfpack_inc_path,umfpack_lib_path=findLibWithHeader(env, env['umfpack_libs'], 'umfpack.h', env['umfpack_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [umfpack_inc_path])
         env.AppendUnique(LIBPATH = [umfpack_lib_path])
-        env.AppendUnique(LIBS = env['umfpack_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], umfpack_lib_path)
-        env.Append(CPPDEFINES = ['USE_UMFPACK'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_UMFPACK'])
         env['buildvars']['umfpack_inc_path']=umfpack_inc_path
         env['buildvars']['umfpack_lib_path']=umfpack_lib_path
     env['buildvars']['umfpack']=int(env['umfpack'])
 
     ######## LAPACK
-    if env['lapack']=='mkl' and not env['mkl']:
-        print("mkl_lapack requires MKL!")
-        env.Exit(1)
-
-    env['uselapack'] = env['lapack']!='none'
     lapack_inc_path=''
     lapack_lib_path=''
-    if env['uselapack']:
+    flavour = 'none'
+    env['uselapack'] = False
+    if env['lapack'] != 0:
+        # not explicitly disabled so run the checks
         if env['longindices']:
-            print("Sorry, cannot use LAPACK with 64-bit index types. Set longindices to False or disable LAPACK.")
-            env.Exit(1)
-        header='clapack.h'
-        if env['lapack']=='mkl':
-            env.AppendUnique(CPPDEFINES = ['MKL_LAPACK'])
-            header='mkl_lapack.h'
-        lapack_inc_path,lapack_lib_path=findLibWithHeader(env, env['lapack_libs'], header, env['lapack_prefix'], lang='c++')
-        env.AppendUnique(CPPPATH = [lapack_inc_path])
-        env.AppendUnique(LIBPATH = [lapack_lib_path])
-        env.AppendUnique(LIBS = env['lapack_libs'])
-        env.Append(CPPDEFINES = ['USE_LAPACK'])
-        env['buildvars']['lapack_inc_path']=lapack_inc_path
-        env['buildvars']['lapack_lib_path']=lapack_lib_path
-    env['buildvars']['lapack']=env['lapack']
+            # you want longindices + lapack? sorry.
+            if env['lapack'] == 1:
+                print("LAPACK requires index type = int. Set longindices to False or disable LAPACK.")
+                env.Exit(1)
+        else:
+            if env['mkl']:
+                # we detected MKL so try the MKL header+libs
+                flavour = 'mkl'
+                header = 'mkl_lapack.h'
+                prefix = env['mkl_prefix']
+                if len(env['lapack_libs']) == 0:
+                    libs = env['mkl_libs']
+                else:
+                    libs = env['lapack_libs']
+            else:
+                # try for clapack
+                flavour = 'clapack'
+                header = 'clapack.h'
+                prefix = env['lapack_prefix']
+                if len(env['lapack_libs']) == 0:
+                    libs = ['lapack_atlas']
+                else:
+                    libs = env['lapack_libs']
+
+            try:
+                lapack_inc_path,lapack_lib_path=findLibWithHeader(env, libs, header, prefix, lang='c++')
+                env['lapack_libs'] = libs
+                env['uselapack'] = True
+                env.AppendUnique(CPPPATH = [lapack_inc_path])
+                env.AppendUnique(LIBPATH = [lapack_lib_path])
+                env.Append(CPPDEFINES = ['ESYS_HAVE_LAPACK'])
+                if flavour == 'mkl':
+                    env.AppendUnique(CPPDEFINES = ['ESYS_MKL_LAPACK'])
+                env['buildvars']['lapack_inc_path']=lapack_inc_path
+                env['buildvars']['lapack_lib_path']=lapack_lib_path
+            except:
+                if env['lapack'] == 1:
+                    raise
+                # lapack was set to auto-detect so not a fatal error
+                flavour = 'none'
+
+    env['lapack'] = flavour
+    env['buildvars']['lapack'] = flavour
 
     ######## Silo
     silo_inc_path=''
@@ -367,9 +443,7 @@ def checkOptionalLibraries(env):
         silo_inc_path,silo_lib_path=findLibWithHeader(env, env['silo_libs'], 'silo.h', env['silo_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [silo_inc_path])
         env.AppendUnique(LIBPATH = [silo_lib_path])
-        # Note that we do not add the libs since they are only needed for the
-        # weipa library and tools.
-        #env.AppendUnique(LIBS = [env['silo_libs']])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_SILO'])
         env['buildvars']['silo_inc_path']=silo_inc_path
         env['buildvars']['silo_lib_path']=silo_lib_path
     env['buildvars']['silo']=int(env['silo'])
@@ -404,6 +478,21 @@ def checkOptionalLibraries(env):
         # to do that here
         if env['netcdf'] and env['mpi'] in ['MPT','OPENMPI']:
             env.Append(CPPDEFINES = ['MPI_INCLUDED'])
+
+        if env['mpi'] == 'OPENMPI':
+            # try to get version for correct launcher arguments
+            try:
+                p = Popen(['orterun', '-V'], stdout=PIPE, stderr=PIPE)
+                o,e = p.communicate()
+                try:
+                    ver = e.split('\n')[0].split()[-1]
+                except IndexError:
+                    ver = o.split('\n')[0].split()[-1]
+                if len(ver) > 0:
+                    env['orte_version'] = ver
+            except OSError:
+                pass
+
         env['buildvars']['mpi_inc_path']=mpi_inc_path
         env['buildvars']['mpi_lib_path']=mpi_lib_path
     env['buildvars']['mpi']=env['mpi']
@@ -416,9 +505,8 @@ def checkOptionalLibraries(env):
         boomeramg_inc_path,boomeramg_lib_path=findLibWithHeader(env, env['boomeramg_libs'], 'HYPRE.h', env['boomeramg_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [boomeramg_inc_path])
         env.AppendUnique(LIBPATH = [boomeramg_lib_path])
-        env.AppendUnique(LIBS = env['boomeramg_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], boomeramg_lib_path)
-        env.Append(CPPDEFINES = ['BOOMERAMG'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_BOOMERAMG'])
         env['buildvars']['boomeramg_inc_path']=boomeramg_inc_path
         env['buildvars']['boomeramg_lib_path']=boomeramg_lib_path
     env['buildvars']['boomeramg']=int(env['boomeramg'])
@@ -431,7 +519,6 @@ def checkOptionalLibraries(env):
         parmetis_inc_path,parmetis_lib_path=findLibWithHeader(env, env['parmetis_libs'], 'parmetis.h', env['parmetis_prefix'], lang='c++')
         env.AppendUnique(CPPPATH = [parmetis_inc_path])
         env.AppendUnique(LIBPATH = [parmetis_lib_path])
-        env.AppendUnique(LIBS = env['parmetis_libs'])
         env.PrependENVPath(env['LD_LIBRARY_PATH_KEY'], parmetis_lib_path)
 
         # Try to extract the parmetis version from parmetis.h
@@ -467,7 +554,7 @@ def checkOptionalLibraries(env):
         else:
             env['parmetis_version'] = "unknown"
 
-        env.Append(CPPDEFINES = ['USE_PARMETIS'])
+        env.Append(CPPDEFINES = ['ESYS_HAVE_PARMETIS'])
         env['buildvars']['parmetis_inc_path']=parmetis_inc_path
         env['buildvars']['parmetis_lib_path']=parmetis_lib_path
     env['buildvars']['parmetis']=int(env['parmetis'])
@@ -482,7 +569,6 @@ def checkOptionalLibraries(env):
                 env['gmsh']='m'
             else:
                 env['gmsh']='s'
-            p.wait()
         except OSError:
             pass
     else:
@@ -495,24 +581,26 @@ def checkOptionalLibraries(env):
             try:
                 p=Popen(cmd, stdout=PIPE)
                 gmshlibs,_ = p.communicate()
-                ret = p.wait()
-                if ret == 0 and 'libmpi' in gmshlibs:
+                env.Append(CPPDEFINES=['ESYS_HAVE_GMSH'])
+                if p.returncode == 0 and 'libmpi' in gmshlibs:
                     env['gmsh'] = 'm'
+                    env.Append(CPPDEFINES=['ESYS_GMSH_MPI'])
                 else:
                     env['gmsh'] = 's'
             except OSError:
                 pass
     
-######## boost::iostreams
+    ######## boost::iostreams
     if env['compressed_files']:
         try:
             boost_inc_path, boost_lib_path = findLibWithHeader(env, env['compression_libs'], 'boost/iostreams/filter/gzip.hpp', env['boost_prefix'], lang='c++')
-            env.Append(CPPDEFINES = ['USE_BOOSTIO'])
-            env.AppendUnique(LIBS = env['compression_libs'])
+            env.Append(CPPDEFINES = ['ESYS_HAVE_BOOST_IO'])
         except RuntimeError as e:
             env['compressed_files'] = False
     env['buildvars']['compressed_files']=int(env['compressed_files'])
 
+    ######## Trilinos
+    env = checkForTrilinos(env)
     return env
 
 def checkPDFLatex(env):
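The CheckComplexAcos hook added to checkCompiler() above relies on SCons' custom_tests mechanism for Configure contexts. As a standalone illustration of that pattern -- the probed feature and the define name below are made up for the example, only the mechanism mirrors the change above:

    # Run under scons, i.e. from an SConstruct/SConscript.
    from SCons.Script import Environment, Configure

    def CheckCxxLambda(context):
        # Custom test: report progress, try to compile a snippet, record the result.
        context.Message('Checking whether the C++ compiler accepts lambdas... ')
        result = context.TryCompile(
            'int main() { auto f = [](int x){ return x+1; }; return f(0)-1; }\n',
            '.cpp')
        context.Result(result)
        return result

    env = Environment()
    conf = Configure(env.Clone(), custom_tests={'CheckCxxLambda': CheckCxxLambda})
    if not conf.CheckCxxLambda():
        conf.env.Append(CPPDEFINES=['NO_CXX_LAMBDA'])   # illustrative define only
    env = conf.Finish()
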
diff --git a/site_scons/extractdebbuild.py b/site_scons/extractdebbuild.py
index 1d18b16..2c99176 100644
--- a/site_scons/extractdebbuild.py
+++ b/site_scons/extractdebbuild.py
@@ -1,7 +1,7 @@
 
 ##############################################################################
 #
-# Copyright (c) 2015-2016 by The University of Queensland
+# Copyright (c)2015-2016 by The University of Queensland
 # http://www.uq.edu.au
 #
 # Primary Business: Queensland, Australia
@@ -16,7 +16,7 @@
 
 from __future__ import print_function, division
 
-__copyright__="""Copyright (c) 2015-2016 by The University of Queensland
+__copyright__="""Copyright (c)2015-2016 by The University of Queensland
 http://www.uq.edu.au
 Primary Business: Queensland, Australia"""
 __license__="""Licensed under the Apache License, version 2.0
diff --git a/site_scons/grouptest.py b/site_scons/grouptest.py
index 062361c..44b2a59 100644
--- a/site_scons/grouptest.py
+++ b/site_scons/grouptest.py
@@ -25,74 +25,85 @@ __url__="https://launchpad.net/escript-finley"
 
 
 class GroupTest(object):
-    def __init__(self, exec_cmd, evars, python_dir, working_dir, test_list, single_processor_only=False):
+    _allfuncs = []
+
+    def __init__(self, name, exec_cmd, evars, python_dir, working_dir, test_list, single_process_tests=[]):
+        self.name=name
         self.python_dir=python_dir
         self.working_dir=working_dir
         self.test_list=test_list
         self.exec_cmd=exec_cmd
         self.evars=evars
         self.mkdirs=[]
-        self.single_processor_only=single_processor_only
+        self.single_process_tests=single_process_tests
+        self._allfuncs.append(name)
         
     def makeDir(self,dirname):
-            self.mkdirs.append(dirname)
+        self.mkdirs.append(dirname)
 
     #stdloc means that the files are in standard locations so don't use prefix
     def makeHeader(build_platform, prefix, stdloc):
-        res="#!/bin/bash\n"
-        res=res+"\n#############################################\n"
-        res=res+"# This file is autogenerated by scons.\n"
-        res=res+"# It will be regenerated each time scons is run\n"
-        res=res+"#############################################\n\n"
-        res=res+"function failed()\n{\n  echo ""Execution failed for $@""\n  exit 1\n}\n"
-        res=res+"if [ $# -ne 2 ]\nthen\n echo Usage: $0 build_dir wrapper_options\necho Runs all unit tests. Options must be a single string.\nexit 2\nfi\n"
-        res=res+'CMDSTR="getopt p:n: -- $2" #Not using -uq -o because that is GNU only\nSTR=`$CMDSTR`\nNUMPROCS=1\n'
+        res="""#!/bin/sh
+#############################################
+# This file is autogenerated by scons.
+# It will be regenerated each time scons is run
+#############################################
+
+failed () {
+    echo "Execution failed for $@"
+    exit 1
+}
+
+if [ $# -lt 2 ]; then
+    echo "Usage: $0 build_dir wrapper_options [groupname]"
+    echo Runs all or a group of unit tests. Options must be a single string.
+    exit 2
+fi
+
+case "$1" in
+   /*) ;;
+   *) echo "build_dir needs to be an absolute path"; exit 4;;
+esac
+
+NUMPROCS=1
+NUMNODES=1
+while getopts ':n:p:' option $2
+do
+    case "$option" in
+        "n")  NUMNODES=$OPTARG ;;
+        "p")  NUMPROCS=$OPTARG ;;
+    esac
+done
+MPIPROD=$(($NUMPROCS * $NUMNODES))
+"""
+        res+="BUILD_DIR=$1"+"/"+build_platform
+        res+="\nif [ ! -d $BUILD_DIR ]\nthen\n    echo Can not find build directory $BUILD_DIR\n     exit 2\nfi\n" 
         if stdloc:
-            res=res+'MPITYPE=`run-escript -c | grep mpi=`\n'
+            res+="""MPITYPE=`run-escript -c | grep mpi=`
+export OLD_PYTHON=$PYTHONPATH
+BATCH_ROOT=`pwd`
+BINRUNNER="run-escript -b $2"
+PYTHONRUNNER="run-escript $2"
+PYTHONTESTRUNNER="run-escript $2 $BATCH_ROOT/tools/testrunner.py"
+"""
         else:
-            res=res+'MPITYPE=`%s/bin/run-escript -c | grep mpi=`\n'%prefix
-        res=res+'NUMNODES=1\n#This little complication is required because set --\n'
-        res=res+'#does not seem to like -n as the first positional parameter\n'
-        res=res+'STATE=0\nfor name in $STR\ndo \n'
-        res=res+'case $STATE in\n'
-        res=res+'     0) case $name in\n'
-        res=res+'          -n) STATE=1;;\n'
-        res=res+'          -p) STATE=2;;\n'
-        res=res+'          --) break 2;;\n'
-        res=res+'        esac;;\n'
-        res=res+'     1) if [ $name == "--" ];then break; fi; NUMNODES=$name; STATE=0;;\n'
-        res=res+'     2) if [ $name == "--" ];then break; fi; NUMPROCS=$name; STATE=0;;\n'
-        res=res+'   esac\n'
-        res=res+'done\n'
-        res=res+'let MPIPROD="$NUMPROCS * $NUMNODES"\n'
-        if not stdloc:
-            res=res+"\nexport LD_LIBRARY_PATH=%s/lib:$LD_LIBRARY_PATH\n"%prefix
+            res+="""MPITYPE=`{0}/bin/run-escript -c | grep mpi=`
+BATCH_ROOT=`pwd`            
+export LD_LIBRARY_PATH={0}/lib:$LD_LIBRARY_PATH
+export OLD_PYTHON={0}:$PYTHONPATH
+BINRUNNER="{0}/bin/run-escript -b $2"
+PYTHONRUNNER="{0}/bin/run-escript $2"
+PYTHONTESTRUNNER="{0}/bin/run-escript $2 {0}/tools/testrunner.py"
+""".format(prefix)
         if build_platform=='darwin':
-                res=res+"export DYLD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DYLD_LIBRARY_PATH\n"
-        if stdloc:
-            res=res+"\nexport OLD_PYTHON=$PYTHONPATH\nBINRUNNER=\"run-escript -b $2\"\nPYTHONRUNNER=\"run-escript $2\"\nBATCH_ROOT=`pwd`\n"
-            res=res+"PYTHONTESTRUNNER=\"run-escript $2 $BATCH_ROOT/tools/testrunner.py\"\n"
-        else:
-            res=res+"""\nexport OLD_PYTHON={0}:$PYTHONPATH
-BINRUNNER=\"{0}/bin/run-escript -b $2\"
-PYTHONRUNNER=\"{0}/bin/run-escript $2\"
-PYTHONTESTRUNNER=\"{0}/bin/run-escript $2 {0}/tools/testrunner.py\"
-BATCH_ROOT=`pwd`\n""".format(prefix)
-        res=res+"BUILD_DIR=$1"+"/"+build_platform
-        res=res+"\nif [ ! -d $BUILD_DIR ]\nthen\n echo Can not find build directory $BUILD_DIR\n exit 2\nfi\n" 
-        #res=res+"if [ $# -lt 2 ]\nthen\n echo Usage: $0 bin_run_cmd python_run_cmd\n exit 2\nfi\n"
+            res+="export DYLD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DYLD_LIBRARY_PATH\n"
         return res
     makeHeader=staticmethod(makeHeader)
 
     def makeString(self):
-        res=""
+        res="%s () {\n"%self.name
+        tt="\t"
         build_dir = self.working_dir.replace("$BATCH_ROOT", "$BUILD_DIR")
-        if self.single_processor_only:
-            res+="#if [ $MPIPROD -le 1 ]; then\n"
-            res+='if [ "$MPITYPE" == "mpi=none" ]; then\n'
-            tt="\t"
-        else:
-            tt=""
         for d in self.mkdirs:
             res=res+tt+"if [ ! -d "+str(d)+" ]\n"+tt+"then\n"+tt+"\tmkdir -p "+str(d)+"\n"+tt+"fi\n"
         for v in self.evars:
@@ -102,8 +113,27 @@ BATCH_ROOT=`pwd`\n""".format(prefix)
             res=res+tt+"export PYTHONPATH="+self.python_dir+":$OLD_PYTHON"+"\n"+tt+"cd "+self.working_dir+"\n"
         else:
             res=res+tt+"export PYTHONPATH=$OLD_PYTHON"+"\n"+tt+"cd "+self.working_dir+"\n"
+        if len(self.single_process_tests) > 0:
+            res+=tt+"if [ $MPIPROD -le 1 ]; then\n"
+            #res+=tt+'if [ "$MPITYPE" == "mpi=none" ]; then\n'
+            tt+="\t"
+            for t in self.single_process_tests:
+                res=res+tt+"echo Starting "+t+"\n"+tt+"date\n"
+                skipoutputfile = ""
+                failoutputfile = ""
+                cmd = self.exec_cmd
+                exit_on_failure = " || failed %s"%t
+                if "examples" not in build_dir and "PYTHONRUNNER" in self.exec_cmd \
+                        and "/tools/" not in build_dir:
+                    skipoutputfile = " -skipfile={0}/{1}".format(build_dir, t.replace(".py", ".skipped"))
+                    failoutputfile = " -failfile={0}/{1}".format(build_dir, t.replace(".py", ".failed"))
+                    cmd = cmd.replace("PYTHONRUNNER", "PYTHONTESTRUNNER")
+                res += "".join([tt, cmd, t, failoutputfile, skipoutputfile, exit_on_failure, "\n"])
+                res += tt+"echo Completed "+t+"\n"
+            tt="\t"
+            res+=tt+"fi\n"
         for t in self.test_list:
-            res=res+tt+"echo Starting "+t+"\ndate\n"
+            res=res+tt+"echo Starting "+t+"\n"+tt+"date\n"
             skipoutputfile = ""
             failoutputfile = ""
             cmd = self.exec_cmd
@@ -113,13 +143,14 @@ BATCH_ROOT=`pwd`\n""".format(prefix)
                 skipoutputfile = " -skipfile={0}/{1}".format(build_dir, t.replace(".py", ".skipped"))
                 failoutputfile = " -failfile={0}/{1}".format(build_dir, t.replace(".py", ".failed"))
                 cmd = cmd.replace("PYTHONRUNNER", "PYTHONTESTRUNNER")
-                exit_on_failure = ""
             res += "".join([tt, cmd, t, failoutputfile, skipoutputfile, exit_on_failure, "\n"])
             res += tt+"echo Completed "+t+"\n"
-        if self.single_processor_only:
-            res+="fi\n"
-        res=res+"\n"
+        res=res+"}\n"
         return res
     
     def makeFooter(self):
-        return "find $BUILD_DIR -name '*.failed' | xargs cat; find $BUILD_DIR -name '*.failed' | xargs cat | diff -q - /dev/null >/dev/null\n"
+        res="if [ $# -gt 2 ]; then\n\teval $3\nelse\n\t"
+        res+="\n\t".join(self._allfuncs)
+        res+="\nfi\nfind $BUILD_DIR -name '*.failed' | xargs cat; find $BUILD_DIR -name '*.failed' | xargs cat | diff -q - /dev/null >/dev/null\n"
+        return res
+
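With the grouptest.py changes above, each GroupTest now carries a name (emitted as a shell function in the generated itest.sh) and an explicit single_process_tests list in place of the old single_processor_only flag. A sketch of declaring a group under the new signature -- the group name, paths and test file names are placeholders, and evars is left empty here:

    from grouptest import GroupTest

    group = GroupTest('python_escript_tests',            # shell function name in itest.sh
                      '$PYTHONRUNNER ',                   # command prefix for each test
                      [],                                 # evars: extra environment settings
                      '$BATCH_ROOT/escriptcore/test/python',
                      '$BATCH_ROOT/escriptcore/test/python',
                      ['run_some_tests.py'],
                      single_process_tests=['run_serial_only.py'])
    group.makeDir('$BUILD_DIR/escript_tests')
    body = group.makeString()   # emits "python_escript_tests () { ... }"

Per the new makeFooter(), passing a group name as the third argument to the generated itest.sh then runs just that group instead of all of them.
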
diff --git a/site_scons/site_init.py b/site_scons/site_init.py
index c5c3b9c..136cff3 100644
--- a/site_scons/site_init.py
+++ b/site_scons/site_init.py
@@ -28,7 +28,7 @@ from SCons.Defaults import Chmod, Copy
 from grouptest import *
 from extractdebbuild import *
 
-def findLibWithHeader(env, libs, header, paths, lang='c++'):
+def findLibWithHeader(env, libs, header, paths, lang='c++', try_link=True):
     from SCons.Script.SConscript import Configure
     inc_path=''
     lib_path=''
@@ -61,20 +61,22 @@ def findLibWithHeader(env, libs, header, paths, lang='c++'):
         else:
             raise RuntimeError('%s is not a valid path.'%paths[1])
 
-    # now try the library
-    conf=Configure(env.Clone())
-    conf.env.AppendUnique(CPPPATH = [inc_path])
-    conf.env.AppendUnique(LIBPATH = [lib_path])
-    if type(libs)==str: libs=[libs]
-    if len(libs)==0: libs=['']
-    # we can't check for each library by itself since they may depend on each
-    # other, so we add all libraries to the link line and check only for one
-    conf.env.AppendUnique(LIBS = libs)
-    if not conf.CheckLibWithHeader(libs[0], header, lang):
-        conf.Finish()
-        raise RuntimeError('Unable to link against %s (paths: %s, %s)'%(libs,inc_path,lib_path))
+    if try_link:
+        # now try the library
+        conf=Configure(env.Clone())
+        conf.env.AppendUnique(CPPPATH = [inc_path])
+        conf.env.AppendUnique(LIBPATH = [lib_path])
+        if type(libs)==str: libs=[libs]
+        if len(libs)==0: libs=['']
+        # we can't check for each library by itself since they may depend on
+        # each other, so we add all libraries to the link line and check only
+        # for one
+        conf.env.AppendUnique(LIBS = libs)
+        if not conf.CheckLibWithHeader(libs[0], header, lang):
+            conf.Finish()
+            raise RuntimeError('Unable to link against %s (paths: %s, %s)'%(libs,inc_path,lib_path))
 
-    conf.Finish()
+        conf.Finish()
     return inc_path, lib_path
 
 def detectModule(env, module):
@@ -134,7 +136,7 @@ def generateTestScripts(env, TestGroups):
         for tests in TestGroups:
           if tests.exec_cmd=='$PYTHONRUNNER ':
             utest.write(tests.makeString())
-            utest.write(tests.makeFooter())
+        utest.write(tests.makeFooter())
         utest.close()
         env.Execute(Chmod('itest.sh', 0o755))
         print("Generated itest.sh.")        
@@ -248,4 +250,28 @@ def effectiveName(inname):
 def osxlib_dep_rewrite(libname, targetdir, env):
     if env.Execute("tools/libmover.sh %s %s"%(libname, targetdir)):
        return 1
-    return None
\ No newline at end of file
+    return None
+
+def TristateVariable(key, help, default):
+    """
+    Modelled after SCons internal BoolVariable but allows three states
+    (on=1, off=0, auto=-1)
+    """
+    on_strings = ('y', 'yes', 'true', 't', '1', 'on')
+    off_strings = ('n', 'no', 'false', 'f', '0', 'off', 'none')
+    auto_strings = ('a', 'auto', 'default', 'def', '-1', '')
+
+    def _validator(key, val, env):
+        if not env[key] in (1, 0, -1):
+            raise SCons.Errors.UserError(
+                    'Invalid value for tristate option %s: %s' % (key, env[key]))
+
+    def _converter(val):
+        lval = val.lower()
+        if lval in on_strings: return 1
+        if lval in off_strings: return 0
+        if lval in auto_strings: return -1
+        raise ValueError("Invalid value for tristate option: %s" % val)
+
+    return (key, '%s (yes|no|auto)' % help, default, _validator, _converter)
+
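TristateVariable above is designed to slot into the standard SCons Variables machinery, mirroring BoolVariable but adding an 'auto' state (-1) of the kind the reworked LAPACK detection in dependencies.py checks for (env['lapack'] compared against 0 and 1). A minimal usage sketch; the option name, help text and options filename are illustrative:

    # In an SConstruct, next to the existing Variables definitions:
    from SCons.Script import Variables, Environment
    from site_init import TristateVariable

    vars = Variables('myhost_options.py')    # hypothetical options file
    vars.Add(TristateVariable('lapack', 'Use the LAPACK library', -1))
    env = Environment(variables=vars)

    if env['lapack'] == 1:
        print('LAPACK explicitly requested; a failed probe should be fatal')
    elif env['lapack'] == -1:
        print('LAPACK set to auto; probe quietly and fall back if unavailable')
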
diff --git a/pasowrap/py_src/SConscript b/speckley/SConscript
similarity index 65%
copy from pasowrap/py_src/SConscript
copy to speckley/SConscript
index 05a35ee..9ef7483 100644
--- a/pasowrap/py_src/SConscript
+++ b/speckley/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,19 +13,14 @@
 #
 ##############################################################################
 
+Import('env')
+if 'speckley' in env['domains']:
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-import os
-Import('*')
-
-local_env = env.Clone()
-
-# get the source file names
-sources = Glob('*.py')
-
-# compile
-pyc = local_env.PyCompile(sources)
+    # configure python module
+    env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# install
-py_inst = local_env.Install(local_env['pyinstall']+'/pasowrap', pyc)
-env.Alias('install_pasowrap_py', py_inst)
+    # configure unit tests
+    env.SConscript('test/SConscript', duplicate=0)
 
diff --git a/speckley/py_src/SConscript b/speckley/py_src/SConscript
index 4551b41..52717d6 100644
--- a/speckley/py_src/SConscript
+++ b/speckley/py_src/SConscript
@@ -14,9 +14,7 @@
 #
 ##############################################################################
 
-import os
 Import('*')
-
 local_env = env.Clone()
 
 # get the source file names
@@ -26,6 +24,6 @@ sources = Glob('*.py')
 pyc = local_env.PyCompile(sources)
 
 # install
-py_inst = local_env.Install(local_env['pyinstall']+'/speckley', pyc)
-env.Alias('install_speckley_py', py_inst)
+py_inst = local_env.Install(Dir('speckley', local_env['pyinstall']), pyc)
+env.Alias('install_speckley', py_inst)
 
diff --git a/speckley/src/AbstractAssembler.cpp b/speckley/src/AbstractAssembler.cpp
index b508793..ebedd29 100644
--- a/speckley/src/AbstractAssembler.cpp
+++ b/speckley/src/AbstractAssembler.cpp
@@ -13,10 +13,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/AbstractAssembler.h>
 
 namespace speckley {
diff --git a/speckley/src/AbstractAssembler.h b/speckley/src/AbstractAssembler.h
index 8ba8470..505d9c3 100644
--- a/speckley/src/AbstractAssembler.h
+++ b/speckley/src/AbstractAssembler.h
@@ -17,11 +17,11 @@
 #ifndef __SPECKLEY_ABSTRACTASSEMBLER_H__
 #define __SPECKLEY_ABSTRACTASSEMBLER_H__
 
+#include <speckley/domainhelpers.h>
+
 #include <escript/AbstractSystemMatrix.h>
 #include <escript/Data.h>
 #include <escript/Pointers.h>
-#include <esysUtils/index.h>
-#include <speckley/domainhelpers.h>
 
 namespace speckley {
 
diff --git a/speckley/src/Brick.cpp b/speckley/src/Brick.cpp
index 7adfd12..e6affb6 100644
--- a/speckley/src/Brick.cpp
+++ b/speckley/src/Brick.cpp
@@ -14,29 +14,27 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-#include <boost/math/special_functions/fpclassify.hpp> // for isnan
-
 #include <speckley/Brick.h>
 #include <speckley/DefaultAssembler3D.h>
 #include <speckley/WaveAssembler3D.h>
-#include <esysUtils/esysFileWriter.h>
-#include <esysUtils/EsysRandom.h>
-#include <esysUtils/index.h>
-#include <escript/FunctionSpaceFactory.h>
-
-#include <boost/scoped_array.hpp>
 
 #ifdef USE_RIPLEY
 #include <speckley/CrossDomainCoupler.h>
 #endif
 
-#ifdef USE_NETCDF
+#include <escript/index.h>
+#include <escript/FileWriter.h>
+#include <escript/FunctionSpaceFactory.h>
+#include <escript/Random.h>
+
+#include <boost/scoped_array.hpp>
+#include <boost/math/special_functions/fpclassify.hpp> // for isnan
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #ifdef ESYS_MPI
 #include <pmpio.h>
@@ -47,7 +45,7 @@
 #include <limits>
 
 namespace bm=boost::math;
-using esysUtils::FileWriter;
+using escript::FileWriter;
 using std::max;
 using std::min;
 using std::vector;
@@ -237,7 +235,7 @@ bool Brick::operator==(const escript::AbstractDomain& other) const
 void Brick::readNcGrid(escript::Data& out, std::string filename, std::string varname,
             const ReaderParameters& params) const
 {
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
     // check destination function space
     dim_t myN0, myN1, myN2;
     if (out.getFunctionSpace().getTypeCode() == Nodes) {
@@ -380,10 +378,10 @@ void Brick::readNcGrid(escript::Data& out, std::string filename, std::string var
 #endif
 }
 
-#ifdef USE_BOOSTIO
 void Brick::readBinaryGridFromZipped(escript::Data& out, std::string filename,
                            const ReaderParameters& params) const
 {
+#ifdef ESYS_HAVE_BOOST_IO
     // the mapping is not universally correct but should work on our
     // supported platforms
     switch (params.dataType) {
@@ -397,10 +395,12 @@ void Brick::readBinaryGridFromZipped(escript::Data& out, std::string filename,
             readBinaryGridZippedImpl<double>(out, filename, params);
             break;
         default:
-            throw SpeckleyException("readBinaryGrid(): invalid or unsupported datatype");
+            throw SpeckleyException("readBinaryGridZipped(): invalid or unsupported datatype");
     }
-}
+#else
+    throw SpeckleyException("readBinaryGridZipped(): not compiled with zip support");
 #endif
+}
 
 void Brick::readBinaryGrid(escript::Data& out, std::string filename,
                            const ReaderParameters& params) const
@@ -456,7 +456,7 @@ void Brick::readBinaryGridImpl(escript::Data& out, const std::string& filename,
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw SpeckleyException("readBinaryGrid(): cannot open file");
+        throw SpeckleyException("readBinaryGrid(): cannot open file " + filename);
     }
     f.seekg(0, std::ios::end);
     const int numComp = out.getDataPointSize();
@@ -579,7 +579,7 @@ void Brick::readBinaryGridImpl(escript::Data& out, const std::string& filename,
     interpolateFromCorners(out);
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 template<typename ValueType>
 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
                                const ReaderParameters& params) const
@@ -612,7 +612,7 @@ void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw SpeckleyException("readBinaryGridFromZipped(): cannot open file");
+        throw SpeckleyException("readBinaryGridFromZipped(): cannot open file " + filename);
     }
     f.seekg(0, std::ios::end);
     const int numComp = out.getDataPointSize();
@@ -888,7 +888,7 @@ void Brick::write(const std::string& filename) const
 
 void Brick::dump(const string& fileName) const
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     string fn(fileName);
     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
         fn+=".silo";
@@ -1030,7 +1030,7 @@ void Brick::dump(const string& fileName) const
         DBClose(dbfile);
     }
 
-#else // USE_SILO
+#else // ESYS_HAVE_SILO
     throw SpeckleyException("dump: no Silo support");
 #endif
 }
@@ -2505,28 +2505,30 @@ void Brick::shareFaces(escript::Data& out, int rx, int ry, int rz) const
 
 
 escript::Data Brick::randomFill(const escript::DataTypes::ShapeType& shape,
-       const escript::FunctionSpace& fs,
-       long seed, const boost::python::tuple& filter) const
+                                const escript::FunctionSpace& fs, long seed,
+                                const boost::python::tuple& filter) const
 {
-    int numvals=escript::DataTypes::noValues(shape);
-    int per_element = (m_order+1)*(m_order+1)*(m_order+1)*numvals;
-    if (len(filter)>0) {
+    const int numvals = escript::DataTypes::noValues(shape);
+    const int per_element = (m_order+1)*(m_order+1)*(m_order+1)*numvals;
+    if (len(filter) > 0) {
         throw SpeckleyException("Speckley does not support filters.");
     }
 
-    double* src=new double[m_NE[0]*m_NE[1]*m_NE[2]*per_element*numvals];
-    esysUtils::randomFillArray(seed, src, m_NE[0]*m_NE[1]*m_NE[2]*per_element);
+    double* src = new double[m_NE[0]*m_NE[1]*m_NE[2]*per_element*numvals];
+    escript::randomFillArray(seed, src, m_NE[0]*m_NE[1]*m_NE[2]*per_element);
     escript::Data res(0, shape, escript::function(*this), true);
     int current = 0;
-    for (int ei = 0; ei < m_NE[2]; ++ei) {
-        for (int ej = 0; ej < m_NE[1]; ++ej) {
-            for (int ek = 0; ek < m_NE[0]; ++ek) {
+    for (index_t ei = 0; ei < m_NE[2]; ++ei) {
+        for (index_t ej = 0; ej < m_NE[1]; ++ej) {
+            for (index_t ek = 0; ek < m_NE[0]; ++ek) {
                 double *e = res.getSampleDataRW(INDEX3(ek,ej,ei,m_NE[0],m_NE[1]));
                 memcpy(e, &src[current], sizeof(double)*per_element);
                 current += per_element;
             }
         }
     }
+    delete[] src;
+
     if (res.getFunctionSpace() != fs) {
         return escript::Data(res, fs);
     }
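
The Brick.cpp changes above fold two recurring themes of this commit into one file: randomFill() now releases its temporary buffer (the new delete[] src closes a leak) and uses index_t loop counters, and the zipped-grid reader keeps its public entry point unconditional, guarding only the implementation so that a build without boost::iostreams fails at call time with a SpeckleyException instead of changing the class interface. A schematic of that guard pattern, with HAVE_FEATURE, Reader and readZippedImpl as illustrative stand-ins rather than escript names:

    // Pattern sketch: the entry point is always compiled; only the body and the
    // implementation helper depend on the optional dependency.
    #include <stdexcept>
    #include <string>

    class Reader {
    public:
        void readZipped(const std::string& filename);     // always in the interface
    private:
    #ifdef HAVE_FEATURE
        void readZippedImpl(const std::string& filename); // only when available
    #endif
    };

    void Reader::readZipped(const std::string& filename)
    {
    #ifdef HAVE_FEATURE
        readZippedImpl(filename);                          // real work
    #else
        throw std::runtime_error("readZipped(): not compiled with zip support");
    #endif
    }

Rectangle::readBinaryGridFromZipped() and the Python-facing wrapper in speckleycpp.cpp further down follow the same shape.
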
diff --git a/speckley/src/Brick.h b/speckley/src/Brick.h
index 45fdde3..b38f264 100644
--- a/speckley/src/Brick.h
+++ b/speckley/src/Brick.h
@@ -90,10 +90,8 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
 
-#ifdef USE_BOOSTIO
-    virtual void readBinaryGridFromZipped(escript::Data& out, std::string filename,
-                                const ReaderParameters& params) const;
-#endif
+    virtual void readBinaryGridFromZipped(escript::Data& out,
+                   std::string filename, const ReaderParameters& params) const;
 
     /**
     */
@@ -286,9 +284,12 @@ private:
     template<typename ValueType>
     void readBinaryGridImpl(escript::Data& out, const std::string& filename,
                             const ReaderParameters& params) const;
+
+#ifdef ESYS_HAVE_BOOST_IO
     template<typename ValueType>
     void readBinaryGridZippedImpl(escript::Data& out, 
             const std::string& filename, const ReaderParameters& params) const;
+#endif
 
     template<typename ValueType>
     void writeBinaryGridImpl(const escript::Data& in,
@@ -362,8 +363,8 @@ inline dim_t Brick::getNumDataPointsGlobal() const
 
 inline double Brick::getLocalCoordinate(index_t index, int dim) const
 {
-    EsysAssert((dim>=0 && dim<m_numDim), "'dim' out of bounds");
-    EsysAssert((index>=0 && index<m_NN[dim]), "'index' out of bounds");
+    ESYS_ASSERT(dim>=0 && dim<m_numDim, "'dim' out of bounds");
+    ESYS_ASSERT(index>=0 && index<m_NN[dim], "'index' out of bounds");
     return m_origin[dim]                                    //origin
             + m_dx[dim]*(m_offset[dim] + index/m_order      //elements
             + point_locations[m_order-2][index%m_order]);   //quads
diff --git a/speckley/src/BrickGradients.cpp b/speckley/src/BrickGradients.cpp
index e97ae0a..24d825f 100644
--- a/speckley/src/BrickGradients.cpp
+++ b/speckley/src/BrickGradients.cpp
@@ -14,13 +14,14 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-#include <esysUtils/index.h>
 #include <speckley/Brick.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Brick::gradient_order2(escript::Data& out, const escript::Data& in) const {
+
+void Brick::gradient_order2(escript::Data& out, const escript::Data& in) const
+{
     const double lagrange_deriv_0[3] = {-1.50000000000000, -0.500000000000000, 0.500000000000000};
     const double lagrange_deriv_1[3] = {2.00000000000000, 0, -2.00000000000000};
     const double lagrange_deriv_2[3] = {-0.500000000000000, 0.500000000000000, 1.50000000000000};
diff --git a/speckley/src/BrickIntegrals.cpp b/speckley/src/BrickIntegrals.cpp
index f4a7b1e..ee677b8 100644
--- a/speckley/src/BrickIntegrals.cpp
+++ b/speckley/src/BrickIntegrals.cpp
@@ -13,14 +13,15 @@
 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
 *
 *****************************************************************************/
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
 
-#include <esysUtils/index.h>
 #include <speckley/Brick.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Brick::integral_order2(std::vector<double>& integrals, const escript::Data& arg) const {
+
+void Brick::integral_order2(std::vector<double>& integrals, const escript::Data& arg) const
+{
     const double weights[] = {0.333333333333, 1.33333333333, 0.333333333333};
     const int numComp = arg.getDataPointSize();
     const double volume_product = 0.125*m_dx[0]*m_dx[1]*m_dx[2];
@@ -264,3 +265,4 @@ void Brick::integral_order10(std::vector<double>& integrals, const escript::Data
 }
 
 }
+
diff --git a/speckley/src/BrickReductions.cpp b/speckley/src/BrickReductions.cpp
index 4db4664..d7196bb 100644
--- a/speckley/src/BrickReductions.cpp
+++ b/speckley/src/BrickReductions.cpp
@@ -16,8 +16,12 @@
 
 #include <speckley/Brick.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Brick::reduction_order2(const escript::Data& in, escript::Data& out) const {
+
+void Brick::reduction_order2(const escript::Data& in, escript::Data& out) const
+{
     const double weights[] = {0.333333333333, 1.33333333333, 0.333333333333};
     const int numComp = in.getDataPointSize();
     for (int ei = 0; ei < m_NE[2]; ++ei) {
diff --git a/speckley/src/CrossDomainCoupler.cpp b/speckley/src/CrossDomainCoupler.cpp
index b077748..8e0147d 100644
--- a/speckley/src/CrossDomainCoupler.cpp
+++ b/speckley/src/CrossDomainCoupler.cpp
@@ -14,15 +14,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/CrossDomainCoupler.h>
-
 #include <speckley/lagrange_functions.h>
-#include <esysUtils/index.h>
-#include <esysUtils/Esys_MPI.h>
+
+#include <escript/index.h>
 
 #define MINE 1
 #define SHARED 0
diff --git a/speckley/src/CrossDomainCoupler.h b/speckley/src/CrossDomainCoupler.h
index f7ab240..21eab74 100644
--- a/speckley/src/CrossDomainCoupler.h
+++ b/speckley/src/CrossDomainCoupler.h
@@ -17,10 +17,11 @@
 #ifndef __SPECKLEY_CROSSDOMAINCOUPLER_H__
 #define __SPECKLEY_CROSSDOMAINCOUPLER_H__
 
-#include <speckley/Rectangle.h>
 #include <speckley/Brick.h>
-#include <ripley/Rectangle.h>
+#include <speckley/Rectangle.h>
+
 #include <ripley/Brick.h>
+#include <ripley/Rectangle.h>
 
 namespace speckley {
 
diff --git a/speckley/src/DefaultAssembler2D.cpp b/speckley/src/DefaultAssembler2D.cpp
index 3962d9f..f1101f5 100644
--- a/speckley/src/DefaultAssembler2D.cpp
+++ b/speckley/src/DefaultAssembler2D.cpp
@@ -14,13 +14,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/DefaultAssembler2D.h>
 #include <speckley/domainhelpers.h>
-#include <esysUtils/index.h>
+
+#include <escript/index.h>
 
 const double all_weights[][11] = {
     {0.333333333333, 1.33333333333, 0.333333333333},
diff --git a/speckley/src/DefaultAssembler2D.h b/speckley/src/DefaultAssembler2D.h
index 05e6327..65297c0 100644
--- a/speckley/src/DefaultAssembler2D.h
+++ b/speckley/src/DefaultAssembler2D.h
@@ -31,7 +31,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Rectangle>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
     }
 
     ~DefaultAssembler2D() {}
@@ -123,7 +123,7 @@ public:
                                            const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Rectangle> domain;
+    POINTER_WRAPPER_CLASS(const Rectangle) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
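
The assemblers also stop naming boost::shared_ptr directly: the domain member becomes POINTER_WRAPPER_CLASS(const Rectangle) and the cast goes through REFCOUNTNS::static_pointer_cast, so the reference-counting implementation is selected in one place inside escript. The macros' real definitions are not part of this hunk; the following is only a plausible shape, with the configuration switch name invented for illustration:

    // Illustrative only: the switch ESYS_USE_STD_SHARED_PTR and the exact
    // expansions are assumptions, not copied from escript's headers.
    #ifdef ESYS_USE_STD_SHARED_PTR
      #include <memory>
      #define REFCOUNTNS std
      #define POINTER_WRAPPER_CLASS(T) std::shared_ptr<T >
    #else
      #include <boost/shared_ptr.hpp>
      #include <boost/pointer_cast.hpp>
      #define REFCOUNTNS boost
      #define POINTER_WRAPPER_CLASS(T) boost::shared_ptr<T >
    #endif

    // Client code then reads exactly as in the patched assemblers:
    //   POINTER_WRAPPER_CLASS(const Rectangle) domain;
    //   domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
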
diff --git a/speckley/src/DefaultAssembler3D.cpp b/speckley/src/DefaultAssembler3D.cpp
index 47c5010..67e72b4 100644
--- a/speckley/src/DefaultAssembler3D.cpp
+++ b/speckley/src/DefaultAssembler3D.cpp
@@ -14,13 +14,9 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/DefaultAssembler3D.h>
 #include <speckley/domainhelpers.h>
-#include <esysUtils/index.h>
+#include <escript/index.h>
 
 const double all_weights[][11] = {
     {0.333333333333, 1.33333333333, 0.333333333333},
diff --git a/speckley/src/DefaultAssembler3D.h b/speckley/src/DefaultAssembler3D.h
index c92f021..8d81808 100644
--- a/speckley/src/DefaultAssembler3D.h
+++ b/speckley/src/DefaultAssembler3D.h
@@ -16,12 +16,14 @@
 #ifndef __SPECKLEY_DEFAULTASSEMBLER3D_H__
 #define __SPECKLEY_DEFAULTASSEMBLER3D_H__
 
-#include <map>
-#include <escript/Data.h>
 #include <speckley/Speckley.h>
-#include <speckley/SpeckleyException.h>
 #include <speckley/AbstractAssembler.h>
 #include <speckley/Brick.h>
+#include <speckley/SpeckleyException.h>
+
+#include <escript/Data.h>
+
+#include <map>
 
 namespace speckley {
 
@@ -36,7 +38,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Brick>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Brick>(dom);
     }
 
     ~DefaultAssembler3D() {}
@@ -124,7 +126,7 @@ public:
                                    const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Brick> domain;
+    POINTER_WRAPPER_CLASS(const Brick) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/speckley/src/Rectangle.cpp b/speckley/src/Rectangle.cpp
index 8ceb758..b660b32 100644
--- a/speckley/src/Rectangle.cpp
+++ b/speckley/src/Rectangle.cpp
@@ -14,42 +14,39 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-#include <boost/math/special_functions/fpclassify.hpp> // for isnan
-#include <algorithm>
-#include <limits>
-
 #include <speckley/Rectangle.h>
-#include <esysUtils/esysFileWriter.h>
-#include <esysUtils/index.h>
-#include <esysUtils/Esys_MPI.h>
 #include <speckley/DefaultAssembler2D.h>
 #include <speckley/WaveAssembler2D.h>
-#include <boost/scoped_array.hpp>
-#include <escript/FunctionSpaceFactory.h>
-#include "esysUtils/EsysRandom.h"
-
 #ifdef USE_RIPLEY
 #include <speckley/CrossDomainCoupler.h>
 #endif
 
-#ifdef USE_NETCDF
+#include <escript/index.h>
+#include <escript/FileWriter.h>
+#include <escript/FunctionSpaceFactory.h>
+#include <escript/Random.h>
+
+#include <boost/scoped_array.hpp>
+#include <boost/math/special_functions/fpclassify.hpp> // for isnan
+
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #ifdef ESYS_MPI
 #include <pmpio.h>
 #endif
 #endif
 
+#include <algorithm>
 #include <iomanip>
+#include <limits>
 
-namespace bm=boost::math;
-using esysUtils::FileWriter;
+namespace bm = boost::math;
+namespace bp = boost::python;
+using escript::FileWriter;
 
 namespace speckley {
 
@@ -206,7 +203,7 @@ bool Rectangle::operator==(const escript::AbstractDomain& other) const
 void Rectangle::readNcGrid(escript::Data& out, std::string filename,
         std::string varname, const ReaderParameters& params) const
 {
-#ifdef USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
     // check destination function space
     dim_t myN0, myN1;
     if (out.getFunctionSpace().getTypeCode() == Nodes) {
@@ -343,10 +340,10 @@ void Rectangle::readBinaryGrid(escript::Data& out, std::string filename,
     }
 }
 
-#ifdef USE_BOOSTIO
 void Rectangle::readBinaryGridFromZipped(escript::Data& out, std::string filename,
                                const ReaderParameters& params) const
 {
+#ifdef ESYS_HAVE_BOOST_IO
     // the mapping is not universally correct but should work on our
     // supported platforms
     switch (params.dataType) {
@@ -362,8 +359,10 @@ void Rectangle::readBinaryGridFromZipped(escript::Data& out, std::string filenam
         default:
             throw SpeckleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
     }
-}
+#else
+    throw SpeckleyException("readBinaryGridFromZipped(): not built with zip support");
 #endif
+}
 
 template<typename ValueType>
 void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filename,
@@ -397,7 +396,7 @@ void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filena
     // check file existence and size
     std::ifstream f(filename.c_str(), std::ifstream::binary);
     if (f.fail()) {
-        throw SpeckleyException("readBinaryGrid(): cannot open file");
+        throw SpeckleyException("readBinaryGrid(): cannot open file " + filename);
     }
     f.seekg(0, std::ios::end);
     const int numComp = out.getDataPointSize();
@@ -488,7 +487,7 @@ void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filena
     interpolateFromCorners(out);
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 template<typename ValueType>
 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const std::string& filename,
                                    const ReaderParameters& params) const
@@ -726,7 +725,7 @@ void Rectangle::write(const std::string& filename) const
 
 void Rectangle::dump(const std::string& fileName) const
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     std::string fn(fileName);
     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
         fn+=".silo";
@@ -863,7 +862,7 @@ void Rectangle::dump(const std::string& fileName) const
         DBClose(dbfile);
     }
 
-#else // USE_SILO
+#else // ESYS_HAVE_SILO
     throw SpeckleyException("dump: no Silo support");
 #endif
 }
@@ -1043,25 +1042,28 @@ void Rectangle::assembleIntegrate(std::vector<double>& integrals,
  * For detailed doco see randomFillWorker
 */
 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
-           const escript::FunctionSpace& fs,
-           long seed, const boost::python::tuple& filter) const {
-    int numvals=escript::DataTypes::noValues(shape);
-    int per_element = (m_order+1)*(m_order+1)*numvals;
-    if (len(filter)>0) {
+                                    const escript::FunctionSpace& fs,
+                                    long seed, const bp::tuple& filter) const
+{
+    const int numvals = escript::DataTypes::noValues(shape);
+    const int per_element = (m_order+1) * (m_order+1) * numvals;
+    if (len(filter) > 0) {
         throw SpeckleyException("Speckley does not support filters.");
     }
 
-    double* src=new double[m_NE[0]*m_NE[1]*per_element*numvals];
-    esysUtils::randomFillArray(seed, src, m_NE[0]*m_NE[1]*per_element);
+    double* src = new double[m_NE[0] * m_NE[1] * per_element * numvals];
+    escript::randomFillArray(seed, src, m_NE[0]*m_NE[1]*per_element);
     escript::Data res(0, shape, escript::function(*this), true);
     int current = 0;
-    for (int ei = 0; ei < m_NE[1]; ++ei) {
-        for (int ej = 0; ej < m_NE[0]; ++ej) {
-            double *e = res.getSampleDataRW(INDEX2(ej,ei,m_NE[0]));
+    for (index_t ei = 0; ei < m_NE[1]; ++ei) {
+        for (index_t ej = 0; ej < m_NE[0]; ++ej) {
+            double* e = res.getSampleDataRW(INDEX2(ej, ei, m_NE[0]));
             memcpy(e, &src[current], sizeof(double)*per_element);
             current += per_element;
         }
     }
+    delete[] src;
+
     if (res.getFunctionSpace() != fs) {
         return escript::Data(res, fs);
     }
diff --git a/speckley/src/Rectangle.h b/speckley/src/Rectangle.h
index d815ffd..303640b 100644
--- a/speckley/src/Rectangle.h
+++ b/speckley/src/Rectangle.h
@@ -88,12 +88,11 @@ public:
     */
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const;
-#ifdef USE_BOOSTIO
+
     /**
     */
     virtual void readBinaryGridFromZipped(escript::Data& out,
                     std::string filename, const ReaderParameters& params) const;
-#endif
 
     /**
     */
@@ -301,9 +300,11 @@ private:
     void readBinaryGridImpl(escript::Data& out, const std::string& filename,
                             const ReaderParameters& params) const;
 
+#ifdef ESYS_HAVE_BOOST_IO
     template<typename ValueType>
     void readBinaryGridZippedImpl(escript::Data& out, 
             const std::string& filename, const ReaderParameters& params) const;
+#endif
 
     template<typename ValueType>
     void writeBinaryGridImpl(const escript::Data& in,
@@ -367,8 +368,8 @@ inline dim_t Rectangle::getNumDataPointsGlobal() const
 
 inline double Rectangle::getLocalCoordinate(index_t index, int dim) const
 {
-    EsysAssert((dim>=0 && dim<2), "'dim' out of bounds");
-    EsysAssert((index>=0 && index<m_NN[dim]), "'index' out of bounds");
+    ESYS_ASSERT(dim>=0 && dim<2, "'dim' out of bounds");
+    ESYS_ASSERT(index>=0 && index<m_NN[dim], "'index' out of bounds");
     return m_origin[dim]                                    //origin
             + m_dx[dim]*(m_offset[dim] + index/m_order      //elements
             + point_locations[m_order-2][index%m_order]);   //quads
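
The body of getLocalCoordinate() is untouched apart from the assertion macro, but the index arithmetic it relies on is compact: index/m_order picks the element, index%m_order picks the node inside that element, and point_locations[m_order-2] maps that node to its position in [0,1]. A self-contained worked example for order 2, assuming the node layout {0, 0.5, 1} (order-2 Gauss-Lobatto-Legendre points on the unit interval; the real table lives in the speckley sources):

    // Worked example of the getLocalCoordinate() decomposition for order 2.
    #include <cstdio>

    int main() {
        const int    order    = 2;
        const double origin   = 0.0, dx = 1.0;
        const int    offset   = 0;                 // element offset of this rank
        const double nodes[3] = {0.0, 0.5, 1.0};   // assumed order-2 layout

        for (int index = 0; index <= 6; ++index) {
            const int elem = index / order;        // which element owns the node
            const int loc  = index % order;        // which node within the element
            const double x = origin + dx*(offset + elem + nodes[loc]);
            std::printf("index %d -> element %d, local node %d, x = %.1f\n",
                        index, elem, loc, x);
        }
        return 0;                                  // x runs 0.0, 0.5, 1.0, ... 3.0
    }
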
diff --git a/speckley/src/RectangleGradients.cpp b/speckley/src/RectangleGradients.cpp
index aaf4e95..b29917f 100644
--- a/speckley/src/RectangleGradients.cpp
+++ b/speckley/src/RectangleGradients.cpp
@@ -14,13 +14,14 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-#include <esysUtils/index.h>
 #include <speckley/Rectangle.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Rectangle::gradient_order2(escript::Data& out, const escript::Data& in) const {
+
+void Rectangle::gradient_order2(escript::Data& out, const escript::Data& in) const
+{
     const double lagrange_deriv_0[3] = {-1.50000000000000, -0.500000000000000, 0.500000000000000};
     const double lagrange_deriv_1[3] = {2.00000000000000, 0, -2.00000000000000};
     const double lagrange_deriv_2[3] = {-0.500000000000000, 0.500000000000000, 1.50000000000000};
diff --git a/speckley/src/RectangleIntegrals.cpp b/speckley/src/RectangleIntegrals.cpp
index d940c86..f9ce01c 100644
--- a/speckley/src/RectangleIntegrals.cpp
+++ b/speckley/src/RectangleIntegrals.cpp
@@ -13,14 +13,15 @@
 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
 *
 *****************************************************************************/
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
 
-#include <esysUtils/index.h>
 #include <speckley/Rectangle.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Rectangle::integral_order2(std::vector<double>& integrals, const escript::Data& arg) const {
+
+void Rectangle::integral_order2(std::vector<double>& integrals, const escript::Data& arg) const
+{
     const double weights[] = {0.333333333333, 1.33333333333, 0.333333333333};
     const int numComp = arg.getDataPointSize();
     const double volume_product = 0.25*m_dx[0]*m_dx[1];
diff --git a/speckley/src/RectangleReductions.cpp b/speckley/src/RectangleReductions.cpp
index 6d68d4b..993d28e 100644
--- a/speckley/src/RectangleReductions.cpp
+++ b/speckley/src/RectangleReductions.cpp
@@ -16,8 +16,12 @@
 
 #include <speckley/Rectangle.h>
 
+#include <escript/index.h>
+
 namespace speckley {
-void Rectangle::reduction_order2(const escript::Data& in, escript::Data& out) const {
+
+void Rectangle::reduction_order2(const escript::Data& in, escript::Data& out) const
+{
     const double weights[] = {0.333333333333, 1.33333333333, 0.333333333333};
     const int numComp = in.getDataPointSize();
     for (int ei = 0; ei < m_NE[1]; ++ei) {
diff --git a/speckley/src/SConscript b/speckley/src/SConscript
index 4e5cc61..28740a3 100644
--- a/speckley/src/SConscript
+++ b/speckley/src/SConscript
@@ -14,17 +14,9 @@
 #
 ##############################################################################
 
-import os
 Import('*')
 
-local_env = env.Clone()
-py_wrapper_local_env = env.Clone()
-local_unroll_env = env.Clone()
-
-
-# Remove the shared library prefix on all platforms - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'speckley'
 
 sources = """
     AbstractAssembler.cpp
@@ -40,7 +32,6 @@ sources = """
     RectangleIntegrals.cpp
     RectangleReductions.cpp
     SpeckleyDomain.cpp
-    SpeckleyException.cpp
     WaveAssembler2D.cpp
     WaveAssembler3D.cpp
 """.split()
@@ -61,55 +52,44 @@ headers = """
     WaveAssembler3D.h
 """.split()
 
-local_env.Prepend(LIBS = ['escript', 'esysUtils'])
+local_env = env.Clone()
 
+if IS_WINDOWS:
+    local_env.Append(CPPDEFINES = ['Speckley_EXPORTS'])
+
+# collect dependencies for other modules
+speckleylibs = []
+speckleylibs += env['escript_libs']
 if 'ripley' in env['domains']:
     local_env.Append(CPPDEFINES = ['USE_RIPLEY'])
-    py_wrapper_local_env.Append(CPPDEFINES = ['USE_RIPLEY'])
-    local_env.Append(LIBS = ['ripley'])
+    speckleylibs += env['ripley_libs']
     sources += ['CrossDomainCoupler.cpp']
     headers += ['CrossDomainCoupler.h']
 
-if local_env['silo']:
-    local_env.Append(CPPDEFINES = ['USE_SILO'])
-    local_env.AppendUnique(LIBS = env['silo_libs'])
-
-if IS_WINDOWS:
-    local_env.Append(CPPDEFINES = ['Speckley_EXPORTS'])
+if env['compressed_files']:
+    speckleylibs += env['compression_libs']
+if env['silo']:
+    speckleylibs += env['silo_libs']
 
-module_name = 'speckley'
-
-lib = local_env.SharedLibrary(module_name, sources)
-env.Alias('build_speckley_lib', lib)
+local_env.PrependUnique(LIBS = speckleylibs)
 
-include_path = Dir('speckley', local_env['incinstall'])
+env['speckley_libs'] = [module_name] + speckleylibs
 
+include_path = Dir(module_name, local_env['incinstall'])
 hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_speckley_headers', hdr_inst)
 
+lib = local_env.SharedLibrary(module_name, sources)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_speckley_lib', lib_inst)
 
 ### Python wrapper ###
-py_wrapper_local_env.Prepend(LIBS = ['speckley', 'escript', 'esysUtils'])
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'speckleycpp.cpp')
-env.Alias('build_speckleycpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
-
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_speckleycpp_lib', mod_inst)
+py_env = local_env.Clone()
+py_env.Prepend(LIBS = [module_name])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'speckleycpp.cpp')
 
-# configure python module
-local_env.SConscript(dirs = ['#/speckley/py_src'], variant_dir='py', duplicate=0)
+mod_path = Dir(module_name, local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
 
-# configure unit tests
-local_env.SConscript(dirs = ['#/speckley/test'], variant_dir='test', duplicate=0, exports=['py_wrapper_lib'])
+build = env.Alias('build_speckley', [hdr_inst, lib, py_lib])
+env.Alias('install_speckley', [build, lib_inst, mod_inst])
 
diff --git a/speckley/src/Speckley.h b/speckley/src/Speckley.h
index 1590626..771b69c 100644
--- a/speckley/src/Speckley.h
+++ b/speckley/src/Speckley.h
@@ -24,7 +24,7 @@
 
 #include <speckley/system_dep.h>
 
-#include <esysUtils/Esys_MPI.h>
+#include <escript/EsysMPI.h>
 
 #include <boost/shared_ptr.hpp>
 #include <list>
@@ -34,10 +34,13 @@
 
 namespace speckley {
 
+using escript::DataTypes::dim_t;
+using escript::DataTypes::index_t;
+using escript::DataTypes::real_t;
+
 typedef std::pair<index_t,index_t> IndexPair;
 typedef std::vector<index_t> IndexVector;
-typedef std::vector<double> DoubleVector;
-typedef std::vector<Esys_MPI_rank> RankVector;
+typedef std::vector<real_t> DoubleVector;
 typedef std::map<std::string,int> TagMap;
 
 enum {
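
The using-declarations added to Speckley.h are what allow the loop counters elsewhere in this patch to change from int to index_t and the DoubleVector typedef to switch to real_t: the types now come from escript::DataTypes rather than being assumed to be plain int/double. A minimal sketch of why the counter type matters, with 64-bit widths assumed purely for illustration (the real widths are set in escript's DataTypes.h and its build options):

    // Sketch only: stand-in typedefs; escript may configure these differently.
    #include <cstdint>
    #include <cstdio>

    namespace escript { namespace DataTypes {
        typedef std::int64_t dim_t;    // assumed: extents can exceed 2^31
        typedef std::int64_t index_t;  // assumed: indices span the same range
    }}

    namespace speckley {
        using escript::DataTypes::dim_t;
        using escript::DataTypes::index_t;

        // With index_t counters the comparison against a 64-bit extent is exact
        // and cannot wrap the way a 32-bit int counter could on a huge mesh.
        long long countNodes(const dim_t* NN) {
            long long n = 0;
            for (index_t i = 0; i < NN[0]; ++i)
                for (index_t j = 0; j < NN[1]; ++j)
                    ++n;
            return n;
        }
    }

    int main() {
        const escript::DataTypes::dim_t NN[2] = {4, 5};
        std::printf("%lld nodes\n", speckley::countNodes(NN));  // prints 20
        return 0;
    }
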
diff --git a/speckley/src/SpeckleyDomain.cpp b/speckley/src/SpeckleyDomain.cpp
index d3dd1ea..f361687 100644
--- a/speckley/src/SpeckleyDomain.cpp
+++ b/speckley/src/SpeckleyDomain.cpp
@@ -14,16 +14,15 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/SpeckleyDomain.h>
+#include <speckley/domainhelpers.h>
+
 #include <escript/DataFactory.h>
 #include <escript/FunctionSpaceFactory.h>
-#include <speckley/domainhelpers.h>
+#include <escript/index.h>
 
 #include <iomanip>
+#include <iostream>
 
 namespace bp = boost::python;
 
@@ -52,7 +51,7 @@ SpeckleyDomain::SpeckleyDomain(dim_t dim, int order, escript::SubWorld_ptr p) :
     m_order(order)
 {
     if (p.get() == NULL)
-        m_mpiInfo = esysUtils::makeInfo(MPI_COMM_WORLD);
+        m_mpiInfo = escript::makeInfo(MPI_COMM_WORLD);
     else
         m_mpiInfo = p->getMPI();
 
diff --git a/speckley/src/SpeckleyDomain.h b/speckley/src/SpeckleyDomain.h
index 9e1c12d..88d2643 100644
--- a/speckley/src/SpeckleyDomain.h
+++ b/speckley/src/SpeckleyDomain.h
@@ -17,9 +17,6 @@
 #ifndef __Speckley_DOMAIN_H__
 #define __Speckley_DOMAIN_H__
 
-#include <boost/python/tuple.hpp>
-#include <boost/python/list.hpp>
-
 #include <speckley/Speckley.h>
 #include <speckley/SpeckleyException.h>
 #include <speckley/AbstractAssembler.h>
@@ -30,6 +27,9 @@
 #include <escript/FunctionSpace.h>
 #include <escript/SubWorld.h>
 
+#include <boost/python/tuple.hpp>
+#include <boost/python/list.hpp>
+
 namespace speckley {
 
 enum assembler_t {
@@ -97,6 +97,12 @@ public:
     ~SpeckleyDomain();
 
     /**
+     \brief
+     returns a reference to the MPI information wrapper for this domain
+    */
+    virtual escript::JMPI getMPI() const { return m_mpiInfo; }
+
+    /**
        \brief
        returns the number of processors used for this domain
     */
@@ -619,14 +625,12 @@ public:
     virtual void readBinaryGrid(escript::Data& out, std::string filename,
                                 const ReaderParameters& params) const = 0;
 
-#ifdef USE_BOOSTIO
     /**
        \brief
        reads grid data from a compressed raw binary file into a Data object
     */
     virtual void readBinaryGridFromZipped(escript::Data& out,
                std::string filename, const ReaderParameters& params) const = 0;
-#endif
 
     /**
        \brief
@@ -723,7 +727,7 @@ public:
 protected:
     int m_numDim;
     StatusType m_status;
-    esysUtils::JMPI m_mpiInfo;
+    escript::JMPI m_mpiInfo;
     TagMap m_tagMap;
     mutable std::vector<int> m_nodeTags, m_nodeTagsInUse;
     mutable std::vector<int> m_elementTags, m_elementTagsInUse;
diff --git a/speckley/src/SpeckleyException.cpp b/speckley/src/SpeckleyException.cpp
deleted file mode 100644
index 59cce1d..0000000
--- a/speckley/src/SpeckleyException.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-
-/*****************************************************************************
-*
-* Copyright (c) 2003-2016 by The University of Queensland
-* http://www.uq.edu.au
-*
-* Primary Business: Queensland, Australia
-* Licensed under the Apache License, version 2.0
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
-* Development 2012-2013 by School of Earth Sciences
-* Development from 2014 by Centre for Geoscience Computing (GeoComp)
-*
-*****************************************************************************/
-
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include <speckley/SpeckleyException.h>
-
-namespace speckley {
-
-const std::string SpeckleyException::exceptionNameValue("SpeckleyException");
-
-const std::string& SpeckleyException::exceptionName() const
-{
-    return exceptionNameValue;
-}
-
-} // namespace speckley
-
diff --git a/speckley/src/SpeckleyException.h b/speckley/src/SpeckleyException.h
index b2e0b41..b31f974 100644
--- a/speckley/src/SpeckleyException.h
+++ b/speckley/src/SpeckleyException.h
@@ -14,11 +14,11 @@
 *
 *****************************************************************************/
 
-#ifndef __Speckley_EXCEPTION_H__
-#define __Speckley_EXCEPTION_H__
+#ifndef __SPECKLEY_EXCEPTION_H__
+#define __SPECKLEY_EXCEPTION_H__
 
 #include <speckley/system_dep.h>
-#include <esysUtils/EsysException.h>
+#include <escript/EsysException.h>
 
 namespace speckley {
 
@@ -26,66 +26,13 @@ namespace speckley {
    \brief
    SpeckleyException exception class.
 */
-class Speckley_DLL_API SpeckleyException : public esysUtils::EsysException
+class SpeckleyException : public escript::EsysException
 {
-protected:
-    typedef EsysException Parent;
-
 public:
-    /**
-       \brief
-       Default constructor for the exception.
-    */
-    SpeckleyException() : Parent() { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    SpeckleyException(const char *cstr) : Parent(cstr) { updateMessage(); }
-
-    /**
-       \brief
-       Constructor with message.
-    */
-    SpeckleyException(const std::string &str) : Parent(str) { updateMessage(); }
-
-    /**
-       \brief
-       Copy Constructor.
-    */
-    SpeckleyException(const SpeckleyException &other) : Parent(other)
-    {
-        updateMessage();
-    }
-
-    /// Destructor
-    virtual ~SpeckleyException() THROW(NO_ARG) {}
-
-    /**
-       \brief
-       Assignment operator.
-    */
-    inline SpeckleyException& operator=(const SpeckleyException &other ) THROW(NO_ARG)
-    {
-        Parent::operator=(other);
-        updateMessage();
-        return *this;
-    }
-
-    /**
-       \brief
-       Returns the name of the exception.
-    */
-    virtual const std::string& exceptionName() const;
-
-private:
-    //
-    // the exception name is immutable and class-wide.
-    static const std::string exceptionNameValue;
+    SpeckleyException(const std::string& str) : escript::EsysException(str) {}
 };
 
 } // end of namespace speckley
 
-#endif // __Speckley_EXCEPTION_H__
+#endif // __SPECKLEY_EXCEPTION_H__
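
With escript::EsysException now carrying the message handling, SpeckleyException shrinks to a single forwarding constructor and the exceptionName/assignment boilerplate (together with SpeckleyException.cpp above) goes away. Call sites are unchanged; a minimal usage sketch, assuming EsysException ultimately derives from std::exception so that what() returns the message passed at the throw site:

    // Usage sketch; the catch clause assumes a std::exception base.
    #include <iostream>
    #include <string>
    #include <speckley/SpeckleyException.h>

    static void openOrThrow(const std::string& filename, bool ok)
    {
        if (!ok)
            throw speckley::SpeckleyException(
                    "readBinaryGrid(): cannot open file " + filename);
    }

    int main() {
        try {
            openOrThrow("grid.bin", false);
        } catch (const std::exception& e) {
            std::cerr << e.what() << '\n';   // message supplied above
        }
        return 0;
    }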
 
diff --git a/speckley/src/WaveAssembler2D.cpp b/speckley/src/WaveAssembler2D.cpp
index 7aac117..ac182cd 100644
--- a/speckley/src/WaveAssembler2D.cpp
+++ b/speckley/src/WaveAssembler2D.cpp
@@ -14,13 +14,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/WaveAssembler2D.h>
 #include <speckley/domainhelpers.h>
-#include <esysUtils/index.h>
+
+#include <escript/index.h>
 
 const double all_weights[][11] = {
     {0.333333333333, 1.33333333333, 0.333333333333},
diff --git a/speckley/src/WaveAssembler2D.h b/speckley/src/WaveAssembler2D.h
index 4334358..34341b7 100644
--- a/speckley/src/WaveAssembler2D.h
+++ b/speckley/src/WaveAssembler2D.h
@@ -31,7 +31,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Rectangle>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Rectangle>(dom);
         isHTI = isVTI = false;
         DataMap::const_iterator a = c.find("c12"), b = c.find("c23");
         if (c.find("c11") == c.end()
@@ -171,7 +171,7 @@ public:
                                            const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Rectangle> domain;
+    POINTER_WRAPPER_CLASS(const Rectangle) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/speckley/src/WaveAssembler3D.cpp b/speckley/src/WaveAssembler3D.cpp
index 432dfce..563855f 100644
--- a/speckley/src/WaveAssembler3D.cpp
+++ b/speckley/src/WaveAssembler3D.cpp
@@ -14,13 +14,10 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/WaveAssembler3D.h>
 #include <speckley/domainhelpers.h>
-#include <esysUtils/index.h>
+
+#include <escript/index.h>
 
 const double all_weights[][11] = {
     {0.333333333333, 1.33333333333, 0.333333333333},
diff --git a/speckley/src/WaveAssembler3D.h b/speckley/src/WaveAssembler3D.h
index 442cb07..0194486 100644
--- a/speckley/src/WaveAssembler3D.h
+++ b/speckley/src/WaveAssembler3D.h
@@ -16,12 +16,14 @@
 #ifndef __SPECKLEY_WAVE_ASSEMBLER_3D_H__
 #define __SPECKLEY_WAVE_ASSEMBLER_3D_H__
 
-#include <map>
-#include <escript/Data.h>
 #include <speckley/Speckley.h>
-#include <speckley/SpeckleyException.h>
 #include <speckley/AbstractAssembler.h>
 #include <speckley/Brick.h>
+#include <speckley/SpeckleyException.h>
+
+#include <escript/Data.h>
+
+#include <map>
 
 namespace speckley {
 
@@ -36,7 +38,7 @@ public:
         m_NE(NE),
         m_NN(NN)
     {
-        domain = boost::static_pointer_cast<const Brick>(dom);
+        domain = REFCOUNTNS::static_pointer_cast<const Brick>(dom);
         isHTI = isVTI = false;
         DataMap::const_iterator a = c.find("c12"), b = c.find("c23");
         if (c.find("c11") == c.end()
@@ -172,7 +174,7 @@ public:
                                    const DataMap& coefs) const;
 
 protected:
-    boost::shared_ptr<const Brick> domain;
+    POINTER_WRAPPER_CLASS(const Brick) domain;
     const double *m_dx;
     const dim_t *m_NE;
     const dim_t *m_NN;
diff --git a/speckley/src/domainhelpers.cpp b/speckley/src/domainhelpers.cpp
index 66d64ce..58ac278 100644
--- a/speckley/src/domainhelpers.cpp
+++ b/speckley/src/domainhelpers.cpp
@@ -14,15 +14,12 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/domainhelpers.h>
 #include <speckley/SpeckleyException.h>
+
 #include <cmath>
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/filtering_stream.hpp>
 #endif
@@ -43,7 +40,7 @@ void factorise(std::vector<int>& factors, int product)
     }
 }
 
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 std::vector<char> unzip(const std::vector<char>& compressed)
 {
     std::vector<char> decompressed = std::vector<char>();
diff --git a/speckley/src/domainhelpers.h b/speckley/src/domainhelpers.h
index 5c2981d..762498d 100644
--- a/speckley/src/domainhelpers.h
+++ b/speckley/src/domainhelpers.h
@@ -48,13 +48,12 @@ inline bool isNotEmpty(const std::string target, const DataMap& mapping)
 */
 void factorise(std::vector<int>& factors, int product);
 
-
-#ifdef USE_BOOSTIO
+#ifdef ESYS_HAVE_BOOST_IO
 /**
     converts the given gzip compressed char vector into an uncompressed form 
 */
 std::vector<char> unzip(const std::vector<char>& compressed);
-#endif // USE_BOOSTIO
+#endif // ESYS_HAVE_BOOST_IO
 
 } //namespace speckley
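
domainhelpers keeps its gzip helper behind the renamed guard; only the declaration of unzip() appears in this hunk, but the headers pulled in by domainhelpers.cpp (boost::iostreams' gzip filter and filtering stream) indicate the shape of the implementation. A sketch of that kind of decompression, not the project's actual body:

    // Sketch of gzip-decompressing a char vector with boost::iostreams,
    // approximating what speckley's unzip() declares.
    #include <boost/iostreams/copy.hpp>
    #include <boost/iostreams/device/array.hpp>
    #include <boost/iostreams/device/back_inserter.hpp>
    #include <boost/iostreams/filter/gzip.hpp>
    #include <boost/iostreams/filtering_stream.hpp>
    #include <vector>

    std::vector<char> unzip_sketch(const std::vector<char>& compressed)
    {
        namespace bio = boost::iostreams;
        std::vector<char> decompressed;

        bio::filtering_istream in;
        in.push(bio::gzip_decompressor());                           // inflate
        in.push(bio::array_source(compressed.data(), compressed.size()));
        bio::copy(in, bio::back_inserter(decompressed));             // drain
        return decompressed;
    }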
 
diff --git a/speckley/src/speckleycpp.cpp b/speckley/src/speckleycpp.cpp
index f5fa098..fe47bb3 100644
--- a/speckley/src/speckleycpp.cpp
+++ b/speckley/src/speckleycpp.cpp
@@ -14,14 +14,12 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <speckley/AbstractAssembler.h>
 #include <speckley/Brick.h>
 #include <speckley/Rectangle.h>
-#include <esysUtils/esysExceptionTranslator.h>
+
+#include <escript/ExceptionTranslators.h>
+#include <escript/SubWorld.h>
 
 #include <boost/python.hpp> 
 #include <boost/python/module.hpp>
@@ -29,8 +27,6 @@
 #include <boost/python/detail/defaults_gen.hpp>
 #include <boost/version.hpp>
 
-#include "escript/SubWorld.h"
-
 using namespace boost::python;
 
 namespace speckley {
@@ -86,12 +82,12 @@ escript::Data readBinaryGrid(std::string filename, escript::FunctionSpace fs,
     return res;
 }
 
-#ifdef USE_BOOSTIO
 escript::Data readBinaryGridFromZipped(std::string filename, escript::FunctionSpace fs,
         const object& pyShape, double fill, int byteOrder, int dataType,
         const object& pyFirst, const object& pyNum, const object& pyMultiplier,
         const object& pyReverse)
 {
+#ifdef ESYS_HAVE_BOOST_IO
     int dim=fs.getDim();
     ReaderParameters params;
 
@@ -110,8 +106,10 @@ escript::Data readBinaryGridFromZipped(std::string filename, escript::FunctionSp
     escript::Data res(fill, shape, fs, true);
     dom->readBinaryGridFromZipped(res, filename, params);
     return res;
-}
+#else
+    throw SpeckleyException("Speckley was not built with zip support!");
 #endif
+}
 
 escript::Data readNcGrid(std::string filename, std::string varname,
         escript::FunctionSpace fs, const object& pyShape, double fill,
@@ -325,7 +323,9 @@ BOOST_PYTHON_MODULE(speckleycpp)
     docstring_options docopt(true, true, false);
 #endif
 
-    register_exception_translator<speckley::SpeckleyException>(&(esysUtils::RuntimeErrorTranslator));
+    // register escript's default translators
+    REGISTER_ESCRIPT_EXCEPTION_TRANSLATORS;
+    register_exception_translator<speckley::SpeckleyException>(&escript::RuntimeErrorTranslator);
 
     scope().attr("__doc__") = "To use this module, please import esys.speckley";
     scope().attr("BYTEORDER_NATIVE") = (int)speckley::BYTEORDER_NATIVE;
@@ -363,13 +363,11 @@ BOOST_PYTHON_MODULE(speckleycpp)
                 arg("byteOrder"), arg("dataType"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
 "Reads a binary Grid");
-#ifdef USE_BOOSTIO
     def("_readBinaryGridFromZipped", &speckley::readBinaryGridFromZipped, (arg("filename"),
                 arg("functionspace"), arg("shape"), arg("fill")=0.,
                 arg("byteOrder"), arg("dataType"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
 "Reads a binary Grid");
-#endif
     def("_readNcGrid", &speckley::readNcGrid, (arg("filename"), arg("varname"),
                 arg("functionspace"), arg("shape"), arg("fill"), arg("first"),
                 arg("numValues"), arg("multiplier"), arg("reverse")),
diff --git a/speckley/src/system_dep.h b/speckley/src/system_dep.h
index 2879145..425bba4 100644
--- a/speckley/src/system_dep.h
+++ b/speckley/src/system_dep.h
@@ -17,7 +17,7 @@
 #ifndef __SPECKLEY_SYSTEM_DEP_H__
 #define __SPECKLEY_SYSTEM_DEP_H__
 
-#include <cmath>
+#include <escript/DataTypes.h>
 
 #define Speckley_DLL_API
 
diff --git a/speckley/test/SConscript b/speckley/test/SConscript
index a403ec7..cea8c7b 100644
--- a/speckley/test/SConscript
+++ b/speckley/test/SConscript
@@ -14,31 +14,9 @@
 #
 ##############################################################################
 
-
 Import('*')
 local_env = env.Clone()
 
-#if local_env['cppunit']:
-#    # get the test source file names
-#    sources = Glob('*.cpp')
-#    testname='speckley_UnitTest'
-#
-#    # build the executable
-#    local_env.Prepend(LIBS=['speckley', 'escript', 'esysUtils']+env['cppunit_libs'])
-#    program = local_env.Program(testname, sources)
-#
-#    # run the tests - but only if test_targets are stale
-#    local_env.RunUnitTest(testname)
-#
-#    # add unit test to target alias
-#    Alias('build_tests', program)
-#    Alias("run_tests", testname+'.passed')
-#
-#    # add a group of tests
-#    from grouptest import *
-#    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/speckley/test", ('./'+testname,))
-#    TestGroups.append(tgroup)
-
 # configure python unit tests
-local_env.SConscript(dirs = ['#/speckley/test/python'], variant_dir='python', duplicate=0, exports=['py_wrapper_lib'])
+local_env.SConscript('python/SConscript', duplicate=0)
 
diff --git a/speckley/test/python/SConscript b/speckley/test/python/SConscript
index e465ca6..a4eca82 100644
--- a/speckley/test/python/SConscript
+++ b/speckley/test/python/SConscript
@@ -14,37 +14,34 @@
 #
 ##############################################################################
 
-import os
+from os.path import splitext
 Import('*')
 
 local_env = env.Clone()
 
-# 
 #  files defining test runs (passing in a release)
-# 
 testruns = Glob('run_*.py', strings=True)
 
-#Add Unit Test to target alias
+# add unit test to target alias
 local_env.PrependENVPath('PYTHONPATH', Dir('#/speckley/test/python/').abspath)
 local_env.PrependENVPath('PYTHONPATH', Dir('.').abspath)
 local_env.PrependENVPath('PYTHONPATH', env.Dir('$BUILD_DIR/$PLATFORM/escriptcore/test/python').abspath)
-local_env['ENV']['SPECKLEY_TEST_DATA']=Dir('.').srcnode().abspath
-local_env['ENV']['SPECKLEY_WORKDIR']=Dir('.').abspath
+local_env['ENV']['SPECKLEY_TEST_DATA'] = Dir('.').srcnode().abspath
+local_env['ENV']['SPECKLEY_WORKDIR'] = Dir('.').abspath
 # needed for a test from the util base class in escript
-local_env['ENV']['ESCRIPT_WORKDIR']=Dir('.').abspath
-env.Alias('local_py_tests',[os.path.splitext(x)[0]+'.passed' for x in testruns])
-env.Alias('py_tests', [os.path.splitext(x)[0]+'.passed' for x in testruns])
+local_env['ENV']['ESCRIPT_WORKDIR'] = Dir('.').abspath
+env.Alias('local_py_tests',[splitext(x)[0]+'.passed' for x in testruns])
+env.Alias('py_tests', [splitext(x)[0]+'.passed' for x in testruns])
 
 # run all tests
 program = local_env.RunPyUnitTest(testruns)
-Depends(program, py_wrapper_lib)
-Depends(program, 'build_py_tests')
+Requires(program, ['install_escript', 'build_py_tests'])
 if env['usempi']:
-    Depends(program, env['prefix']+"/lib/pythonMPI")
+    Requires(program, ['install_pythonMPI'])
 
 # Add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("SPECKLEY_TEST_DATA","$BATCH_ROOT/speckley/test/python"),('SPECKLEY_WORKDIR','$BUILD_DIR/speckley/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/speckley/test/python","$BATCH_ROOT/speckley/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("speckley", "$PYTHONRUNNER ", (("SPECKLEY_TEST_DATA","$BATCH_ROOT/speckley/test/python"),('SPECKLEY_WORKDIR','$BUILD_DIR/speckley/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/speckley/test/python", "$BATCH_ROOT/speckley/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/speckley/test/python")
 TestGroups.append(tgroup)
 
diff --git a/speckley/test/python/run_readWriteOnSpeckley.py b/speckley/test/python/run_readWriteOnSpeckley.py
index 8a6a9fa..a4825db 100644
--- a/speckley/test/python/run_readWriteOnSpeckley.py
+++ b/speckley/test/python/run_readWriteOnSpeckley.py
@@ -35,6 +35,8 @@ try:
 except KeyError:
      SPECKLEY_WORKDIR='/tmp'
 
+HAVE_UNZIP = hasFeature('unzip')
+
 #NE=4 # number elements, must be even
 #for x in [int(sqrt(mpiSize)),2,3,5,7,1]:
 #    NX=x
@@ -59,7 +61,7 @@ def adjust(NE, ftype):
 
 
 class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
-    NX = 4*mpiSize
+    NX = 4*min(10,mpiSize)
     NZ = 4
 
     def generateUniqueData(self, ftype):
@@ -96,7 +98,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
             self.domain = Rectangle(order, self.NE[0], self.NE[1], d0=mpiSize)
             data, ref = self.generateUniqueData(ContinuousFunction)
             result = self.writeThenRead(data, ContinuousFunction, 'CF')
-            self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ContinuousFunction(self.domain)))
 
     def test_writeGrid3D(self):
@@ -106,7 +108,7 @@ class WriteBinaryGridTestBase(unittest.TestCase): #subclassing required
             for ftype,fcode in [(ContinuousFunction,'CF')]:
                 data, ref = self.generateUniqueData(ftype)
                 result = self.writeThenRead(data, ftype, fcode)
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain)))
 
 class Test_writeBinaryGridSpeckley_LITTLE_FLOAT32(WriteBinaryGridTestBase):
@@ -242,7 +244,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                         else:
                             ref = np.append(ref, extra, axis=1-d)
                 # step 4 - compare
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain))+\
                         "%d"%order)
 
@@ -283,7 +285,7 @@ class ReadBinaryGridTestBase(unittest.TestCase): #subclassing required
                             ref = np.append(ref, extra, axis=2-d)
 
                 # step 4 - compare
-                self.assertAlmostEquals(Lsup(ref-result), 0, delta=1e-9,
+                self.assertAlmostEqual(Lsup(ref-result), 0, delta=1e-9,
                         msg="Data doesn't match for "+str(ftype(self.domain))+\
                             "%d"%order)
 
@@ -315,7 +317,7 @@ class Test_readBinaryGridSpeckley_LITTLE_FLOAT64(ReadBinaryGridTestBase):
             filename = filename + self.dtype.replace('<','L').replace('>','B')
             self.write(data, filename)
             result = self.read(filename, ContinuousFunction)
-            self.assertAlmostEquals(Lsup(data-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(data-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ContinuousFunction(self.domain)))
 
     #since using getX as the test, doubles required
@@ -330,7 +332,7 @@ class Test_readBinaryGridSpeckley_LITTLE_FLOAT64(ReadBinaryGridTestBase):
             filename = filename + self.dtype.replace('<','L').replace('>','B')
             self.write(data, filename)
             result = self.read(filename, ContinuousFunction)
-            self.assertAlmostEquals(Lsup(data-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(data-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ContinuousFunction(self.domain)))
 
 class Test_readBinaryGridSpeckley_LITTLE_INT32(ReadBinaryGridTestBase):
@@ -363,7 +365,7 @@ class Test_readBinaryGridSpeckley_BIG_FLOAT64(ReadBinaryGridTestBase):
             filename = filename + self.dtype.replace('<','L').replace('>','B')
             self.write(data, filename)
             result = self.read(filename, ContinuousFunction)
-            self.assertAlmostEquals(Lsup(data-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(data-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ContinuousFunction(self.domain)))
 
     #since using getX as the test, doubles required
@@ -378,7 +380,7 @@ class Test_readBinaryGridSpeckley_BIG_FLOAT64(ReadBinaryGridTestBase):
             filename = filename + self.dtype.replace('<','L').replace('>','B')
             self.write(data, filename)
             result = self.read(filename, ContinuousFunction)
-            self.assertAlmostEquals(Lsup(data-result), 0, delta=1e-9,
+            self.assertAlmostEqual(Lsup(data-result), 0, delta=1e-9,
                     msg="Data doesn't match for "+str(ContinuousFunction(self.domain)))
 
 
@@ -435,7 +437,7 @@ class Test_readBinaryGridZippedSpeckley(unittest.TestCase):
             return readBinaryGrid(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
 
-        if not hasattr(speckleycpp, "_readBinaryGridFromZipped"):
+        if not HAVE_UNZIP:
             raise unittest.SkipTest("unzip library not available (boost_iostreams)")
         return speckleycpp._readBinaryGridFromZipped(filename, FS, (), 50000,
                 self.byteorder, self.datatype, first, expected, scale, reverse)
diff --git a/speckley/test/python/run_specialOnSpeckley.py b/speckley/test/python/run_specialOnSpeckley.py
index b03416e..f267d83 100644
--- a/speckley/test/python/run_specialOnSpeckley.py
+++ b/speckley/test/python/run_specialOnSpeckley.py
@@ -54,7 +54,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                     f *= whereNegative(dom.getX()[dim]-6)
                 res = Lsup(f)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Rectangle_XY_single(self):
@@ -77,7 +77,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                     f *= whereNegative(dom.getX()[dim]-6)
                 res = Lsup(f)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Brick_XY_system(self):
@@ -102,7 +102,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                     f *= whereNegative(dom.getX()[dim]-6)
                 res = Lsup(f)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Rectangle_XY_system(self):
@@ -125,7 +125,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                 f *= whereNegative(dom.getX()[0]-6)*whereNegative(dom.getX()[1]-6)
                 res = Lsup(f)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Brick_Du_Y_single(self):
@@ -150,7 +150,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                 
                 res = Lsup((rhs/lhs)-2)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
     
     def test_Rectangle_Du_Y_single(self):
@@ -175,7 +175,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                 
                 res = Lsup((rhs/lhs)-2)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Brick_Du_Y_system(self):
@@ -202,7 +202,7 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                 
                 res = Lsup((rhs/lhs)-2)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
     def test_Rectangle_Du_Y_system(self):
@@ -229,11 +229,11 @@ class Test_Speckley_Assemblers(unittest.TestCase):
                 
                 res = Lsup((rhs/lhs)-2)
                 self.assertLess(res, self.TOLERANCE,
-                        ("assembly for {0}expanded order %d failed with %g >= %g"%(order,
+                        ("assembly for {0}expanded order %d failed with %e >= %e"%(order,
                         res, self.TOLERANCE)).format("" if expanded else "un-"))
 
 class Test_Speckley(unittest.TestCase):
-    TOLERANCE = 1e-10
+    TOLERANCE = 5e-10
     def test_Rectangle_ReducedFunction(self):
         ranks = getMPISizeWorld()
         for order in range(2, 11):
@@ -245,11 +245,11 @@ class Test_Speckley(unittest.TestCase):
                     (interpolate(redData, ContinuousFunction(dom)), "ContinuousFunction")]
             for d, fs in data:
                 self.assertLess(inf(d-[0.5]*2), self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != 0"%(fs, order, inf(d-[0.5]*2)))
+                        "reduced->%s failure with order %d: %e != 0"%(fs, order, inf(d-[0.5]*2)))
                 self.assertLess(sup(d[0]+0.5) - 3, self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != 3"%(fs, order, sup(d[0]+0.5)))
+                        "reduced->%s failure with order %d: %e != 3"%(fs, order, sup(d[0]+0.5)))
                 self.assertLess(sup(d[1]+0.5) - 3*ranks, self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != %g"%(fs, order, sup(d[1]+0.5), 3*ranks))
+                        "reduced->%s failure with order %d: %e >= %e"%(fs, order, sup(d[1]+0.5)-3*ranks, self.TOLERANCE))
 
     def test_Brick_ReducedFunction(self):
         ranks = getMPISizeWorld()
@@ -262,13 +262,13 @@ class Test_Speckley(unittest.TestCase):
                     (interpolate(redData, ContinuousFunction(dom)), "ContinuousFunction")]
             for d, fs in data:
                 self.assertLess(inf(d-[0.5]*3), self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != 0"%(fs, order, inf(d-[0.5]*3)))
+                        "reduced->%s failure with order %d: %e != 0"%(fs, order, inf(d-[0.5]*3)))
                 self.assertLess(sup(d[0]+0.5) - 3, self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != 3"%(fs, order, sup(d[0]+0.5)))
+                        "reduced->%s failure with order %d: %e != 3"%(fs, order, sup(d[0]+0.5)))
                 self.assertLess(sup(d[1]+0.5) - 3*ranks, self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != %g"%(fs, order, sup(d[1]+0.5), 3*ranks))
+                        "reduced->%s failure with order %d: %e >= %e"%(fs, order, sup(d[1]+0.5)-3*ranks, self.TOLERANCE))
                 self.assertLess(sup(d[2]+0.5) - 3, self.TOLERANCE,
-                        "reduced->%s failure with order %d: %g != 3"%(fs, order, sup(d[2]+0.5)))
+                        "reduced->%s failure with order %d: %e != 3"%(fs, order, sup(d[2]+0.5)))
 
     def test_Rectangle_Function_gradient(self): #expanded and non-expanded
         ranks = getMPISizeWorld()
@@ -277,14 +277,14 @@ class Test_Speckley(unittest.TestCase):
                 dom = Rectangle(order, 3, 3*ranks, d1=ranks)
                 x = Data(5, Function(dom), True)
                 self.assertLess(Lsup(grad(x)), 1e-10,
-                        "single component failure, order %d%s, %g >= 1e-10"%(order,
+                        "single component failure, order %d%s, %e >= 1e-10"%(order,
                         (" expanded" if expanded else ""), Lsup(grad(x))))
                 for data in [[5,1], [-5,-1], [5,1,1e-5]]:
                     x = Data(data, Function(dom), True)
                     g = grad(x)
                     for n,d in enumerate(data):
                         self.assertLess(Lsup(g[n]), 1e-10,
-                                "%d-component failure, order %d %sexpanded, %g >= 1e-10"%(len(data),
+                                "%d-component failure, order %d %sexpanded, %e >= 1e-10"%(len(data),
                                 order, ("" if expanded else "un-"), Lsup(g[n])))
 
     def test_Brick_Function_gradient(self):
@@ -294,14 +294,14 @@ class Test_Speckley(unittest.TestCase):
                 dom = Brick(order, 3, 3*ranks, 3, d1=ranks)
                 x = Data(5, Function(dom), True)
                 self.assertLess(Lsup(grad(x)), 1e-10,
-                        "single component failure, order %d%s, %g >= 1e-10"%(order,
+                        "single component failure, order %d%s, %e >= 1e-10"%(order,
                         (" expanded" if expanded else ""), Lsup(grad(x))))
                 for data in [[5,1], [-5,-1], [5,1,1e-5]]:
                     x = Data(data, Function(dom), True)
                     g = grad(x)
                     for n,d in enumerate(data):
                         self.assertLess(Lsup(g[n]), 1e-10,
-                                "%d-component failure, order %d %sexpanded, %g >= 1e-10"%(len(data),
+                                "%d-component failure, order %d %sexpanded, %e >= 1e-10"%(len(data),
                                 order, ("" if expanded else "un-"), Lsup(g[n])))
 
 
@@ -312,7 +312,7 @@ class Test_Speckley(unittest.TestCase):
             X = dom.getX()
             u = X[0] + X[1] + 1
             v = Lsup(grad(u) - 1)
-            self.assertLess(v, 1e-10, "order %d, %g >= 1e-10, %s"%(order, v, str(grad(u)-1)))
+            self.assertLess(v, 1e-10, "order %d, %e >= 1e-10, %s"%(order, v, str(grad(u)-1)))
             for power in range(1, order+1):
                 for power2 in range(1, order+1):
                     a = X[0]**power * X[1]**power2
@@ -322,10 +322,10 @@ class Test_Speckley(unittest.TestCase):
                     second = Lsup(da[1] - power2*X[1]**(power2-1) * X[0]**power) \
                             /Lsup(power2*X[1]**(power2-1) * X[0]**power)
                     self.assertLess(first, 1e-10,
-                            "order %d and degree %d,%d, %g >= 1e-9"%(order,
+                            "order %d and degree %d,%d, %e >= 1e-10"%(order,
                             power, power2, first))
                     self.assertLess(second, 1e-10,
-                            "order %d and degree %d,%d, %g >= 1e-9"%(order,
+                            "order %d and degree %d,%d, %e >= 1e-10"%(order,
                             power, power2, second))
 
     def test_Brick_ContinuousFunction_gradient(self):
@@ -335,7 +335,7 @@ class Test_Speckley(unittest.TestCase):
             X = dom.getX()
             u = X[0] + X[1] + X[2] + 1
             v = Lsup(grad(u) - 1)
-            self.assertLess(v, 1e-10, "order %d, %g >= 1e-10, %s"%(order, v,
+            self.assertLess(v, 1e-10, "order %d, %e >= 1e-10, %s"%(order, v,
                     str(grad(u)-1)))
             for power1 in range(1, order+1, order//2):
                 for power2 in range(1, order+1, order//2):
@@ -351,13 +351,13 @@ class Test_Speckley(unittest.TestCase):
                         third = Lsup(da[2] - temp) / Lsup(temp)
                                 
                         self.assertLess(first, 1e-10,
-                            "order %d and degree %d,%d,%d, %g >= 1e-9"%(order,
+                            "order %d and degree %d,%d,%d, %e >= 1e-10"%(order,
                             power1, power2, power3, first))
                         self.assertLess(second, 1e-10,
-                            "order %d and degree %d,%d,%d, %g >= 1e-9"%(order,
+                            "order %d and degree %d,%d,%d, %e >= 1e-10"%(order,
                             power1, power2, power3, second))
                         self.assertLess(third, 1e-10,
-                            "order %d and degree %d,%d,%d, %g >= 1e-9"%(order,
+                            "order %d and degree %d,%d,%d, %e >= 1e-10"%(order,
                             power1, power2, power3, third))
 
     def test_Rectangle_interpolation_continuous_noncontinuous_and_back(self):
@@ -368,18 +368,18 @@ class Test_Speckley(unittest.TestCase):
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of constant, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of constant, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
             x = dom.getX()
             original = x[0] + x[1] + 1
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of expanded, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of expanded, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
             original = whereZero(x[0]-2) + whereZero(x[1]-2)
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of point, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of point, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
 
     def test_Brick_interpolation_continuous_noncontinuous_and_back(self):
         ranks = getMPISizeWorld()
@@ -389,18 +389,18 @@ class Test_Speckley(unittest.TestCase):
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of constant, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of constant, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
             x = dom.getX()
             original = x[0] + x[1] + x[2] + 1
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of expanded, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of expanded, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
             original = whereZero(x[0]-2) + whereZero(x[1]-2) + whereZero(x[2] - 2)
             cont = interpolate(original, ContinuousFunction(dom))
             func = interpolate(cont, Function(dom))
             self.assertEqual(Lsup(original-func), 0,
-                    "interpolation of point, order %d: original and final not equal, %g != 0"%(order, Lsup(original-func)))
+                    "interpolation of point, order %d: original and final not equal, %e != 0"%(order, Lsup(original-func)))
 
     def test_Rectangle_integration(self):
         ranks = getMPISizeWorld()
@@ -431,7 +431,7 @@ class Test_Speckley(unittest.TestCase):
                                 /((k+1.)*(l+1.)*(m+1.))
                         res = abs(integral - actual)/actual
                         self.assertLess(res, 1e-11,
-                                "too much variance in integral result (order %d, degrees %d %d, %g >= 1e-11)"%(order,
+                                "too much variance in integral result (order %d, degrees %d %d, %e >= 1e-11)"%(order,
                                 k, l, res))
 
     @unittest.skipIf(getMPISizeWorld() == 1,
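
The hunks above change only the conversion specifier in the failure messages from %g to %e. As a purely illustrative, standalone snippet (plain Python, independent of escript): %g trims to six significant digits and uses exponent form only for extreme magnitudes, while %e always prints an explicit exponent, which makes a reported residual easier to compare against thresholds such as 1e-10 at a glance.

    for r in (4.17e-11, 0.000123, 3.0000000417):
        # left: what the old "%g" messages printed; right: the new "%e" form
        print("%-14g | %e" % (r, r))
    # 4.17e-11       | 4.170000e-11
    # 0.000123       | 1.230000e-04
    # 3              | 3.000000e+00
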
diff --git a/svn_version b/svn_version
index c8ad85c..5d5ee38 100644
--- a/svn_version
+++ b/svn_version
@@ -1 +1 @@
-6110
+6403
diff --git a/tools/escriptconvert/SConscript b/tools/escriptconvert/SConscript
index 13acccf..ea3a74b 100644
--- a/tools/escriptconvert/SConscript
+++ b/tools/escriptconvert/SConscript
@@ -19,7 +19,6 @@ Import('*')
 local_env = env.Clone()
 
 if local_env['silo']:
-    local_env.Append(CPPDEFINES = ['USE_SILO'])
     local_env.AppendUnique(LIBS = env['silo_libs'])
 
 local_env.Prepend(LIBS = ['esysUtils', 'weipa', 'dudley', 'finley', 'ripley', 'escript'])
diff --git a/tools/escriptconvert/escriptconvert.cpp b/tools/escriptconvert/escriptconvert.cpp
index 23fbe3f..80d3806 100644
--- a/tools/escriptconvert/escriptconvert.cpp
+++ b/tools/escriptconvert/escriptconvert.cpp
@@ -17,7 +17,7 @@
 #include <weipa/EscriptDataset.h>
 #include <weipa/DataVar.h>
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -47,7 +47,7 @@ string insertTimestep(const string& fString, int timeStep, int tsMultiplier)
 
 int usage()
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     cerr << "Usage: escriptconvert {-vtk|-silo} <file.esd>" << endl;
 #else
     cerr << "Note: escriptconvert was compiled without Silo support!" << endl;
@@ -79,7 +79,7 @@ int main(int argc, char** argv)
     bool doVTK = false, doSilo = false;
     string esdFile;
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (argc != 3) {
         cleanup();
         return usage();
@@ -95,7 +95,7 @@ int main(int argc, char** argv)
     }
     esdFile = string(argv[2]);
     
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     if (argc == 2) {
         esdFile = string(argv[1]);
     } else if (argc == 3) {
diff --git a/tools/overlord/SConscript b/tools/overlord/SConscript
index 75dc6f5..edbb175 100644
--- a/tools/overlord/SConscript
+++ b/tools/overlord/SConscript
@@ -19,7 +19,7 @@ Import('*')
 local_env = env.Clone()
 
 if not local_env['IS_WINDOWS']:
-    prog = local_env.Program('escript-overlord', ['overlord.c'])
+    prog = local_env.Program('escript-overlord', ['overlord.cpp'])
     env.Alias('build_overlord', prog)
     install_overlord = local_env.Install(local_env['bininstall'], prog)
     env.Alias('install_overlord', install_overlord) #oh no! our freedoms!
diff --git a/tools/overlord/overlord.c b/tools/overlord/overlord.cpp
similarity index 100%
rename from tools/overlord/overlord.c
rename to tools/overlord/overlord.cpp
diff --git a/scons/templates/jessie_mpi_options.py b/trilinoswrap/SConscript
similarity index 83%
copy from scons/templates/jessie_mpi_options.py
copy to trilinoswrap/SConscript
index cae5ae6..8c11dc7 100644
--- a/scons/templates/jessie_mpi_options.py
+++ b/trilinoswrap/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,6 +13,8 @@
 #
 ##############################################################################
 
-from .jessie_options import *
+Import('env')
+if env['trilinos']:
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-mpi='OPENMPI'
diff --git a/trilinoswrap/src/AbstractMatrixWrapper.h b/trilinoswrap/src/AbstractMatrixWrapper.h
new file mode 100644
index 0000000..7cfb700
--- /dev/null
+++ b/trilinoswrap/src/AbstractMatrixWrapper.h
@@ -0,0 +1,68 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_ABSTRACTMATRIXWRAPPER_H__
+#define __ESYS_TRILINOS_ABSTRACTMATRIXWRAPPER_H__
+
+#include <trilinoswrap/types.h>
+
+namespace escript {
+    class SolverBuddy;
+}
+
+namespace esys_trilinos {
+
+template<typename ST>
+class AbstractMatrixWrapper
+{
+public:
+    virtual ~AbstractMatrixWrapper() {}
+
+    virtual void resetValues(bool preserveSolverData) = 0;
+
+    /// notifies the matrix that changes are about to happen.
+    virtual void resumeFill() = 0;
+
+    /// notifies the matrix that a set of changes has occurred.
+    virtual void fillComplete(bool localOnly) = 0;
+
+    /// zeroes the masked rows and columns and sets their main diagonal entries to `mdv`
+    virtual void nullifyRowsAndCols(
+                              const Teuchos::ArrayView<const real_t>& rowMask,
+                              const Teuchos::ArrayView<const real_t>& colView,
+                              ST mdv) = 0;
+
+    /// adds entries of an element matrix to this matrix
+    virtual void add(const std::vector<LO>& rowIndex,
+                     const std::vector<ST>& array) = 0;
+
+    /// computes y += Ax
+    virtual void ypAx(const Teuchos::ArrayView<ST>& y,
+                      const Teuchos::ArrayView<const ST>& x) const = 0;
+
+    /// solves Ax = b
+    virtual void solve(const Teuchos::ArrayView<ST>& x,
+                       const Teuchos::ArrayView<const ST>& b,
+                       escript::SolverBuddy& sb) const = 0;
+
+    /// saves matrix in Matrix Market (MM) format
+    virtual void saveMM(const std::string& filename) const = 0;
+};
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_ABSTRACTMATRIXWRAPPER_H__
+
diff --git a/trilinoswrap/src/Amesos2Wrapper.cpp b/trilinoswrap/src/Amesos2Wrapper.cpp
new file mode 100644
index 0000000..a28faad
--- /dev/null
+++ b/trilinoswrap/src/Amesos2Wrapper.cpp
@@ -0,0 +1,204 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <trilinoswrap/Amesos2Wrapper.h>
+#include <trilinoswrap/TrilinosAdapterException.h>
+#include <trilinoswrap/util.h>
+
+#include <escript/SolverOptions.h>
+
+#include <Amesos2.hpp>
+#include <Tpetra_CrsMatrix.hpp>
+
+#include <boost/python/dict.hpp>
+
+using Teuchos::RCP;
+
+namespace bp = boost::python;
+
+namespace esys_trilinos {
+
+template<class Matrix, class Vector>
+RCP<DirectSolverType<Matrix,Vector> > createDirectSolver(
+                                               const escript::SolverBuddy& sb,
+                                               RCP<const Matrix> A,
+                                               RCP<Vector> X,
+                                               RCP<const Vector> B)
+{
+    using util::extractParamIfSet;
+
+    typedef typename Matrix::scalar_type ST;
+
+    RCP<DirectSolverType<Matrix,Vector> > solver;
+    RCP<Teuchos::ParameterList> amesosParams = Teuchos::parameterList("Amesos2");
+    const bp::dict& pyParams = sb.getTrilinosParameters();
+
+    const escript::SolverOptions method = sb.getSolverMethod();
+    // did the user request a specific direct solver?
+    const bool dontcare = method == escript::SO_METHOD_DIRECT;
+
+    if ((dontcare || method == escript::SO_METHOD_DIRECT_TRILINOS) &&
+            Amesos2::query("klu2")) {
+        solver = Amesos2::create<Matrix, Vector>("klu2", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        // the doco says these params exist but clearly they don't :-(
+        //solverParams.set("DiagPivotThresh", sb.getDiagonalDominanceThreshold());
+        //solverParams.set("SymmetricMode", sb.isSymmetric());
+        extractParamIfSet<std::string>("Trans", pyParams, solverParams);
+        extractParamIfSet<bool>("Equil", pyParams, solverParams);
+        extractParamIfSet<std::string>("IterRefine", pyParams, solverParams);
+        extractParamIfSet<bool>("SymmetricMode", pyParams, solverParams);
+        extractParamIfSet<ST>("DiagPivotThresh", pyParams, solverParams);
+        extractParamIfSet<std::string>("ColPerm", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_MUMPS) &&
+            Amesos2::query("MUMPS")) {
+        solver = Amesos2::create<Matrix, Vector>("MUMPS", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        if (sb.isVerbose()) {
+            solverParams.set("ICNTL(4)", 4);
+        }
+        extractParamIfSet<int>("ICNTL(1)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(2)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(3)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(4)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(6)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(9)", pyParams, solverParams);
+        extractParamIfSet<int>("ICNTL(11)", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_TRILINOS) &&
+            Amesos2::query("Basker")) {
+        solver = Amesos2::create<Matrix, Vector>("Basker", A, X, B);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_SUPERLU) &&
+            Amesos2::query("superludist")) {
+        solver = Amesos2::create<Matrix, Vector>("superludist", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        extractParamIfSet<int>("npcol", pyParams, solverParams);
+        extractParamIfSet<int>("nprow", pyParams, solverParams);
+        extractParamIfSet<std::string>("ColPerm", pyParams, solverParams);
+        extractParamIfSet<bool>("ReplaceTinyPivot", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_SUPERLU) &&
+            Amesos2::query("superlu")) {
+        solver = Amesos2::create<Matrix, Vector>("superlu", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        solverParams.set("DiagPivotThresh", sb.getDiagonalDominanceThreshold());
+        solverParams.set("ILU_DropTol", sb.getDropTolerance());
+        solverParams.set("SymmetricMode", sb.isSymmetric());
+        extractParamIfSet<std::string>("Trans", pyParams, solverParams);
+        extractParamIfSet<bool>("Equil", pyParams, solverParams);
+        extractParamIfSet<std::string>("IterRefine", pyParams, solverParams);
+        extractParamIfSet<bool>("SymmetricMode", pyParams, solverParams);
+        extractParamIfSet<ST>("DiagPivotThresh", pyParams, solverParams);
+        extractParamIfSet<std::string>("ColPerm", pyParams, solverParams);
+        extractParamIfSet<bool>("ILU_Flag", pyParams, solverParams);
+        extractParamIfSet<ST>("ILU_DropTol", pyParams, solverParams);
+        extractParamIfSet<ST>("ILU_FillFactor", pyParams, solverParams);
+        extractParamIfSet<std::string>("ILU_Norm", pyParams, solverParams);
+        extractParamIfSet<std::string>("ILU_MILU", pyParams, solverParams);
+        extractParamIfSet<ST>("ILU_FillTol", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_SUPERLU) &&
+            Amesos2::query("superlumt")) {
+        solver = Amesos2::create<Matrix, Vector>("superlumt", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        solverParams.set("nprocs", omp_get_max_threads());
+        solverParams.set("DiagPivotThresh", sb.getDiagonalDominanceThreshold());
+        solverParams.set("SymmetricMode", sb.isSymmetric());
+        extractParamIfSet<int>("nprocs", pyParams, solverParams);
+        extractParamIfSet<std::string>("trans", pyParams, solverParams);
+        extractParamIfSet<int>("panel_size", pyParams, solverParams);
+        extractParamIfSet<int>("relax", pyParams, solverParams);
+        extractParamIfSet<bool>("Equil", pyParams, solverParams);
+        extractParamIfSet<bool>("SymmetricMode", pyParams, solverParams);
+        extractParamIfSet<ST>("DiagPivotThresh", pyParams, solverParams);
+        extractParamIfSet<std::string>("ColPerm", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if ((dontcare || method == escript::SO_METHOD_DIRECT_PARDISO) &&
+            Amesos2::query("pardiso_mkl")) {
+        solver = Amesos2::create<Matrix, Vector>("pardiso_mkl", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        extractParamIfSet<int>("IPARM(2)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(4)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(8)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(10)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(18)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(24)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(25)", pyParams, solverParams);
+        extractParamIfSet<int>("IPARM(60)", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if (Amesos2::query("amesos2_cholmod")) {
+        solver = Amesos2::create<Matrix, Vector>("amesos2_cholmod", A, X, B);
+        Teuchos::ParameterList solverParams(solver->name());
+        solverParams.set("DiagPivotThresh", sb.getDiagonalDominanceThreshold());
+        solverParams.set("SymmetricMode", sb.isSymmetric());
+        extractParamIfSet<std::string>("Trans", pyParams, solverParams);
+        extractParamIfSet<bool>("Equil", pyParams, solverParams);
+        extractParamIfSet<std::string>("IterRefine", pyParams, solverParams);
+        extractParamIfSet<bool>("SymmetricMode", pyParams, solverParams);
+        extractParamIfSet<ST>("DiagPivotThresh", pyParams, solverParams);
+        extractParamIfSet<std::string>("ColPerm", pyParams, solverParams);
+        amesosParams->set(solver->name(), solverParams);
+    } else if (Amesos2::query("lapack")) {
+        solver = Amesos2::create<Matrix, Vector>("lapack", A, X, B);
+    } else {
+        if (dontcare) {
+            throw TrilinosAdapterException("Could not find an Amesos2 direct solver!");
+        } else {
+            throw TrilinosAdapterException("The requested direct solver is not available!");
+        }
+    }
+    solver->setParameters(amesosParams);
+    return solver;
+}
+
+typedef Tpetra::CrsMatrix<real_t,LO,GO,NT> RealMatrix;
+typedef Tpetra::CrsMatrix<cplx_t,LO,GO,NT> ComplexMatrix;
+
+// instantiate
+template
+RCP<DirectSolverType<RealMatrix, RealVector> >
+createDirectSolver<RealMatrix,RealVector>(const escript::SolverBuddy& sb,
+                                          RCP<const RealMatrix> A,
+                                          RCP<RealVector> X,
+                                          RCP<const RealVector> B);
+template
+RCP<DirectSolverType<ComplexMatrix, ComplexVector> >
+createDirectSolver<ComplexMatrix, ComplexVector>(
+                                          const escript::SolverBuddy& sb,
+                                          RCP<const ComplexMatrix> A,
+                                          RCP<ComplexVector> X,
+                                          RCP<const ComplexVector> B);
+
+/* Amesos2 does not currently support block matrices!
+template
+RCP<DirectSolverType<RealBlockMatrix, RealBlockVector> >
+createDirectSolver<RealBlockMatrix,RealBlockVector>(
+                                          const escript::SolverBuddy& sb,
+                                          RCP<const RealBlockMatrix> A,
+                                          RCP<RealBlockVector> X,
+                                          RCP<const RealBlockVector> B);
+template
+RCP<DirectSolverType<ComplexBlockMatrix, ComplexBlockVector> >
+createDirectSolver<ComplexBlockMatrix, ComplexBlockVector>(
+                                          const escript::SolverBuddy& sb,
+                                          RCP<const ComplexBlockMatrix> A,
+                                          RCP<ComplexBlockVector> X,
+                                          RCP<const ComplexBlockVector> B);
+*/
+
+}  // end of namespace
+
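
createDirectSolver() above walks the available Amesos2 back-ends (KLU2, MUMPS, Basker, the SuperLU variants, Pardiso-MKL, Cholmod and finally LAPACK) and returns the first one that is both compiled in and compatible with the requested escript solver method, with SO_METHOD_DIRECT acting as a "don't care". On the Python side this is driven through a PDE's SolverOptions. The following is only a minimal sketch assuming a Trilinos-enabled build; the SolverOptions constant and method names (TRILINOS, DIRECT, ...) and the ripley Rectangle arguments are assumptions based on the user's guide conventions, not something introduced by this commit.

    from esys.escript import *
    from esys.escript.linearPDEs import LinearPDE, SolverOptions
    from esys.ripley import Rectangle

    dom = Rectangle(20, 20)                      # small 2D test domain (arguments assumed)
    x = dom.getX()
    pde = LinearPDE(dom)
    # -div(grad u) = 1 with u = 0 on the x0 = 0 face
    pde.setValue(A=kronecker(dom), Y=1.,
                 q=whereZero(x[0]), r=0.)
    so = pde.getSolverOptions()
    so.setPackage(SolverOptions.TRILINOS)        # route through trilinoswrap (constant assumed)
    so.setSolverMethod(SolverOptions.DIRECT)     # "don't care": first Amesos2 solver found wins
    u = pde.getSolution()
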
diff --git a/trilinoswrap/src/Amesos2Wrapper.h b/trilinoswrap/src/Amesos2Wrapper.h
new file mode 100644
index 0000000..512921c
--- /dev/null
+++ b/trilinoswrap/src/Amesos2Wrapper.h
@@ -0,0 +1,43 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_AMESOS2WRAPPER_H__
+#define __ESYS_TRILINOS_AMESOS2WRAPPER_H__
+
+#include <trilinoswrap/types.h>
+
+#include <Amesos2_Solver_decl.hpp>
+
+namespace escript {
+    class SolverBuddy;
+}
+
+namespace esys_trilinos {
+
+template<class Matrix, class Vector>
+using DirectSolverType = Amesos2::Solver<Matrix, Vector>;
+
+template<class Matrix, class Vector>
+Teuchos::RCP<DirectSolverType<Matrix,Vector> > createDirectSolver(
+                                  const escript::SolverBuddy& sb,
+                                  Teuchos::RCP<const Matrix> A,
+                                  Teuchos::RCP<Vector> X,
+                                  Teuchos::RCP<const Vector> B);
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_AMESOS2WRAPPER_H__
+
diff --git a/trilinoswrap/src/BelosWrapper.cpp b/trilinoswrap/src/BelosWrapper.cpp
new file mode 100644
index 0000000..9f1ed59
--- /dev/null
+++ b/trilinoswrap/src/BelosWrapper.cpp
@@ -0,0 +1,106 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <trilinoswrap/BelosWrapper.h>
+#include <trilinoswrap/TrilinosAdapterException.h>
+#include <trilinoswrap/util.h>
+
+#include <escript/SolverOptions.h>
+
+#include <BelosSolverFactory.hpp>
+#include <BelosTpetraAdapter.hpp>
+
+#include <boost/python/dict.hpp>
+
+using Teuchos::RCP;
+
+namespace bp = boost::python;
+
+namespace esys_trilinos {
+
+template<typename ST>
+RCP<SolverType<ST> > createSolver(const escript::SolverBuddy& sb)
+{
+    using util::extractParamIfSet;
+
+    Belos::SolverFactory<ST, VectorType<ST>, OpType<ST> > factory;
+    RCP<SolverType<ST> > solver;
+    RCP<Teuchos::ParameterList> solverParams = Teuchos::parameterList();
+
+    solverParams->set("Convergence Tolerance", sb.getTolerance());
+    solverParams->set("Maximum Iterations", sb.getIterMax());
+    if (sb.isVerbose()) {
+        solverParams->set("Verbosity", Belos::Errors + Belos::Warnings +
+                Belos::TimingDetails + Belos::StatusTestDetails);
+    }
+
+    escript::SolverOptions method = sb.getSolverMethod();
+    const bp::dict& pyParams = sb.getTrilinosParameters();
+
+    if (method == escript::SO_DEFAULT) {
+        if (sb.isSymmetric()) {
+            method = escript::SO_METHOD_PCG;
+        } else {
+            method = escript::SO_METHOD_GMRES;
+        }
+    }
+
+    switch (method) {
+        case escript::SO_METHOD_BICGSTAB:
+            solver = factory.create("BICGSTAB", solverParams);
+            break;
+        case escript::SO_METHOD_PCG:
+            solver = factory.create("CG", solverParams);
+            break;
+        case escript::SO_METHOD_PRES20:
+            //solverParams->set("Num Blocks", 5);
+            //solverParams->set("Maximum Restarts", 20);
+            solver = factory.create("GMRES", solverParams);
+            break;
+        case escript::SO_METHOD_GMRES:
+            extractParamIfSet<int>("Num Blocks", pyParams, *solverParams);
+            extractParamIfSet<int>("Maximum Restarts", pyParams, *solverParams);
+            extractParamIfSet<std::string>("Orthogonalization", pyParams, *solverParams);
+            solver = factory.create("GMRES", solverParams);
+            break;
+        case escript::SO_METHOD_LSQR:
+            extractParamIfSet<ST>("Condition Limit", pyParams, *solverParams);
+            extractParamIfSet<int>("Term Iter Max", pyParams, *solverParams);
+            extractParamIfSet<ST>("Lambda", pyParams, *solverParams);
+            solverParams->set("Rel Mat Err", sb.getTolerance());
+            solver = factory.create("LSQR", solverParams);
+            break;
+        case escript::SO_METHOD_MINRES:
+            extractParamIfSet<int>("Block Size", pyParams, *solverParams);
+            solver = factory.create("MINRES", solverParams);
+            break;
+        case escript::SO_METHOD_TFQMR:
+            solver = factory.create("TFQMR", solverParams);
+            break;
+        default:
+            throw TrilinosAdapterException("Unsupported solver type requested.");
+    }
+    return solver;
+}
+
+// instantiate our two supported versions
+template
+RCP<SolverType<real_t> > createSolver(const escript::SolverBuddy& sb);
+template
+RCP<SolverType<cplx_t> > createSolver(const escript::SolverBuddy& sb);
+
+}  // end of namespace
+
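
createSolver() above maps the escript solver method onto a Belos solver manager (CG for PCG, GMRES, BiCGStab, MINRES, TFQMR, LSQR) and forwards tolerance, iteration limit and verbosity from the SolverBuddy. The matching Python-side options, again as a hedged sketch with assumed constant and method names, reusing the pde object set up in the sketch after Amesos2Wrapper.cpp:

    so = pde.getSolverOptions()                  # pde as in the previous sketch
    so.setPackage(SolverOptions.TRILINOS)        # constant assumed
    so.setSolverMethod(SolverOptions.PCG)        # symmetric problem -> Belos "CG"
    so.setPreconditioner(SolverOptions.AMG)      # MueLu, see PreconditionerFactory.cpp below
    so.setTolerance(1.e-8)                       # Belos "Convergence Tolerance"
    so.setIterMax(500)                           # Belos "Maximum Iterations"
    so.setVerbosityOn()                          # Belos status and timing details
    u = pde.getSolution()
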
diff --git a/trilinoswrap/src/BelosWrapper.h b/trilinoswrap/src/BelosWrapper.h
new file mode 100644
index 0000000..26d3391
--- /dev/null
+++ b/trilinoswrap/src/BelosWrapper.h
@@ -0,0 +1,44 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_BELOSWRAPPER_H__
+#define __ESYS_TRILINOS_BELOSWRAPPER_H__
+
+#include <trilinoswrap/types.h>
+
+#include <BelosSolverManager.hpp>
+#include <BelosTpetraAdapter.hpp>
+#include <BelosTypes.hpp>
+
+namespace escript {
+    class SolverBuddy;
+}
+
+namespace esys_trilinos {
+
+template<typename ST>
+using ProblemType = Belos::LinearProblem< ST, VectorType<ST>, OpType<ST> >;
+
+template<typename ST>
+using SolverType = Belos::SolverManager< ST, VectorType<ST>, OpType<ST> >;
+
+template<typename ST>
+Teuchos::RCP<SolverType<ST> > createSolver(const escript::SolverBuddy& sb);
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_BELOSWRAPPER_H__
+
diff --git a/trilinoswrap/src/BlockCrsMatrixWrapper.cpp b/trilinoswrap/src/BlockCrsMatrixWrapper.cpp
new file mode 100644
index 0000000..d35cce7
--- /dev/null
+++ b/trilinoswrap/src/BlockCrsMatrixWrapper.cpp
@@ -0,0 +1,274 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "BlockCrsMatrixWrapper.h" 
+#include "BelosWrapper.h" 
+#include "PreconditionerFactory.h" 
+#include "TrilinosAdapterException.h" 
+#include "util.h" 
+
+#include <escript/index.h>
+#include <escript/SolverOptions.h>
+
+#include <Kokkos_DefaultNode.hpp>
+#include <Tpetra_DefaultPlatform.hpp>
+#include <Tpetra_Experimental_BlockCrsMatrix_Helpers.hpp> // for writing
+#include <Tpetra_Vector.hpp>
+
+using Teuchos::RCP;
+using Teuchos::rcp;
+using Teuchos::rcpFromRef;
+
+namespace esys_trilinos {
+
+template<typename ST>
+BlockCrsMatrixWrapper<ST>::BlockCrsMatrixWrapper(const_TrilinosGraph_ptr graph,
+                                                 int blocksize) :
+    blockSize(blocksize),
+    mat(*graph, blocksize)
+{
+    // initialize column point map, needed by nullifyRowsAndCols to communicate
+    // remote values
+    colPointMap = BlockVectorType<ST>::makePointMap(*mat.getColMap(), blockSize);
+    maxLocalRow = graph->getRowMap()->getMaxLocalIndex();
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::add(const std::vector<LO>& rowIdx,
+                                    const std::vector<ST>& array)
+{
+    const size_t emSize = rowIdx.size();
+    std::vector<LO> cols(emSize);
+    std::vector<ST> vals(emSize*blockSize*blockSize);
+    for (size_t i = 0; i < emSize; i++) {
+        const LO row = rowIdx[i];
+        if (row <= maxLocalRow) {
+            for (int j = 0; j < emSize; j++) {
+                cols[j] = rowIdx[j];
+                for (int k = 0; k < blockSize; k++) {
+                    for (int m = 0; m < blockSize; m++) {
+                        const size_t srcIdx =
+                            INDEX4(k, m, i, j, blockSize, blockSize, emSize);
+                        const size_t destIdx =
+                            INDEX3(m, k, j, blockSize, blockSize);
+                        vals[destIdx] = array[srcIdx];
+                    }
+                }
+            }
+            mat.sumIntoLocalValues(row, &cols[0], &vals[0], emSize);
+        }
+    }
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::ypAx(const Teuchos::ArrayView<ST>& y,
+                                   const Teuchos::ArrayView<const ST>& x) const
+{
+    typedef VectorType<ST> Vector;
+    RCP<Vector> X = rcp(new Vector(mat.getDomainMap(), x, x.size(), 1));
+    RCP<Vector> Y = rcp(new Vector(mat.getDomainMap(), y, y.size(), 1));
+
+    const ST alpha = Teuchos::ScalarTraits<ST>::one();
+    const ST beta = Teuchos::ScalarTraits<ST>::one();
+
+    // Y = beta*Y + alpha*A*X
+    mat.apply(*X, *Y, Teuchos::NO_TRANS, alpha, beta);
+    Y->get1dCopy(y, y.size());
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::solve(const Teuchos::ArrayView<ST>& x,
+                                      const Teuchos::ArrayView<const ST>& b,
+                                      escript::SolverBuddy& sb) const
+{
+    typedef VectorType<ST> Vector;
+
+    RCP<Vector> X = rcp(new Vector(mat.getDomainMap(), 1));
+    RCP<Vector> B = rcp(new Vector(mat.getRangeMap(), b, b.size(), 1));
+    RCP<const Matrix> A = rcpFromRef(mat);
+
+    if (escript::isDirectSolver(sb.getSolverMethod())) {
+        throw TrilinosAdapterException("Amesos2 does not currently support "
+                                       "block matrices!");
+#if 0
+        RCP<DirectSolverType<Matrix,Vector> > solver(m_direct);
+        if (solver.is_null()) {
+            solver = createDirectSolver<Matrix,Vector>(sb, A, X, B);
+            m_direct = solver;
+            if (sb.isVerbose()) {
+                std::cout << solver->description() << std::endl;
+                std::cout << "Performing symbolic factorization..." << std::flush;
+            }
+            solver->symbolicFactorization();
+            if (sb.isVerbose()) {
+                std::cout << "done\nPerforming numeric factorization..." << std::flush;
+            }
+            solver->numericFactorization();
+            if (sb.isVerbose()) {
+                std::cout << "done\n" << std::flush;
+            }
+        } else {
+            solver->setX(X);
+            solver->setB(B);
+        }
+        if (sb.isVerbose()) {
+            std::cout << "Solving system..." << std::flush;
+        }
+        solver->solve();
+        if (sb.isVerbose()) {
+            std::cout << "done" << std::endl;
+            RCP<Teuchos::FancyOStream> fos(Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)));
+            solver->printTiming(*fos, Teuchos::VERB_HIGH);
+        }
+#endif
+    } else { // iterative solver
+        double t0 = Teuchos::Time::wallTime();
+        RCP<ProblemType<ST> > problem(m_solver);
+        if (problem.is_null()) {
+            problem = rcp(new ProblemType<ST>(A, X, B));
+            m_solver = problem;
+            RCP<OpType<ST> > prec = createPreconditioner<ST>(A, sb);
+            if (!prec.is_null()) {
+                // Trilinos BiCGStab does not support left preconditioners
+                if (sb.getSolverMethod() == escript::SO_METHOD_BICGSTAB)
+                    problem->setRightPrec(prec);
+                else
+                    problem->setLeftPrec(prec);
+            }
+            problem->setHermitian(sb.isSymmetric());
+            problem->setProblem();
+        } else {
+            for (auto t: problem->getTimers()) {
+                t->reset();
+            }
+            problem->setProblem(X, B);
+        }
+
+        double t1 = Teuchos::Time::wallTime();
+        RCP<SolverType<ST> > solver = createSolver<ST>(sb);
+        solver->setProblem(problem);
+        Belos::ReturnType result = solver->solve();
+        double t2 = Teuchos::Time::wallTime();
+        const int numIters = solver->getNumIters();
+        double tol = sb.getTolerance();
+        try {
+            tol = solver->achievedTol();
+        } catch (...) {
+        }
+        if (sb.isVerbose()) {
+            if (result == Belos::Converged) {
+                sb.updateDiagnostics("converged", true);
+                std::cout << "The solver took " << numIters
+                   << " iteration(s) to reach a residual tolerance of "
+                   << tol << "." << std::endl;
+            } else {
+                std::cout << "The solver took " << numIters
+                   << " iteration(s), but did not reach a relative residual "
+                   "tolerance of " << sb.getTolerance() << "." << std::endl;
+            }
+        }
+        double solverTime = 0.;
+        for (auto t: problem->getTimers()) {
+            solverTime += t->totalElapsedTime();
+        }
+        sb.updateDiagnostics("set_up_time", t1-t0);
+        sb.updateDiagnostics("net_time", solverTime);
+        sb.updateDiagnostics("time", t2-t0);
+        sb.updateDiagnostics("num_iter", numIters);
+        sb.updateDiagnostics("residual_norm", tol);
+    }
+    X->get1dCopy(x, x.size());
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::nullifyRowsAndCols(
+                               const Teuchos::ArrayView<const real_t>& rowMask,
+                               const Teuchos::ArrayView<const real_t>& colView,
+                               ST mdv)
+{
+    RCP<VectorType<real_t> > lclCol = rcp(new VectorType<real_t>(
+                               mat.getRangeMap(), colView, colView.size(), 1));
+    RCP<MapType> cpm = rcpFromRef(colPointMap);
+    RCP<VectorType<real_t> > gblCol = rcp(new VectorType<real_t>(cpm, 1));
+
+    const ImportType importer(mat.getRangeMap(), cpm);
+    gblCol->doImport(*lclCol, importer, Tpetra::INSERT);
+    Teuchos::ArrayRCP<const real_t> colMask(gblCol->getData(0));
+    const real_t eps = escript::DataTypes::real_t_eps();
+    const ST zero = Teuchos::ScalarTraits<ST>::zero();
+
+// Can't use OpenMP here as replaceLocalValues() is not thread-safe.
+//#pragma omp parallel for
+    // loop through local row blocks
+    for (LO lrb = 0; lrb < mat.getNodeNumRows(); lrb++) {
+        LO numIndices = 0;
+        const LO* indices;
+        ST* values;
+        mat.getLocalRowView(lrb, indices, values, numIndices);
+        std::vector<GO> cols(numIndices);
+        std::vector<ST> vals(numIndices*blockSize*blockSize);
+        const GO rowblk = mat.getRowMap()->getGlobalElement(lrb);
+        for (LO c = 0; c < numIndices; c++) {
+            // local/global column block
+            const LO lcb = indices[c];
+            const GO colblk = mat.getColMap()->getGlobalElement(lcb);
+            cols[c] = lcb;
+            for (LO ri = 0; ri < blockSize; ri++) {
+                const LO lclrow = lrb * blockSize + ri;
+                const GO row = rowblk * blockSize + ri;
+                for (LO ci = 0; ci < blockSize; ci++) {
+                    const LO lclcol = lcb * blockSize + ci;
+                    const GO col = colblk * blockSize + ci;
+                    const size_t idx = INDEX3(ci, ri, c, blockSize, blockSize);
+                    if (std::abs(rowMask[lclrow]) > eps || std::abs(colMask[lclcol]) > eps) {
+                        vals[idx] = (row==col ? mdv : zero);
+                    } else {
+                        // whole blocks must be written back, so keep the current value
+                        vals[idx] = values[idx];
+                    }
+                }
+            }
+        }
+        mat.replaceLocalValues(lrb, &cols[0], &vals[0], numIndices);
+    }
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::saveMM(const std::string& filename) const
+{
+    Teuchos::ParameterList params;
+    // for compatibility with paso, not strictly required.
+    params.set("precision", 15);
+    std::ofstream os(filename);
+    Tpetra::Experimental::blockCrsMatrixWriter<ST,LO,GO,NT>(mat, os, params);
+    os.close();
+}
+
+template<typename ST>
+void BlockCrsMatrixWrapper<ST>::resetValues(bool preserveSolverData)
+{
+    mat.setAllToScalar(static_cast<ST>(0.));
+    if (!preserveSolverData) {
+        m_solver.reset();
+    }
+}
+
+// instantiate
+template class BlockCrsMatrixWrapper<real_t>;
+template class BlockCrsMatrixWrapper<cplx_t>;
+
+}  // end of namespace
+
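
The block variant above is used when the assembled system carries a block size greater than one, i.e. when the PDE couples several solution components; note that its solve() currently rejects the Amesos2 direct path, so an iterative Belos method has to be chosen. A toy sketch of a two-component problem that would exercise this wrapper (same assumptions about names as in the earlier sketches; the coefficients are deliberately trivial):

    from esys.escript import *
    from esys.escript.linearPDEs import LinearPDE, SolverOptions
    from esys.ripley import Rectangle

    dom = Rectangle(20, 20)
    # two coupled unknowns -> block size 2 in the system matrix, which a
    # Trilinos-enabled build is expected to hold in a BlockCrsMatrixWrapper
    pde = LinearPDE(dom, numEquations=2, numSolutions=2)
    pde.setValue(D=kronecker(2), Y=[1., 2.])     # trivial mass-matrix problem
    so = pde.getSolverOptions()
    so.setPackage(SolverOptions.TRILINOS)        # constant assumed
    so.setSolverMethod(SolverOptions.PCG)        # direct solvers are rejected for block matrices
    u = pde.getSolution()                        # expect u[0] ~ 1, u[1] ~ 2
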
diff --git a/trilinoswrap/src/BlockCrsMatrixWrapper.h b/trilinoswrap/src/BlockCrsMatrixWrapper.h
new file mode 100644
index 0000000..6fa0666
--- /dev/null
+++ b/trilinoswrap/src/BlockCrsMatrixWrapper.h
@@ -0,0 +1,74 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_BLOCKCRSMATRIXWRAPPER_H__
+#define __ESYS_TRILINOS_BLOCKCRSMATRIXWRAPPER_H__
+
+#include <trilinoswrap/AbstractMatrixWrapper.h>
+#include <trilinoswrap/BelosWrapper.h>
+
+#include <Tpetra_Experimental_BlockCrsMatrix.hpp>
+
+namespace esys_trilinos {
+
+template<typename ST>
+class BlockCrsMatrixWrapper : public AbstractMatrixWrapper<ST>
+{
+    typedef Tpetra::Experimental::BlockCrsMatrix<ST,LO,GO,NT> Matrix;
+
+public:
+    /**
+       \brief
+       Creates a new Trilinos Block CRS matrix wrapper using a compatible
+       fill-complete Trilinos matrix graph and given block size.
+    */
+    BlockCrsMatrixWrapper(const_TrilinosGraph_ptr graph, int blocksize);
+
+    void resetValues(bool preserveSolverData = false);
+
+    /// notifies the matrix that changes are about to happen.
+    inline void resumeFill() {}
+
+    /// notifies the matrix that a set of changes has occurred.
+    inline void fillComplete(bool /*localOnly*/) {}
+
+    void nullifyRowsAndCols(const Teuchos::ArrayView<const real_t>& rowMask,
+                            const Teuchos::ArrayView<const real_t>& colView,
+                            ST mdv);
+
+    void add(const std::vector<LO>& rowIndex, const std::vector<ST>& array);
+
+    void ypAx(const Teuchos::ArrayView<ST>& y,
+              const Teuchos::ArrayView<const ST>& x) const;
+
+    void solve(const Teuchos::ArrayView<ST>& x,
+               const Teuchos::ArrayView<const ST>& b,
+               escript::SolverBuddy& sb) const;
+
+    void saveMM(const std::string& filename) const;
+
+private:
+    int blockSize;
+    Matrix mat;
+    mutable Teuchos::RCP<ProblemType<ST> > m_solver;
+    MapType colPointMap;
+    LO maxLocalRow;
+};
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_BLOCKCRSMATRIXWRAPPER_H__
+
diff --git a/trilinoswrap/src/CrsMatrixWrapper.cpp b/trilinoswrap/src/CrsMatrixWrapper.cpp
new file mode 100644
index 0000000..e653463
--- /dev/null
+++ b/trilinoswrap/src/CrsMatrixWrapper.cpp
@@ -0,0 +1,281 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "CrsMatrixWrapper.h" 
+#include "Amesos2Wrapper.h" 
+#include "BelosWrapper.h" 
+#include "PreconditionerFactory.h" 
+#include "TrilinosAdapterException.h" 
+#include "util.h" 
+
+#include <escript/SolverOptions.h>
+
+#include <Kokkos_DefaultNode.hpp>
+#include <MatrixMarket_Tpetra.hpp>
+#include <MueLu_CreateTpetraPreconditioner.hpp>
+
+#include <Tpetra_DefaultPlatform.hpp>
+#include <Tpetra_Vector.hpp>
+
+using Teuchos::RCP;
+using Teuchos::rcp;
+using Teuchos::rcpFromRef;
+
+namespace esys_trilinos {
+
+template<typename ST>
+CrsMatrixWrapper<ST>::CrsMatrixWrapper(const_TrilinosGraph_ptr graph) :
+    mat(graph),
+    m_resetCalled(false)
+{
+    mat.fillComplete();
+    maxLocalRow = graph->getRowMap()->getMaxLocalIndex();
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::fillComplete(bool localOnly)
+{
+    RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    params->set("No Nonlocal Changes", localOnly);
+    mat.fillComplete(params);
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::add(const std::vector<LO>& rowIdx,
+                               const std::vector<ST>& array)
+{
+    const size_t emSize = rowIdx.size();
+    std::vector<LO> cols(emSize);
+    std::vector<ST> vals(emSize);
+    for (size_t i = 0; i < emSize; i++) {
+        const LO row = rowIdx[i];
+        if (row <= maxLocalRow) {
+            for (int j = 0; j < emSize; j++) {
+                const LO col = rowIdx[j];
+                cols[j] = col;
+                const size_t srcIdx = j * emSize + i;
+                vals[j] = array[srcIdx];
+            }
+            mat.sumIntoLocalValues(row, cols, vals);
+        }
+    }
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::ypAx(const Teuchos::ArrayView<ST>& y,
+                                const Teuchos::ArrayView<const ST>& x) const
+{
+    RCP<VectorType<ST> > X = rcp(new VectorType<ST>(mat.getRowMap(), x, x.size(), 1));
+    RCP<VectorType<ST> > Y = rcp(new VectorType<ST>(mat.getRowMap(), y, y.size(), 1));
+
+    const ST alpha = Teuchos::ScalarTraits<ST>::one();
+    const ST beta = Teuchos::ScalarTraits<ST>::one();
+
+    // Y = beta*Y + alpha*A*X
+    mat.apply(*X, *Y, Teuchos::NO_TRANS, alpha, beta);
+    Y->get1dCopy(y, y.size());
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::solve(const Teuchos::ArrayView<ST>& x,
+                                 const Teuchos::ArrayView<const ST>& b,
+                                 escript::SolverBuddy& sb) const
+{
+    typedef VectorType<ST> Vector;
+
+    RCP<Vector> X = rcp(new Vector(mat.getDomainMap(), 1));
+    RCP<Vector> B = rcp(new Vector(mat.getRangeMap(), b, b.size(), 1));
+    RCP<const Matrix> A = rcpFromRef(mat);
+
+    if (escript::isDirectSolver(sb.getSolverMethod())) {
+        RCP<DirectSolverType<Matrix,Vector> > solver(m_direct);
+        if (solver.is_null()) {
+            solver = createDirectSolver<Matrix,Vector>(sb, A, X, B);
+            m_direct = solver;
+            if (sb.isVerbose()) {
+                std::cout << "Using " << solver->description() << std::endl;
+                std::cout << "Performing symbolic factorization..." << std::flush;
+            }
+            solver->symbolicFactorization();
+            if (sb.isVerbose()) {
+                std::cout << "done\nPerforming numeric factorization..." << std::flush;
+            }
+            solver->numericFactorization();
+            if (sb.isVerbose()) {
+                std::cout << "done\n" << std::flush;
+            }
+        } else {
+            if (sb.isVerbose()) {
+                std::cout << "Using " << solver->description() << std::endl;
+            }
+            if (m_resetCalled) {
+                // matrix structure never changes
+                solver->setA(A, Amesos2::SYMBFACT);
+                m_resetCalled = false;
+            }
+            solver->setX(X);
+            solver->setB(B);
+        }
+        if (sb.isVerbose()) {
+            std::cout << "Solving system..." << std::flush;
+        }
+        solver->solve();
+        if (sb.isVerbose()) {
+            std::cout << "done" << std::endl;
+            RCP<Teuchos::FancyOStream> fos(Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)));
+            solver->printTiming(*fos, Teuchos::VERB_HIGH);
+        }
+
+    } else { // iterative solver
+        double t0 = Teuchos::Time::wallTime();
+        RCP<ProblemType<ST> > problem(m_solver);
+        if (problem.is_null()) {
+            problem = rcp(new ProblemType<ST>(A, X, B));
+            m_solver = problem;
+            RCP<OpType<ST> > prec = createPreconditioner<ST>(A, sb);
+            m_preconditioner = prec;
+            if (!prec.is_null()) {
+                // Trilinos BiCGStab does not support left preconditioners
+                if (sb.getSolverMethod() == escript::SO_METHOD_BICGSTAB)
+                    problem->setRightPrec(prec);
+                else
+                    problem->setLeftPrec(prec);
+            }
+            problem->setHermitian(sb.isSymmetric());
+            problem->setProblem();
+        } else {
+            for (auto t: problem->getTimers()) {
+                t->reset();
+            }
+            if (m_resetCalled) {
+                // special case for MueLu preconditioner - call Reuse...
+                // which honours the "reuse: type" parameter.
+                RCP<MueLu::TpetraOperator<ST,LO,GO,NT> > mlOp =
+                    Teuchos::rcp_dynamic_cast<MueLu::TpetraOperator<ST,LO,GO,NT> >(m_preconditioner);
+                if (mlOp.get()) {
+                    RCP<Matrix> A_(Teuchos::rcp_const_cast<Matrix>(A));
+                    MueLu::ReuseTpetraPreconditioner(A_, *mlOp);
+                }
+            }
+            problem->setProblem(X, B);
+        }
+
+        double t1 = Teuchos::Time::wallTime();
+        RCP<SolverType<ST> > solver = createSolver<ST>(sb);
+        if (sb.isVerbose()) {
+            std::cout << "Using " << solver->description() << std::endl;
+        }
+        solver->setProblem(problem);
+        Belos::ReturnType result = solver->solve();
+        double t2 = Teuchos::Time::wallTime();
+        const int numIters = solver->getNumIters();
+        double tol = sb.getTolerance();
+        try {
+            tol = solver->achievedTol();
+        } catch (...) {
+        }
+        if (sb.isVerbose()) {
+            if (result == Belos::Converged) {
+                sb.updateDiagnostics("converged", true);
+                std::cout << "The solver took " << numIters
+                   << " iteration(s) to reach a residual tolerance of "
+                   << tol << "." << std::endl;
+            } else {
+                std::cout << "The solver took " << numIters
+                   << " iteration(s), but did not reach a relative residual "
+                   "tolerance of " << sb.getTolerance() << "." << std::endl;
+            }
+        }
+        double solverTime = 0.;
+        for (auto t: problem->getTimers()) {
+            solverTime += t->totalElapsedTime();
+        }
+        sb.updateDiagnostics("set_up_time", t1-t0);
+        sb.updateDiagnostics("net_time", solverTime);
+        sb.updateDiagnostics("time", t2-t0);
+        sb.updateDiagnostics("num_iter", numIters);
+        sb.updateDiagnostics("residual_norm", tol);
+    }
+    X->get1dCopy(x, x.size());
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::nullifyRowsAndCols(
+                               const Teuchos::ArrayView<const real_t>& rowMask,
+                               const Teuchos::ArrayView<const real_t>& colView,
+                               ST mdv)
+{
+    const_TrilinosMap_ptr rowMap(mat.getRowMap());
+    RCP<VectorType<real_t> > lclCol = rcp(new VectorType<real_t>(rowMap,
+                                                  colView, colView.size(), 1));
+    RCP<VectorType<real_t> > gblCol = rcp(new VectorType<real_t>(
+                                                          mat.getColMap(), 1));
+
+    const ImportType importer(rowMap, mat.getColMap());
+    gblCol->doImport(*lclCol, importer, Tpetra::INSERT);
+    Teuchos::ArrayRCP<const real_t> colMask(gblCol->getData(0));
+    const ST zero = Teuchos::ScalarTraits<ST>::zero();
+
+    resumeFill();
+// Can't use OpenMP here as replaceLocalValues() is not thread-safe.
+//#pragma omp parallel for
+    for (LO lclrow = 0; lclrow < mat.getNodeNumRows(); lclrow++) {
+        Teuchos::ArrayView<const LO> indices;
+        Teuchos::ArrayView<const ST> values;
+        std::vector<GO> cols;
+        std::vector<ST> vals;
+        mat.getLocalRowView(lclrow, indices, values);
+        GO row = rowMap->getGlobalElement(lclrow);
+        for (size_t c = 0; c < indices.size(); c++) {
+            const LO lclcol = indices[c];
+            const GO col = mat.getColMap()->getGlobalElement(lclcol);
+            if (rowMask[lclrow] != 0. || colMask[lclcol] != 0.) {
+                cols.push_back(lclcol);
+                vals.push_back(row==col ? mdv : zero);
+            }
+        }
+        if (cols.size() > 0)
+            mat.replaceLocalValues(lclrow, cols, vals);
+    }
+    fillComplete(true);
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::saveMM(const std::string& filename) const
+{
+    Tpetra::MatrixMarket::Writer<Matrix>::writeSparseFile(filename, rcpFromRef(mat));
+}
+
+template<typename ST>
+void CrsMatrixWrapper<ST>::resetValues(bool preserveSolverData)
+{
+    resumeFill();
+    mat.setAllToScalar(static_cast<ST>(0.));
+    fillComplete(true);
+    if (!preserveSolverData) {
+        m_solver.reset();
+        m_preconditioner.reset();
+    }
+    m_resetCalled = true;
+}
+
+
+// instantiate the supported variants
+template class CrsMatrixWrapper<real_t>;
+template class CrsMatrixWrapper<cplx_t>;
+
+}  // end of namespace
+
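
One detail of the class above worth spelling out: resetValues() sets m_resetCalled, so a later solve() can hand the refreshed matrix back to Amesos2 with the symbolic factorisation preserved (setA(A, Amesos2::SYMBFACT)) or, when the Belos problem survives, rebuild only the MueLu hierarchy via ReuseTpetraPreconditioner(). This matches the usual time-stepping pattern where values change every step but the sparsity pattern does not. A hedged Python sketch of such a loop, under the same naming assumptions as the earlier sketches (whether reuse actually kicks in depends on the chosen solver options):

    from esys.escript import *
    from esys.escript.linearPDEs import LinearPDE, SolverOptions
    from esys.ripley import Rectangle

    dom = Rectangle(20, 20)
    x = dom.getX()
    pde = LinearPDE(dom)
    so = pde.getSolverOptions()
    so.setPackage(SolverOptions.TRILINOS)        # constant assumed
    so.setSolverMethod(SolverOptions.DIRECT)     # factorisation structure reused across steps

    dt = 0.01
    u = Scalar(0., Solution(dom))
    for step in range(10):
        # backward Euler for u_t - div(grad u) = 1:
        # (1/dt) u_new - div(grad u_new) = (1/dt) u_old + 1
        pde.setValue(A=kronecker(dom), D=1./dt, Y=u/dt + 1.,
                     q=whereZero(x[0]), r=0.)
        u = pde.getSolution()                    # same sparsity pattern every step
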
diff --git a/trilinoswrap/src/CrsMatrixWrapper.h b/trilinoswrap/src/CrsMatrixWrapper.h
new file mode 100644
index 0000000..ca98110
--- /dev/null
+++ b/trilinoswrap/src/CrsMatrixWrapper.h
@@ -0,0 +1,79 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_CRSMATRIXWRAPPER_H__
+#define __ESYS_TRILINOS_CRSMATRIXWRAPPER_H__
+
+#include <trilinoswrap/AbstractMatrixWrapper.h>
+#include <trilinoswrap/Amesos2Wrapper.h>
+#include <trilinoswrap/BelosWrapper.h>
+
+#include <Tpetra_CrsMatrix.hpp>
+
+namespace esys_trilinos {
+
+template<typename ST>
+class CrsMatrixWrapper : public AbstractMatrixWrapper<ST>
+{
+public:
+    typedef Tpetra::CrsMatrix<ST,LO,GO,NT> Matrix;
+
+    /**
+       \brief
+       Creates a new Trilinos CRS matrix adapter using a compatible
+       fill-complete Trilinos matrix graph.
+    */
+    CrsMatrixWrapper(const_TrilinosGraph_ptr graph);
+
+    void resetValues(bool preserveSolverData = false);
+
+    /// notifies the matrix that changes are about to happen.
+    inline void resumeFill()
+    {
+        mat.resumeFill();
+    }
+
+    /// notifies the matrix that a set of changes has occurred.
+    void fillComplete(bool localOnly);
+
+    void nullifyRowsAndCols(const Teuchos::ArrayView<const real_t>& rowMask,
+                            const Teuchos::ArrayView<const real_t>& colView,
+                            ST mdv);
+
+    void add(const std::vector<LO>& rowIndex, const std::vector<ST>& array);
+
+    void ypAx(const Teuchos::ArrayView<ST>& y,
+              const Teuchos::ArrayView<const ST>& x) const;
+
+    void solve(const Teuchos::ArrayView<ST>& x,
+               const Teuchos::ArrayView<const ST>& b,
+               escript::SolverBuddy& sb) const;
+
+    void saveMM(const std::string& filename) const;
+
+protected:
+    Matrix mat;
+    mutable bool m_resetCalled;
+    mutable Teuchos::RCP<ProblemType<ST> > m_solver;
+    mutable Teuchos::RCP<OpType<ST> > m_preconditioner;
+    mutable Teuchos::RCP<DirectSolverType<Matrix,VectorType<ST> > > m_direct;
+    LO maxLocalRow;
+};
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_CRSMATRIXWRAPPER_H__
+
diff --git a/trilinoswrap/src/PreconditionerFactory.cpp b/trilinoswrap/src/PreconditionerFactory.cpp
new file mode 100644
index 0000000..6ddf5a7
--- /dev/null
+++ b/trilinoswrap/src/PreconditionerFactory.cpp
@@ -0,0 +1,195 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include <trilinoswrap/PreconditionerFactory.h>
+#include <trilinoswrap/TrilinosAdapterException.h>
+#include <trilinoswrap/util.h>
+
+#include <escript/SolverOptions.h>
+
+#include <Ifpack2_Factory.hpp>
+#if 1 //ndef ESYS_INDEXTYPE_LONG
+#include <MueLu_CreateTpetraPreconditioner.hpp>
+#endif
+
+#include <boost/python/dict.hpp>
+
+using Teuchos::RCP;
+
+namespace bp = boost::python;
+
+namespace esys_trilinos {
+
+template<typename ST>
+RCP<OpType<ST> > createPreconditioner(RCP<const MatrixType<ST> > mat,
+                                      const escript::SolverBuddy& sb)
+{
+    using util::extractParamIfSet;
+
+    typedef MatrixType<ST> Matrix;
+
+    RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    Ifpack2::Factory factory;
+    RCP<OpType<ST> > prec;
+    RCP<Ifpack2::Preconditioner<ST,LO,GO,NT> > ifprec;
+    const bp::dict& pyParams = sb.getTrilinosParameters();
+
+    switch (sb.getPreconditioner()) {
+        case escript::SO_PRECONDITIONER_NONE:
+            break;
+        case escript::SO_PRECONDITIONER_AMG:
+            {
+#if 1 //ndef ESYS_INDEXTYPE_LONG
+                params->set("max levels", sb.getLevelMax());
+                params->set("number of equations", 1);
+                params->set("cycle type", sb.getCycleType()==1 ? "V" : "W");
+                params->set("problem: symmetric", sb.isSymmetric());
+                params->set("verbosity", sb.isVerbose()? "high":"none");
+                // Override the defaults with any parameters set explicitly
+                // for Trilinos. The available parameters are documented in
+                // the MueLu user's guide (PDF).
+                // NOTE: passing sub-parameter lists is not supported via
+                // Python due to escript's SolverBuddy constraints;
+                // use the 'xml parameter file' option instead.
+                extractParamIfSet<std::string>("problem: type", pyParams, *params);
+                extractParamIfSet<std::string>("verbosity", pyParams, *params);
+                extractParamIfSet<int>("number of equations", pyParams, *params);
+                extractParamIfSet<int>("max levels", pyParams, *params);
+                extractParamIfSet<std::string>("cycle type", pyParams, *params);
+                extractParamIfSet<bool>("problem: symmetric", pyParams, *params);
+                extractParamIfSet<std::string>("xml parameter file", pyParams, *params);
+                extractParamIfSet<std::string>("smoother: pre or post", pyParams, *params);
+                extractParamIfSet<std::string>("smoother: type", pyParams, *params);
+                extractParamIfSet<std::string>("smoother: pre type", pyParams, *params);
+                extractParamIfSet<std::string>("smoother: post type", pyParams, *params);
+                extractParamIfSet<int>("coarse: max size", pyParams, *params);
+                extractParamIfSet<std::string>("coarse: type", pyParams, *params);
+                extractParamIfSet<std::string>("aggregation: type", pyParams, *params);
+                extractParamIfSet<std::string>("aggregation: ordering", pyParams, *params);
+                extractParamIfSet<std::string>("aggregation: drop scheme", pyParams, *params);
+                extractParamIfSet<ST>("aggregation: drop tol", pyParams, *params);
+                extractParamIfSet<int>("aggregation: min agg size", pyParams, *params);
+                extractParamIfSet<int>("aggregation: max agg size", pyParams, *params);
+                extractParamIfSet<ST>("aggregation: Dirichlet threshold", pyParams, *params);
+                extractParamIfSet<bool>("aggregation: export visualization data", pyParams, *params);
+                extractParamIfSet<std::string>("aggregation: output filename", pyParams, *params);
+                extractParamIfSet<int>("aggregation: output file: time step", pyParams, *params);
+                extractParamIfSet<int>("aggregation: output file: iter", pyParams, *params);
+                extractParamIfSet<std::string>("aggregation: output file: agg style", pyParams, *params);
+                extractParamIfSet<bool>("aggregation: output file: fine graph edges", pyParams, *params);
+                extractParamIfSet<bool>("aggregation: output file: coarse graph edges", pyParams, *params);
+                extractParamIfSet<bool>("aggregation: output file: build colormap", pyParams, *params);
+                extractParamIfSet<bool>("repartition: enable", pyParams, *params);
+                extractParamIfSet<std::string>("repartition: partitioner", pyParams, *params);
+                extractParamIfSet<int>("repartition: start level", pyParams, *params);
+                extractParamIfSet<int>("repartition: min rows per proc", pyParams, *params);
+                extractParamIfSet<ST>("repartition: max imbalance", pyParams, *params);
+                extractParamIfSet<bool>("repartition: remap parts", pyParams, *params);
+                extractParamIfSet<bool>("repartition: rebalance P and R", pyParams, *params);
+                extractParamIfSet<std::string>("multigrid algorithm", pyParams, *params);
+                extractParamIfSet<int>("semicoarsen: coarsen rate", pyParams, *params);
+                extractParamIfSet<ST>("sa: damping factor", pyParams, *params);
+                extractParamIfSet<bool>("sa: use filtered matrix", pyParams, *params);
+                extractParamIfSet<bool>("filtered matrix: use lumping", pyParams, *params);
+                extractParamIfSet<bool>("filtered matrix: reuse eigenvalue", pyParams, *params);
+                extractParamIfSet<std::string>("emin: iterative method", pyParams, *params);
+                extractParamIfSet<int>("emin: num iterations", pyParams, *params);
+                extractParamIfSet<int>("emin: num reuse iterations", pyParams, *params);
+                extractParamIfSet<std::string>("emin: pattern", pyParams, *params);
+                extractParamIfSet<int>("emin: pattern order", pyParams, *params);
+                extractParamIfSet<std::string>("reuse: type", pyParams, *params);
+                extractParamIfSet<bool>("print initial parameters", pyParams, *params);
+                extractParamIfSet<bool>("print unused parameters", pyParams, *params);
+                extractParamIfSet<bool>("transpose: use implicit", pyParams, *params);
+                RCP<OpType<ST> > A(Teuchos::rcp_const_cast<Matrix>(mat));
+                prec = MueLu::CreateTpetraPreconditioner(A, *params);
+#else
+                throw escript::ValueError("MueLu (AMG) is incompatible with index type long!");
+#endif
+            }
+            break;
+        case escript::SO_PRECONDITIONER_ILUT:
+            ifprec = factory.create<const Matrix>("ILUT", mat);
+            params->set("fact: drop tolerance", sb.getDropTolerance());
+            params->set("fact: relax value", sb.getRelaxationFactor());
+            // override if set explicitly for trilinos
+            extractParamIfSet<ST>("fact: relax value", pyParams, *params);
+            extractParamIfSet<ST>("fact: drop tolerance", pyParams, *params);
+            extractParamIfSet<int>("fact: ilut level-of-fill", pyParams, *params);
+            extractParamIfSet<ST>("fact: absolute threshold", pyParams, *params);
+            extractParamIfSet<ST>("fact: relative threshold", pyParams, *params);
+            break;
+        case escript::SO_PRECONDITIONER_GAUSS_SEIDEL:
+        case escript::SO_PRECONDITIONER_JACOBI:
+          {
+            ifprec = factory.create<const Matrix>("RELAXATION", mat);
+            if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {
+                params->set("relaxation: type", "Jacobi");
+            } else {
+                params->set("relaxation: type", (sb.isSymmetric() ?
+                            "Symmetric Gauss-Seidel" : "Gauss-Seidel"));
+            }
+            params->set("relaxation: sweeps", sb.getNumSweeps());
+            const ST fac = static_cast<ST>(sb.getRelaxationFactor());
+            params->set("relaxation: damping factor", fac);
+            // override if set explicitly for trilinos
+            extractParamIfSet<int>("relaxation: sweeps", pyParams, *params);
+            extractParamIfSet<ST>("relaxation: damping factor", pyParams, *params);
+            extractParamIfSet<ST>("relaxation: min diagonal value", pyParams, *params);
+            extractParamIfSet<bool>("relaxation: zero starting solution", pyParams, *params);
+            extractParamIfSet<bool>("relaxation: backward mode", pyParams, *params);
+            break;
+          }
+        case escript::SO_PRECONDITIONER_ILU0: // treated like RILU to avoid test failures
+        case escript::SO_PRECONDITIONER_RILU:
+            if (dynamic_cast<const Tpetra::Experimental::BlockCrsMatrix<ST,LO,GO,NT>* >(mat.get())) {
+                ifprec = factory.create<const Matrix>("RBILUK", mat);
+            } else {
+                ifprec = factory.create<const Matrix>("RILUK", mat);
+            }
+            params->set("fact: relax value", sb.getRelaxationFactor());
+            // override if set explicitly for trilinos
+            extractParamIfSet<ST>("fact: relax value", pyParams, *params);
+            extractParamIfSet<int>("fact: iluk level-of-fill", pyParams, *params);
+            extractParamIfSet<int>("fact: iluk level-of-overlap", pyParams, *params);
+            extractParamIfSet<ST>("fact: absolute threshold", pyParams, *params);
+            extractParamIfSet<ST>("fact: relative threshold", pyParams, *params);
+            break;
+        default:
+            throw escript::ValueError("Unsupported preconditioner requested.");
+    }
+    if (!ifprec.is_null()) {
+        ifprec->setParameters(*params);
+        ifprec->initialize();
+        ifprec->compute();
+        prec = ifprec;
+    }
+    return prec;
+}
+
+// instantiate our two supported versions
+typedef MatrixType<real_t> RealMatrix;
+typedef MatrixType<cplx_t> ComplexMatrix;
+
+template
+RCP<RealOperator> createPreconditioner<real_t>(RCP<const RealMatrix> mat,
+                                               const escript::SolverBuddy& sb);
+template
+RCP<ComplexOperator> createPreconditioner<cplx_t>(RCP<const ComplexMatrix> mat,
+                                               const escript::SolverBuddy& sb);
+
+}  // end of namespace
+
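createPreconditioner() only builds the operator; attaching it to the linear problem is left to the caller (in this code base that happens in the Belos/Amesos2 wrappers, which are not part of this hunk). A minimal sketch of how the returned operator would typically be wired into a Belos linear problem as a right preconditioner, assuming `A`, `x`, `b` and a configured SolverBuddy `sb` are supplied by the caller:

    #include <trilinoswrap/PreconditionerFactory.h>
    #include <BelosLinearProblem.hpp>

    using namespace esys_trilinos;

    template<typename ST>
    void attachPreconditioner(Teuchos::RCP<const MatrixType<ST> > A,
                              Teuchos::RCP<VectorType<ST> > x,
                              Teuchos::RCP<const VectorType<ST> > b,
                              const escript::SolverBuddy& sb)
    {
        typedef Belos::LinearProblem<ST, VectorType<ST>, OpType<ST> > Problem;

        Teuchos::RCP<OpType<ST> > prec = createPreconditioner<ST>(A, sb);

        Teuchos::RCP<Problem> problem(new Problem(A, x, b));
        if (!prec.is_null())
            problem->setRightPrec(prec);   // Ifpack2/MueLu operator built above
        problem->setProblem();             // finalise before handing to a solver
    }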
diff --git a/trilinoswrap/src/PreconditionerFactory.h b/trilinoswrap/src/PreconditionerFactory.h
new file mode 100644
index 0000000..bf90c8c
--- /dev/null
+++ b/trilinoswrap/src/PreconditionerFactory.h
@@ -0,0 +1,40 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_PRECONDITIONERFACTORY_H__
+#define __ESYS_TRILINOS_PRECONDITIONERFACTORY_H__
+
+#include <trilinoswrap/types.h>
+
+/// Wrapper for Ifpack2 and MueLu
+
+namespace escript {
+    class SolverBuddy;
+}
+
+namespace esys_trilinos {
+
+/// creates a preconditioner (Operator) for input matrix A using options in sb.
+/// ST is the scalar type used by the matrix.
+template<typename ST>
+Teuchos::RCP<OpType<ST> > createPreconditioner(
+                                      Teuchos::RCP<const MatrixType<ST> > A,
+                                      const escript::SolverBuddy& sb);
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_PRECONDITIONERFACTORY_H__
+
diff --git a/trilinoswrap/src/SConscript b/trilinoswrap/src/SConscript
new file mode 100644
index 0000000..bb7bb3f
--- /dev/null
+++ b/trilinoswrap/src/SConscript
@@ -0,0 +1,61 @@
+##############################################################################
+#
+# Copyright (c) 2003-2016 by The University of Queensland
+# http://www.uq.edu.au
+#
+# Primary Business: Queensland, Australia
+# Licensed under the Apache License, version 2.0
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+# Development 2012-2013 by School of Earth Sciences
+# Development from 2014 by Centre for Geoscience Computing (GeoComp)
+#
+##############################################################################
+
+Import('*')
+
+module_name = 'trilinoswrap'
+
+sources = """
+  Amesos2Wrapper.cpp
+  BelosWrapper.cpp
+  BlockCrsMatrixWrapper.cpp
+  CrsMatrixWrapper.cpp
+  PreconditionerFactory.cpp
+  TrilinosMatrixAdapter.cpp
+  UnrolledBlockCrsMatrixWrapper.cpp
+""".split()
+
+headers = """
+  AbstractMatrixWrapper.h
+  Amesos2Wrapper.h
+  BelosWrapper.h
+  BlockCrsMatrixWrapper.h
+  CrsMatrixWrapper.h
+  PreconditionerFactory.h
+  TrilinosAdapterException.h
+  TrilinosMatrixAdapter.h
+  UnrolledBlockCrsMatrixWrapper.h
+  types.h
+  util.h
+""".split()
+
+local_env = env.Clone()
+
+# collect dependencies for other modules
+trilinoswraplibs = env['escript_libs'] + env['trilinos_libs']
+
+local_env.PrependUnique(LIBS = trilinoswraplibs)
+
+env['trilinoswrap_libs'] = [module_name] + trilinoswraplibs
+
+include_path = Dir(module_name, local_env['incinstall'])
+hdr_inst = local_env.Install(include_path, headers)
+
+lib = local_env.SharedLibrary(module_name, sources)
+lib_inst = local_env.Install(local_env['libinstall'], lib)
+
+build = env.Alias('build_trilinoswrap', [hdr_inst, lib])
+env.Alias('install_trilinoswrap', [build, lib_inst])
+
diff --git a/escriptcore/test/DataAlgorithmAdapterTestCase.h b/trilinoswrap/src/TrilinosAdapterException.h
similarity index 61%
rename from escriptcore/test/DataAlgorithmAdapterTestCase.h
rename to trilinoswrap/src/TrilinosAdapterException.h
index 400d723..1f435f5 100644
--- a/escriptcore/test/DataAlgorithmAdapterTestCase.h
+++ b/trilinoswrap/src/TrilinosAdapterException.h
@@ -14,24 +14,22 @@
 *
 *****************************************************************************/
 
+#ifndef __ESYS_TRILINOSADAPTEREXCEPTION_H__
+#define __ESYS_TRILINOSADAPTEREXCEPTION_H__
 
-#if !defined DataAlgorithmAdapterTestCase_20040715_H
-#define DataAlgorithmAdapterTestCase_20040715_H
+#include <escript/EsysException.h>
 
-#include <cppunit/TestFixture.h>
-#include <cppunit/TestSuite.h>
+namespace esys_trilinos {
 
-#define REL_TOL ((double)1.e-10)
-
-class DataAlgorithmAdapterTestCase : public CppUnit::TestFixture
+class TrilinosAdapterException : public escript::EsysException
 {
 public:
-  void testAll();
-  void testAlgorithm();
-  void testDpAlgorithm();
-
-  static CppUnit::TestSuite* suite();
+    TrilinosAdapterException(const std::string& str)
+        : escript::EsysException(str) {}
 };
 
-#endif
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOSADAPTEREXCEPTION_H__
 
diff --git a/trilinoswrap/src/TrilinosMatrixAdapter.cpp b/trilinoswrap/src/TrilinosMatrixAdapter.cpp
new file mode 100644
index 0000000..edc75c0
--- /dev/null
+++ b/trilinoswrap/src/TrilinosMatrixAdapter.cpp
@@ -0,0 +1,197 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "TrilinosMatrixAdapter.h" 
+#include "BlockCrsMatrixWrapper.h" 
+#include "CrsMatrixWrapper.h" 
+#include "TrilinosAdapterException.h" 
+#include "UnrolledBlockCrsMatrixWrapper.h" 
+#include "util.h" 
+
+#include <escript/index.h>
+#include <escript/Data.h>
+#include <escript/FunctionSpaceFactory.h>
+#include <escript/SolverOptions.h>
+
+namespace bp = boost::python;
+using Teuchos::rcp;
+
+namespace esys_trilinos {
+
+TrilinosMatrixAdapter::TrilinosMatrixAdapter(escript::JMPI mpiInfo,
+        int blocksize, const escript::FunctionSpace& fs,
+        const_TrilinosGraph_ptr graph, bool isComplex, bool unroll) :
+    AbstractSystemMatrix(blocksize, fs, blocksize, fs),
+    m_mpiInfo(mpiInfo),
+    m_isComplex(isComplex)
+{
+    if (isComplex) {
+        if (blocksize == 1) {
+            cmat = rcp(new CrsMatrixWrapper<cplx_t>(graph));
+        } else if (unroll) {
+            const_TrilinosGraph_ptr newGraph(util::unrollCrsGraph(graph, blocksize));
+            cmat = rcp(new UnrolledBlockCrsMatrixWrapper<cplx_t>(newGraph, blocksize));
+        } else {
+            cmat = rcp(new BlockCrsMatrixWrapper<cplx_t>(graph, blocksize));
+        }
+    } else {
+        if (blocksize == 1) {
+            mat = rcp(new CrsMatrixWrapper<real_t>(graph));
+        } else if (unroll) {
+            const_TrilinosGraph_ptr newGraph(util::unrollCrsGraph(graph, blocksize));
+            mat = rcp(new UnrolledBlockCrsMatrixWrapper<real_t>(newGraph, blocksize));
+        } else {
+            mat = rcp(new BlockCrsMatrixWrapper<real_t>(graph, blocksize));
+        }
+    }
+}
+
+template<>
+void TrilinosMatrixAdapter::add<real_t>(const std::vector<LO>& rowIdx,
+                                        const std::vector<real_t>& array)
+{
+    if (m_isComplex) {
+        throw escript::ValueError("Please use complex array to add to complex "
+                                  "matrix!");
+    } else {
+        (*mat).add(rowIdx, array);
+    }
+}
+
+template<>
+void TrilinosMatrixAdapter::add<cplx_t>(const std::vector<LO>& rowIdx,
+                                        const std::vector<cplx_t>& array)
+{
+    if (m_isComplex) {
+        (*cmat).add(rowIdx, array);
+    } else {
+        throw escript::ValueError("Please use real-valued array to add to "
+                                  "real-valued matrix!");
+    }
+}
+
+void TrilinosMatrixAdapter::ypAx(escript::Data& y, escript::Data& x) const
+{
+    if (x.getDataPointSize() != getBlockSize()) {
+        throw TrilinosAdapterException("matrix vector product: block size "
+                        "does not match the number of components in input.");
+    } else if (y.getDataPointSize() != getBlockSize()) {
+        throw TrilinosAdapterException("matrix vector product: block size "
+                        "does not match the number of components in output.");
+    } else if (x.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw TrilinosAdapterException("matrix vector product: matrix "
+                   "function space and function space of input don't match.");
+    } else if (y.getFunctionSpace() != getRowFunctionSpace()) {
+        throw TrilinosAdapterException("matrix vector product: matrix "
+                  "function space and function space of output don't match.");
+    } else if (y.isComplex() != m_isComplex || x.isComplex() != m_isComplex) {
+        throw escript::ValueError("matrix vector product: matrix complexity "
+                  "must match vector complexity!");
+    }
+
+    // expand data objects
+    x.expand();
+    y.expand();
+    y.requireWrite();
+
+    if (m_isComplex) {
+        const Teuchos::ArrayView<const cplx_t> xView(x.getSampleDataRO(0,
+                        cplx_t(0)), x.getNumDataPoints()*x.getDataPointSize());
+        const Teuchos::ArrayView<cplx_t> yView(y.getSampleDataRW(0, cplx_t(0)),
+                                    y.getNumDataPoints()*y.getDataPointSize());
+        cmat->ypAx(yView, xView);
+    } else {
+        const Teuchos::ArrayView<const real_t> xView(x.getSampleDataRO(0),
+                                    x.getNumDataPoints()*x.getDataPointSize());
+        const Teuchos::ArrayView<real_t> yView(y.getSampleDataRW(0),
+                                    y.getNumDataPoints()*y.getDataPointSize());
+        mat->ypAx(yView, xView);
+    }
+}
+
+void TrilinosMatrixAdapter::setToSolution(escript::Data& out, escript::Data& in,
+                                 bp::object& options) const
+{
+    if (out.getDataPointSize() != getBlockSize()) {
+        throw TrilinosAdapterException("solve: block size does not match the number of components of solution.");
+    } else if (in.getDataPointSize() != getBlockSize()) {
+        throw TrilinosAdapterException("solve: block size does not match the number of components of right hand side.");
+    } else if (out.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw TrilinosAdapterException("solve: matrix function space and function space of solution don't match.");
+    } else if (in.getFunctionSpace() != getRowFunctionSpace()) {
+        throw TrilinosAdapterException("solve: matrix function space and function space of right hand side don't match.");
+    } else if (in.isComplex() != m_isComplex || out.isComplex() != m_isComplex) {
+        throw escript::ValueError("solve: matrix complexity must match vector "
+                                  "complexity!");
+    }
+
+    options.attr("resetDiagnostics")();
+    escript::SolverBuddy& sb = bp::extract<escript::SolverBuddy&>(options);
+    out.expand();
+    out.requireWrite();
+    in.expand();
+
+    if (m_isComplex) {
+        const Teuchos::ArrayView<const cplx_t> bView(in.getSampleDataRO(0,
+                      cplx_t(0)), in.getNumDataPoints()*in.getDataPointSize());
+        const Teuchos::ArrayView<cplx_t> outView(out.getSampleDataRW(0,
+                    cplx_t(0)), out.getNumDataPoints()*out.getDataPointSize());
+        cmat->solve(outView, bView, sb);
+
+    } else {
+        const Teuchos::ArrayView<const real_t> bView(in.getSampleDataRO(0),
+                                  in.getNumDataPoints()*in.getDataPointSize());
+        const Teuchos::ArrayView<real_t> outView(out.getSampleDataRW(0),
+                                out.getNumDataPoints()*out.getDataPointSize());
+        mat->solve(outView, bView, sb);
+    }
+}
+
+void TrilinosMatrixAdapter::nullifyRowsAndCols(escript::Data& row_q,
+                                               escript::Data& col_q,
+                                               double mdv)
+{
+    if (col_q.getDataPointSize() != getColumnBlockSize()) {
+        throw TrilinosAdapterException("nullifyRowsAndCols: column block size does not match the number of components of column mask.");
+    } else if (row_q.getDataPointSize() != getRowBlockSize()) {
+        throw TrilinosAdapterException("nullifyRowsAndCols: row block size does not match the number of components of row mask.");
+    } else if (col_q.getFunctionSpace() != getColumnFunctionSpace()) {
+        throw TrilinosAdapterException("nullifyRowsAndCols: column function space and function space of column mask don't match.");
+    } else if (row_q.getFunctionSpace() != getRowFunctionSpace()) {
+        throw TrilinosAdapterException("nullifyRowsAndCols: row function space and function space of row mask don't match.");
+    }
+
+    col_q.expand();
+    row_q.expand();
+    const Teuchos::ArrayView<const real_t> rowMask(row_q.getSampleDataRO(0),
+                            row_q.getNumDataPoints()*row_q.getDataPointSize());
+    // we need remote values for col_q
+    const Teuchos::ArrayView<const real_t> colView(col_q.getSampleDataRO(0),
+                            col_q.getNumDataPoints()*col_q.getDataPointSize());
+
+    if (m_isComplex)
+        cmat->nullifyRowsAndCols(rowMask, colView, mdv);
+    else
+        mat->nullifyRowsAndCols(rowMask, colView, mdv);
+}
+
+void TrilinosMatrixAdapter::saveHB(const std::string& filename) const
+{
+    throw escript::NotImplementedError("Harwell-Boeing interface not available.");
+}
+
+}  // end of namespace
+
diff --git a/trilinoswrap/src/TrilinosMatrixAdapter.h b/trilinoswrap/src/TrilinosMatrixAdapter.h
new file mode 100644
index 0000000..be90ade
--- /dev/null
+++ b/trilinoswrap/src/TrilinosMatrixAdapter.h
@@ -0,0 +1,105 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOSMATRIXADAPTER_H__
+#define __ESYS_TRILINOSMATRIXADAPTER_H__
+
+#include <escript/AbstractSystemMatrix.h>
+#include <escript/FunctionSpace.h>
+
+#include <trilinoswrap/AbstractMatrixWrapper.h>
+
+namespace escript {
+    class SolverBuddy;
+}
+
+namespace esys_trilinos {
+
+class TrilinosMatrixAdapter : public escript::AbstractSystemMatrix
+{
+public:
+    /**
+       \brief
+       Creates a new Trilinos CRS/block CRS matrix adapter using a compatible
+       fill-complete Trilinos matrix graph.
+    */
+    TrilinosMatrixAdapter(escript::JMPI mpiInfo, int blocksize,
+                          const escript::FunctionSpace& fs,
+                          const_TrilinosGraph_ptr graph,
+                          bool isComplex = false, bool unroll = false);
+
+    virtual ~TrilinosMatrixAdapter() {}
+
+    virtual void nullifyRowsAndCols(escript::Data& row_q, escript::Data& col_q,
+                                    double mdv);  
+
+    virtual void saveMM(const std::string& filename) const
+    {
+        if (m_isComplex)
+            cmat->saveMM(filename);
+        else
+            mat->saveMM(filename);
+    }
+
+    virtual void saveHB(const std::string& filename) const;
+
+    virtual void resetValues(bool preserveSolverData = false)
+    {
+        if (m_isComplex)
+            cmat->resetValues(preserveSolverData);
+        else
+            mat->resetValues(preserveSolverData);
+    }
+
+    /// notifies the matrix that changes are about to happen.
+    inline void resumeFill()
+    {
+        if (m_isComplex)
+            cmat->resumeFill();
+        else
+            mat->resumeFill();
+    }
+
+    /// notifies the matrix that a set of changes has occurred.
+    inline void fillComplete(bool localOnly)
+    {
+        if (m_isComplex)
+            cmat->fillComplete(localOnly);
+        else
+            mat->fillComplete(localOnly);
+    }
+
+    template<typename ST>
+    void add(const std::vector<LO>& rowIndex, const std::vector<ST>& array);
+
+    inline int getBlockSize() const { return getRowBlockSize(); }
+
+private:
+    virtual void setToSolution(escript::Data& out, escript::Data& in,
+                               boost::python::object& options) const;
+
+    virtual void ypAx(escript::Data& y, escript::Data& x) const;
+
+    escript::JMPI m_mpiInfo;
+    bool m_isComplex;
+    Teuchos::RCP<AbstractMatrixWrapper<real_t> > mat;
+    Teuchos::RCP<AbstractMatrixWrapper<cplx_t> > cmat;
+};
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOSMATRIXADAPTER_H__
+
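As the explicit specialisations in TrilinosMatrixAdapter.cpp above show, add() dispatches on m_isComplex and throws a ValueError when the scalar type of the array does not match the matrix. A minimal sketch of real-valued assembly through the adapter (a blocksize-1 matrix is assumed, so the dense array holds rows.size() squared values; a complex adapter would take std::vector<cplx_t> instead):

    #include <trilinoswrap/TrilinosMatrixAdapter.h>
    #include <vector>

    // `adapter` must have been created with isComplex=false (e.g. by a domain);
    // a blocksize-1 matrix is assumed so the dense array holds rows*rows values.
    void addElementMatrix(esys_trilinos::TrilinosMatrixAdapter& adapter)
    {
        std::vector<esys_trilinos::LO> rows = {12, 13, 14};
        std::vector<esys_trilinos::real_t> values(rows.size() * rows.size(), 0.);
        // ... fill `values` with the element contributions ...

        adapter.resumeFill();                              // open for changes
        adapter.add<esys_trilinos::real_t>(rows, values);  // real array, real matrix
        adapter.fillComplete(true);                        // local changes only
    }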
diff --git a/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.cpp b/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.cpp
new file mode 100644
index 0000000..1037d65
--- /dev/null
+++ b/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.cpp
@@ -0,0 +1,60 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#include "UnrolledBlockCrsMatrixWrapper.h" 
+
+#include <escript/index.h>
+
+namespace esys_trilinos {
+
+template<typename ST>
+UnrolledBlockCrsMatrixWrapper<ST>::UnrolledBlockCrsMatrixWrapper(
+        const_TrilinosGraph_ptr graph, int blocksize) :
+    CrsMatrixWrapper<ST>(graph),
+    blockSize(blocksize)
+{
+}
+
+template<typename ST>
+void UnrolledBlockCrsMatrixWrapper<ST>::add(const std::vector<LO>& rowIdx,
+                                            const std::vector<ST>& array)
+{
+    const size_t emSize = rowIdx.size();
+    std::vector<LO> cols(emSize * blockSize);
+    std::vector<ST> vals(emSize * blockSize);
+    for (size_t ri = 0; ri < emSize; ri++) {
+        for (int rj = 0; rj < blockSize; rj++) {
+            const LO row = rowIdx[ri] * blockSize + rj;
+            if (row <= this->maxLocalRow) {
+                for (size_t ci = 0; ci < emSize; ci++) {
+                    for (int cj = 0; cj < blockSize; cj++) {
+                        cols[ci*blockSize + cj] = rowIdx[ci] * blockSize + cj;
+                        const size_t srcIdx = INDEX4(rj, cj, ri, ci, blockSize, blockSize, emSize);
+                        vals[ci*blockSize + cj] = array[srcIdx];
+                    }
+                }
+                this->mat.sumIntoLocalValues(row, cols, vals);
+            }
+        }
+    }
+}
+
+// instantiate
+template class UnrolledBlockCrsMatrixWrapper<real_t>;
+template class UnrolledBlockCrsMatrixWrapper<cplx_t>;
+
+}  // end of namespace
+
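The loops above expand every logical block row rowIdx[ri] into blockSize point rows and scatter the dense element matrix accordingly: entry (rj, cj) of block (ri, ci) ends up in point row rowIdx[ri]*blockSize+rj and point column rowIdx[ci]*blockSize+cj. A tiny self-contained check of that arithmetic; the INDEX4 macro is reproduced here under the assumption that escript/index.h uses the usual fastest-index-first layout:

    #include <cassert>

    // assumed to mirror escript/index.h: i0 varies fastest
    #define INDEX4(i0,i1,i2,i3,n0,n1,n2) ((i0)+(n0)*((i1)+(n1)*((i2)+(n2)*(i3))))

    int main()
    {
        const int blockSize = 2, emSize = 2;       // 2x2 blocks, 2 block rows/cols
        const int rowIdx[emSize] = {5, 9};         // logical block rows
        // entry (rj=1, cj=0) of block (ri=0, ci=1):
        const int srcIdx = INDEX4(1, 0, 0, 1, blockSize, blockSize, emSize);
        assert(srcIdx == 9);                       // position in the flat array
        assert(rowIdx[0]*blockSize + 1 == 11);     // unrolled point row
        assert(rowIdx[1]*blockSize + 0 == 18);     // unrolled point column
        return 0;
    }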
diff --git a/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.h b/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.h
new file mode 100644
index 0000000..276f689
--- /dev/null
+++ b/trilinoswrap/src/UnrolledBlockCrsMatrixWrapper.h
@@ -0,0 +1,46 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOS_UNROLLEDBLOCKCRSMATRIXWRAPPER_H__
+#define __ESYS_TRILINOS_UNROLLEDBLOCKCRSMATRIXWRAPPER_H__
+
+#include <trilinoswrap/CrsMatrixWrapper.h>
+
+namespace esys_trilinos {
+
+template<typename ST>
+class UnrolledBlockCrsMatrixWrapper : public CrsMatrixWrapper<ST>
+{
+public:
+    typedef typename CrsMatrixWrapper<ST>::Matrix Matrix;
+
+    /**
+       \brief
+       Creates a new Trilinos CRS matrix wrapper using a compatible
+       fill-complete unrolled Trilinos matrix graph and the given block size.
+    */
+    UnrolledBlockCrsMatrixWrapper(const_TrilinosGraph_ptr graph, int blocksize);
+
+    void add(const std::vector<LO>& rowIndex, const std::vector<ST>& array);
+
+private:
+    int blockSize;
+};
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOS_UNROLLEDBLOCKCRSMATRIXWRAPPER_H__
+
diff --git a/trilinoswrap/src/types.h b/trilinoswrap/src/types.h
new file mode 100644
index 0000000..ca7ed69
--- /dev/null
+++ b/trilinoswrap/src/types.h
@@ -0,0 +1,89 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOSWRAP_TYPES_H__
+#define __ESYS_TRILINOSWRAP_TYPES_H__
+
+#include <escript/DataTypes.h>
+
+#ifndef ESYS_MPI
+#include <escript/EsysMPI.h> // for MPI_Comm typedef
+#include <Teuchos_DefaultComm.hpp>
+#endif
+#include <Tpetra_CrsGraph.hpp>
+#include <Tpetra_RowMatrix.hpp>
+
+#include <Tpetra_Experimental_BlockVector.hpp>
+
+namespace esys_trilinos {
+
+/// Scalar types
+typedef escript::DataTypes::real_t  real_t;
+typedef escript::DataTypes::cplx_t  cplx_t;
+
+/// Global Ordinal type
+typedef escript::DataTypes::index_t GO;
+/// Local Ordinal type
+typedef escript::DataTypes::index_t LO;
+/// Kokkos Node type
+#ifdef _OPENMP
+typedef Kokkos::Compat::KokkosOpenMPWrapperNode NT;
+#elif ESYS_HAVE_CUDA
+typedef Kokkos::Compat::KokkosCudaWrapperNode   NT;
+#else
+typedef Kokkos::Compat::KokkosSerialWrapperNode NT;
+#endif
+
+typedef Tpetra::CrsGraph<LO,GO,NT>    GraphType;
+typedef Tpetra::Import<LO,GO,NT>      ImportType;
+typedef Teuchos::RCP<GraphType>       TrilinosGraph_ptr;
+typedef Teuchos::RCP<const GraphType> const_TrilinosGraph_ptr;
+typedef GraphType::map_type           MapType;
+typedef Teuchos::RCP<MapType>         TrilinosMap_ptr;
+typedef Teuchos::RCP<const MapType>   const_TrilinosMap_ptr;
+
+template<typename ST> using MatrixType = Tpetra::RowMatrix<ST,LO,GO,NT>;
+template<typename ST> using VectorType = Tpetra::MultiVector<ST,LO,GO,NT>;
+template<typename ST> using OpType     = Tpetra::Operator<ST,LO,GO,NT>;
+
+typedef VectorType<real_t> RealVector;
+typedef OpType<real_t>     RealOperator;
+
+typedef VectorType<cplx_t> ComplexVector;
+typedef OpType<cplx_t>     ComplexOperator;
+
+// experimental block types
+template<typename ST> using BlockVectorType =
+                               Tpetra::Experimental::BlockVector<ST,LO,GO,NT>;
+typedef BlockVectorType<real_t> RealBlockVector;
+typedef BlockVectorType<cplx_t> ComplexBlockVector;
+
+
+/// converts an MPI communicator to a Teuchos communicator
+inline
+Teuchos::RCP<const Teuchos::Comm<int> > TeuchosCommFromEsysComm(MPI_Comm comm)
+{
+#ifdef ESYS_MPI
+    return Teuchos::rcp(new Teuchos::MpiComm<int>(comm));
+#else
+    return Teuchos::DefaultComm<int>::getComm();
+#endif
+}
+
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOSWRAP_TYPES_H__
+
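types.h pins down the ordinal and node template parameters used throughout the wrapper and provides the bridge from an MPI communicator to a Teuchos one. A minimal sketch of building a row map with those types (MPI build assumed; the uniform distribution and index base 0 are illustrative choices):

    #include <trilinoswrap/types.h>
    #include <Tpetra_Map.hpp>

    using namespace esys_trilinos;

    Teuchos::RCP<const MapType> makeRowMap(MPI_Comm comm, GO numGlobalRows)
    {
        Teuchos::RCP<const Teuchos::Comm<int> > tcomm = TeuchosCommFromEsysComm(comm);
        // contiguous, uniformly distributed map over all ranks, index base 0
        return Teuchos::rcp(new MapType(numGlobalRows, 0, tcomm));
    }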
diff --git a/trilinoswrap/src/util.h b/trilinoswrap/src/util.h
new file mode 100644
index 0000000..4666cbd
--- /dev/null
+++ b/trilinoswrap/src/util.h
@@ -0,0 +1,91 @@
+
+/*****************************************************************************
+*
+* Copyright (c) 2016 by The University of Queensland
+* http://www.uq.edu.au
+*
+* Primary Business: Queensland, Australia
+* Licensed under the Apache License, version 2.0
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
+* Development 2012-2013 by School of Earth Sciences
+* Development from 2014 by Centre for Geoscience Computing (GeoComp)
+*
+*****************************************************************************/
+
+#ifndef __ESYS_TRILINOSWRAP_UTIL_H__
+#define __ESYS_TRILINOSWRAP_UTIL_H__
+
+#include <escript/EsysException.h>
+#include <escript/SolverOptions.h>
+
+#include <Teuchos_ParameterList.hpp>
+
+#include <boost/python.hpp>
+#include <boost/python/dict.hpp>
+
+#include <string>
+
+namespace esys_trilinos {
+
+namespace util {
+
+template<typename T>
+void extractParamIfSet(const std::string& name,
+                       const boost::python::dict& pyDict,
+                       Teuchos::ParameterList& params)
+{
+    if (pyDict.has_key(name)) {
+        boost::python::object bpo = pyDict.get(name);
+        if (boost::python::extract<T>(bpo).check()) {
+            T val = boost::python::extract<T>(bpo);
+            params.set(name, val);
+        } else {
+            throw escript::ValueError("Wrong type for option " + name);
+        }
+    }
+}
+
+inline
+TrilinosGraph_ptr unrollCrsGraph(const_TrilinosGraph_ptr graph, int blockSize)
+{
+    // template type double is arbitrary
+    MapType cpm = BlockVectorType<double>::makePointMap(*graph->getColMap(), blockSize);
+    MapType rpm = BlockVectorType<double>::makePointMap(*graph->getRowMap(), blockSize);
+    TrilinosMap_ptr colPointMap(new MapType(cpm));
+    TrilinosMap_ptr rowPointMap(new MapType(rpm));
+    const LO numMatrixRows = graph->getRowMap()->getNodeNumElements();
+    const LO numUnrolledRows = rpm.getNodeNumElements();
+    Teuchos::ArrayRCP<size_t> rowPtr(numUnrolledRows + 1);
+    Teuchos::ArrayRCP<GO> colInd(graph->getNodeNumEntries() * blockSize * blockSize);
+
+    for (LO row = 0; row < numMatrixRows; row++) {
+        size_t numColumns = graph->getNumEntriesInLocalRow(row);
+        Teuchos::Array<LO> indices(numColumns);
+        graph->getLocalRowCopy(row, indices(), numColumns);
+        for (int b = 0; b < blockSize; b++) {
+            for (size_t c = 0; c < numColumns; c++) {
+                for (int cb = 0; cb < blockSize; cb++) {
+                    colInd[rowPtr[row * blockSize + b] + c * blockSize + cb] =
+                        indices[c] * blockSize + cb;
+                }
+            }
+            rowPtr[row * blockSize + b + 1] = rowPtr[row * blockSize + b]
+                    + numColumns * blockSize;
+        }
+    }
+
+    GraphType* unrolledGraph = new GraphType(rowPointMap, colPointMap, rowPtr, colInd);
+
+    Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList();
+    params->set("Optimize Storage", true);
+    unrolledGraph->fillComplete(rowPointMap, rowPointMap, params);
+    return Teuchos::rcp(unrolledGraph);
+}
+
+} // namespace util
+} // namespace esys_trilinos
+
+#endif // __ESYS_TRILINOSWRAP_UTIL_H__
+
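extractParamIfSet() is what lets the generic Python-side option dict override the defaults assembled in PreconditionerFactory.cpp: a key is copied into the Teuchos list only if it is present, and a value of the wrong type raises a ValueError instead of being silently dropped. A minimal sketch (the Python interpreter must already be initialised since boost::python::dict is involved; the parameter names are the ones used above):

    #include <trilinoswrap/util.h>

    namespace bp = boost::python;

    void fillRelaxationParams(const bp::dict& pyParams,
                              Teuchos::ParameterList& params)
    {
        using esys_trilinos::util::extractParamIfSet;

        // defaults first ...
        params.set("relaxation: sweeps", 1);
        params.set("relaxation: damping factor", 1.0);
        // ... then let the Python dict override them where keys are present;
        // a key of the wrong type throws escript::ValueError("Wrong type ...")
        extractParamIfSet<int>("relaxation: sweeps", pyParams, params);
        extractParamIfSet<double>("relaxation: damping factor", pyParams, params);
    }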
diff --git a/pasowrap/py_src/SConscript b/weipa/SConscript
similarity index 67%
rename from pasowrap/py_src/SConscript
rename to weipa/SConscript
index 05a35ee..5963dad 100644
--- a/pasowrap/py_src/SConscript
+++ b/weipa/SConscript
@@ -1,4 +1,3 @@
-
 ##############################################################################
 #
 # Copyright (c) 2003-2016 by The University of Queensland
@@ -14,19 +13,14 @@
 #
 ##############################################################################
 
+Import('env')
+if env['weipa']:
+    # configure C++ library
+    env.SConscript('src/SConscript', duplicate=0)
 
-import os
-Import('*')
-
-local_env = env.Clone()
-
-# get the source file names
-sources = Glob('*.py')
-
-# compile
-pyc = local_env.PyCompile(sources)
+    # configure python module
+    env.SConscript('py_src/SConscript', variant_dir='py', duplicate=0)
 
-# install
-py_inst = local_env.Install(local_env['pyinstall']+'/pasowrap', pyc)
-env.Alias('install_pasowrap_py', py_inst)
+    # configure unit tests
+    env.SConscript('test/SConscript', duplicate=0)
 
diff --git a/weipa/py_src/SConscript b/weipa/py_src/SConscript
index 097ac75..6c9329f 100644
--- a/weipa/py_src/SConscript
+++ b/weipa/py_src/SConscript
@@ -22,9 +22,9 @@ sources = Glob('*.py')
 
 # compile
 pyc = local_env.PyCompile(sources)
-local_env.Depends(pyc, py_wrapper_lib)
+local_env.Depends(pyc, 'build_weipa')
 
 # install
-py_inst = local_env.Install(local_env['pyinstall']+'/weipa', pyc)
-env.Alias('install_weipa_py', py_inst)
+py_inst = local_env.Install(Dir('weipa', local_env['pyinstall']), pyc)
+env.Alias('install_weipa', py_inst)
 
diff --git a/weipa/src/DataVar.cpp b/weipa/src/DataVar.cpp
index de3fa63..1656bfb 100644
--- a/weipa/src/DataVar.cpp
+++ b/weipa/src/DataVar.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <weipa/DataVar.h>
 #include <weipa/DomainChunk.h>
 #include <weipa/ElementData.h>
@@ -26,11 +22,11 @@
 #include <escript/Data.h>
 #endif
 
-#if USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -132,7 +128,7 @@ bool DataVar::initFromEscript(escript::Data& escriptData, const_DomainChunk_ptr
     if (numSamples == 0)
         return true;
 
-    const dim_t* iPtr = escriptData.getFunctionSpace().borrowSampleReferenceIDs();
+    const escript::DataTypes::dim_t* iPtr = escriptData.getFunctionSpace().borrowSampleReferenceIDs();
     sampleID.insert(sampleID.end(), numSamples, 0);
     copy(iPtr, iPtr+numSamples, sampleID.begin());
 
@@ -157,7 +153,7 @@ bool DataVar::initFromEscript(escript::Data& escriptData, const_DomainChunk_ptr
         float* tempData = new float[dataSize*numSamples];
         float* destPtr = tempData;
         if (escriptData.isConstant()) {
-            const escript::DataAbstract::ValueType::value_type* values =
+            const escript::DataTypes::real_t* values =
                 escriptData.getDataRO();
             for (int pointNo=0; pointNo<numSamples*ptsPerSample; pointNo++) {
                 copy(values, values+dimSize, destPtr);
@@ -165,7 +161,7 @@ bool DataVar::initFromEscript(escript::Data& escriptData, const_DomainChunk_ptr
             }
         } else {
             for (int sampleNo=0; sampleNo<numSamples; sampleNo++) {
-                const escript::DataAbstract::ValueType::value_type* values =
+                const escript::DataTypes::real_t* values =
                     escriptData.getSampleDataRO(sampleNo);
                 copy(values, values+dataSize, destPtr);
                 destPtr += dataSize;
@@ -173,7 +169,7 @@ bool DataVar::initFromEscript(escript::Data& escriptData, const_DomainChunk_ptr
         }
 
         const float* srcPtr = tempData;
-        for (int i=0; i < dimSize; i++, srcPtr++) {
+        for (size_t i=0; i < dimSize; i++, srcPtr++) {
             float* c = averageData(srcPtr, dimSize);
             dataArray.push_back(c);
         }
@@ -225,7 +221,7 @@ bool DataVar::initFromFile(const string& filename, const_DomainChunk_ptr dom)
 {
     cleanup();
     
-#if USE_NETCDF
+#if ESYS_HAVE_NETCDF
     NcError ncerr(NcError::silent_nonfatal);    
     NcFile* input = new NcFile(filename.c_str());
     if (!input->is_valid()) {
@@ -310,7 +306,7 @@ bool DataVar::initFromFile(const string& filename, const_DomainChunk_ptr dom)
         var->get(tempData, &counts[0]);
 
         const float* srcPtr = tempData;
-        for (int i=0; i < dimSize; i++, srcPtr++) {
+        for (size_t i=0; i < dimSize; i++, srcPtr++) {
             float* c = averageData(srcPtr, dimSize);
             dataArray.push_back(c);
         }
@@ -320,7 +316,7 @@ bool DataVar::initFromFile(const string& filename, const_DomainChunk_ptr dom)
     }
 
     delete input;
-#endif // USE_NETCDF
+#endif // ESYS_HAVE_NETCDF
 
     return initialized;
 }
@@ -554,7 +550,7 @@ void DataVar::writeToVTK(ostream& os, int ownIndex)
         int firstId = nodeDist[ownIndex];
         int lastId = nodeDist[ownIndex+1];
         IndexMap sampleID2idx = buildIndexMap();
-        for (int i=0; i<nodeGNI.size(); i++) {
+        for (size_t i=0; i<nodeGNI.size(); i++) {
             if (firstId <= nodeGNI[i] && nodeGNI[i] < lastId) {
                 IndexMap::const_iterator it = sampleID2idx.find(requiredIDs[i]);
                 int idx = (it==sampleID2idx.end() ? -1 : (int)it->second);
@@ -626,7 +622,7 @@ string DataVar::getTensorDef() const
 bool DataVar::writeToSilo(DBfile* dbfile, const string& siloPath,
                           const string& units)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (!initialized)
         return false;
 
@@ -689,7 +685,7 @@ bool DataVar::writeToSilo(DBfile* dbfile, const string& siloPath,
     DBSetDir(dbfile, "/");
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/EscriptDataset.cpp b/weipa/src/EscriptDataset.cpp
index 6accc87..f779ebc 100644
--- a/weipa/src/EscriptDataset.cpp
+++ b/weipa/src/EscriptDataset.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <weipa/EscriptDataset.h>
 #include <weipa/DataVar.h>
 #include <weipa/ElementData.h>
@@ -33,13 +29,13 @@
 #endif
 
 #ifndef VISIT_PLUGIN
-#include <esysUtils/esysFileWriter.h>
 #include <escript/Data.h>
+#include <escript/FileWriter.h>
 #ifdef USE_DUDLEY
-#include <dudley/CppAdapter/MeshAdapter.h>
+#include <dudley/DudleyDomain.h>
 #endif
 #ifdef USE_FINLEY
-#include <finley/CppAdapter/MeshAdapter.h>
+#include <finley/FinleyDomain.h>
 #endif
 #ifdef USE_RIPLEY
 #include <ripley/RipleyDomain.h>
@@ -48,7 +44,7 @@
 #include <speckley/SpeckleyDomain.h>
 #endif
 
-using esysUtils::FileWriter;
+using escript::FileWriter;
 #endif
 
 #include <cstring>
@@ -56,15 +52,15 @@ using esysUtils::FileWriter;
 #include <numeric> // for std::accumulate
 #include <sstream> // for std::ostringstream
 
-#if USE_SILO
+#if ESYS_HAVE_SILO
 #include <silo.h>
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
 #include <pmpio.h>
 const int NUM_SILO_FILES = 1; // number of Silo files to produce per save
 #endif
 
-#endif // USE_SILO
+#endif // ESYS_HAVE_SILO
 
 using namespace std;
 
@@ -88,7 +84,7 @@ EscriptDataset::EscriptDataset() :
 //
 // Constructor with communicator
 //
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
 EscriptDataset::EscriptDataset(MPI_Comm comm) :
     cycle(0),
     time(0.),
@@ -124,7 +120,7 @@ bool EscriptDataset::setDomain(const escript::AbstractDomain* domain)
         cerr << "Domain is NULL!" << endl;
         myError = 1;
     } else {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         mpiComm = domain->getMPIComm();
         mpiRank = domain->getMPIRank();
         mpiSize = domain->getMPISize();
@@ -135,7 +131,7 @@ bool EscriptDataset::setDomain(const escript::AbstractDomain* domain)
         if (0) {
         }
 #if USE_FINLEY
-        else if (dynamic_cast<const finley::MeshAdapter*>(domain)) {
+        else if (dynamic_cast<const finley::FinleyDomain*>(domain)) {
             DomainChunk_ptr dom(new FinleyDomain());
             if (dom->initFromEscript(domain)) {
                 if (mpiSize > 1)
@@ -148,7 +144,7 @@ bool EscriptDataset::setDomain(const escript::AbstractDomain* domain)
         }
 #endif
 #if USE_DUDLEY
-        else if (dynamic_cast<const dudley::MeshAdapter*>(domain)) {
+        else if (dynamic_cast<const dudley::DudleyDomain*>(domain)) {
             DomainChunk_ptr dom(new FinleyDomain());
             if (dom->initFromEscript(domain)) {
                 if (mpiSize > 1)
@@ -193,7 +189,7 @@ bool EscriptDataset::setDomain(const escript::AbstractDomain* domain)
     }
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         MPI_Allreduce(&myError, &gError, 1, MPI_INT, MPI_MAX, mpiComm);
 #else
         gError = myError;
@@ -310,7 +306,7 @@ bool EscriptDataset::loadNetCDF(const DomainChunks& domain,
 //
 bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
 {
-#if USE_SILO
+#if ESYS_HAVE_SILO
     if (domainChunks.size() == 0)
         return false;
 
@@ -323,7 +319,7 @@ bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
     //Unidata has been contacted, Ticket ID: YTC-894489.
     //When this issue is resolved, remove the following line.
     driver = DB_PDB;
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
     PMPIO_baton_t* baton = NULL;
 #endif
 
@@ -332,7 +328,7 @@ bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
     }
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE,
                     mpiComm, 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
                     PMPIO_DefaultClose, (void*)&driver);
@@ -365,7 +361,7 @@ bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
     if (!dbfile) {
         cerr << "Could not create Silo file." << endl;
         if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
             PMPIO_HandOffBaton(baton, dbfile);
             PMPIO_Finish(baton);
 #endif
@@ -468,7 +464,7 @@ bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
     }
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         PMPIO_HandOffBaton(baton, dbfile);
         PMPIO_Finish(baton);
 #endif
@@ -478,7 +474,7 @@ bool EscriptDataset::saveSilo(string fileName, bool useMultiMesh)
 
     return true;
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     std::cerr << "WARNING: saving to silo file requested but escript was not built"
             " with silo support";
     return false;
@@ -510,7 +506,7 @@ bool EscriptDataset::saveVTK(string fileName)
         // We assume rank 0 always has samples, if this turns out to be a
         // wrong assumption then a bit more work is needed to get the correct
         // mesh name to all ranks.
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         if (mpiSize > 1) {
             char name[100];
             if (mpiRank == 0) {
@@ -626,7 +622,7 @@ bool EscriptDataset::saveVTKsingle(const string& fileName,
     boost::scoped_ptr<FileWriter> fw(NULL);
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         fw.reset(new FileWriter(mpiComm));
         domainChunks[0]->removeGhostZones(mpiRank);
         ElementData_ptr elements = domainChunks[0]->getElementsByName(meshName);
@@ -894,7 +890,7 @@ bool EscriptDataset::loadDomain(const string filePattern, int nChunks)
     }
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         MPI_Allreduce(&myError, &gError, 1, MPI_INT, MPI_MAX, mpiComm);
 #else
         gError = myError;
@@ -931,7 +927,7 @@ bool EscriptDataset::setExternalDomain(const DomainChunks& domain)
     }
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         MPI_Allreduce(&myError, &gError, 1, MPI_INT, MPI_MAX, mpiComm);
 #else
         gError = myError;
@@ -986,7 +982,7 @@ bool EscriptDataset::loadData(const string filePattern, const string name,
         delete[] str;
 
         if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
             MPI_Allreduce(&myError, &gError, 1, MPI_INT, MPI_MAX, mpiComm);
 #else
             gError = myError;
@@ -1041,7 +1037,7 @@ void EscriptDataset::updateSampleDistribution(VarInfo& vi)
     const DataChunks& varChunks = vi.dataChunks;
 
     if (mpiSize > 1) {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         int myNumSamples = varChunks[0]->getNumberOfSamples();
         sampleDist.insert(sampleDist.end(), mpiSize, 0);
         MPI_Allgather(
@@ -1061,7 +1057,7 @@ void EscriptDataset::updateSampleDistribution(VarInfo& vi)
 //
 void EscriptDataset::putSiloMultiMesh(DBfile* dbfile, const string& meshName)
 {
-#if USE_SILO
+#if ESYS_HAVE_SILO
     vector<int> meshtypes;
     vector<string> tempstrings;
     vector<char*> meshnames;
@@ -1130,7 +1126,7 @@ void EscriptDataset::putSiloMultiMesh(DBfile* dbfile, const string& meshName)
 void EscriptDataset::putSiloMultiVar(DBfile* dbfile, const VarInfo& vi,
                                      bool useMeshFile)
 {
-#if USE_SILO
+#if ESYS_HAVE_SILO
     vector<int> vartypes;
     vector<string> tempstrings;
     vector<char*> varnames;
@@ -1185,7 +1181,7 @@ void EscriptDataset::putSiloMultiVar(DBfile* dbfile, const VarInfo& vi,
 //
 void EscriptDataset::putSiloMultiTensor(DBfile* dbfile, const VarInfo& vi)
 {
-#if USE_SILO
+#if ESYS_HAVE_SILO
     string tensorDir = vi.varName+string("_comps/");
     DBSetDir(dbfile, "/");
     DBMkdir(dbfile, tensorDir.c_str());
diff --git a/weipa/src/EscriptDataset.h b/weipa/src/EscriptDataset.h
index b3de6f1..246787f 100644
--- a/weipa/src/EscriptDataset.h
+++ b/weipa/src/EscriptDataset.h
@@ -65,7 +65,7 @@ public:
     /// \brief Default constructor.
     EscriptDataset();
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
     /// \brief Constructor with communicator.
     EscriptDataset(MPI_Comm comm);
 #endif
@@ -147,7 +147,7 @@ public:
     /// \brief Returns a vector with the mesh variables.
     const VarVector& getMeshVariables() const { return meshVariables; }
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
     MPI_Comm
 #else
     void*
@@ -178,7 +178,7 @@ private:
     DomainChunks domainChunks;
     VarVector variables, meshVariables;
     int mpiRank, mpiSize;
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
     MPI_Comm mpiComm;
 #else
     void* mpiComm;
diff --git a/weipa/src/FinleyDomain.cpp b/weipa/src/FinleyDomain.cpp
index a1d5aca..4cab673 100644
--- a/weipa/src/FinleyDomain.cpp
+++ b/weipa/src/FinleyDomain.cpp
@@ -14,32 +14,26 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <weipa/FinleyDomain.h>
 #include <weipa/FinleyNodes.h>
 #include <weipa/DataVar.h>
 
 #ifndef VISIT_PLUGIN
 #ifdef USE_DUDLEY
-#include <dudley/CppAdapter/MeshAdapter.h>
-#include <dudley/Mesh.h>
+#include <dudley/DudleyDomain.h>
 #endif
 #ifdef USE_FINLEY
-#include <finley/CppAdapter/MeshAdapter.h>
-#include <finley/Mesh.h>
+#include <finley/FinleyDomain.h>
 #endif
 #endif // VISIT_PLUGIN
 
 #include <iostream>
 
-#if USE_NETCDF
+#if ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -99,38 +93,36 @@ bool FinleyDomain::initFromEscript(const escript::AbstractDomain* escriptDomain)
     if (0) {
     }
 #ifdef USE_FINLEY
-    else if (dynamic_cast<const finley::MeshAdapter*>(escriptDomain)) {
-        const finley::Mesh* finleyMesh =
-            dynamic_cast<const finley::MeshAdapter*>(escriptDomain)
-                ->getFinley_Mesh();
+    else if (dynamic_cast<const finley::FinleyDomain*>(escriptDomain)) {
+        const finley::FinleyDomain* finleyMesh =
+            dynamic_cast<const finley::FinleyDomain*>(escriptDomain);
 
         nodes = FinleyNodes_ptr(new FinleyNodes("Elements"));
         cells = FinleyElements_ptr(new FinleyElements("Elements", nodes));
         faces = FinleyElements_ptr(new FinleyElements("FaceElements", nodes));
         contacts = FinleyElements_ptr(new FinleyElements("ContactElements", nodes));
 
-        if (nodes->initFromFinley(finleyMesh->Nodes) &&
-                cells->initFromFinley(finleyMesh->Elements) &&
-                faces->initFromFinley(finleyMesh->FaceElements) &&
-                contacts->initFromFinley(finleyMesh->ContactElements)) {
+        if (nodes->initFromFinley(finleyMesh->getNodes()) &&
+                cells->initFromFinley(finleyMesh->getElements()) &&
+                faces->initFromFinley(finleyMesh->getFaceElements()) &&
+                contacts->initFromFinley(finleyMesh->getContactElements())) {
             initialized = true;
         }
     }
 #endif
 #ifdef USE_DUDLEY
-    else if (dynamic_cast<const dudley::MeshAdapter*>(escriptDomain)) {
-        const Dudley_Mesh* dudleyMesh =
-            dynamic_cast<const dudley::MeshAdapter*>(escriptDomain)
-                ->getDudley_Mesh();
+    else if (dynamic_cast<const dudley::DudleyDomain*>(escriptDomain)) {
+        const dudley::DudleyDomain* dudleyMesh =
+            dynamic_cast<const dudley::DudleyDomain*>(escriptDomain);
 
         nodes = FinleyNodes_ptr(new FinleyNodes("Elements"));
         cells = FinleyElements_ptr(new FinleyElements("Elements", nodes));
         faces = FinleyElements_ptr(new FinleyElements("FaceElements", nodes));
         contacts = FinleyElements_ptr(new FinleyElements("ContactElements", nodes));
 
-        if (nodes->initFromDudley(dudleyMesh->Nodes) &&
-                cells->initFromDudley(dudleyMesh->Elements) &&
-                faces->initFromDudley(dudleyMesh->FaceElements)) {
+        if (nodes->initFromDudley(dudleyMesh->getNodes()) &&
+                cells->initFromDudley(dudleyMesh->getElements()) &&
+                faces->initFromDudley(dudleyMesh->getFaceElements())) {
             initialized = true;
         }
     }
@@ -147,11 +139,11 @@ bool FinleyDomain::initFromEscript(const escript::AbstractDomain* escriptDomain)
 bool FinleyDomain::initFromFile(const string& filename)
 {
     cleanup();
-    
-#if USE_NETCDF
+
+#if ESYS_HAVE_NETCDF
     NcError ncerr(NcError::silent_nonfatal);
     NcFile* input;
- 
+
     input = new NcFile(filename.c_str());
     if (!input->is_valid()) {
         cerr << "Could not open input file " << filename << "." << endl;
@@ -186,7 +178,7 @@ Centering FinleyDomain::getCenteringForFunctionSpace(int fsCode) const
         ret = NODE_CENTERED;
 #endif
 #ifdef USE_DUDLEY
-    if (fsCode==DUDLEY_REDUCED_NODES || fsCode==DUDLEY_NODES)
+    if (fsCode==DUDLEY_NODES)
         ret = NODE_CENTERED;
 #endif
     return ret;
@@ -205,7 +197,7 @@ NodeData_ptr FinleyDomain::getMeshForFunctionSpace(int fsCode) const
     ElementData_ptr elements = getElementsForFunctionSpace(fsCode);
     if (elements != NULL)
         result = elements->getNodes();
- 
+
     return result;
 }
 
@@ -269,10 +261,6 @@ ElementData_ptr FinleyDomain::getElementsForFunctionSpace(int fsCode) const
 
     if (fsCode == DUDLEY_NODES) {
         result = cells;
-    } else if (fsCode == DUDLEY_REDUCED_NODES) {
-        result = cells->getReducedElements();
-        if (!result)
-            result = cells;
     } else {
         switch (fsCode) {
             case DUDLEY_REDUCED_ELEMENTS:
@@ -318,7 +306,7 @@ StringVec FinleyDomain::getMeshNames() const
 StringVec FinleyDomain::getVarNames() const
 {
     StringVec res;
- 
+
     if (initialized) {
         res = nodes->getVarNames();
         StringVec tmpVec = cells->getVarNames();
@@ -474,7 +462,7 @@ bool FinleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
                                const StringVec& labels, const StringVec& units,
                                bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     // Write nodes, elements and mesh variables
     if (!initialized ||
             !cells->writeToSilo(dbfile, pathInSilo, labels, units, writeMeshData) ||
@@ -485,7 +473,7 @@ bool FinleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
     siloPath = pathInSilo;
     return true;
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/FinleyElements.cpp b/weipa/src/FinleyElements.cpp
index cd18d6e..5841084 100644
--- a/weipa/src/FinleyElements.cpp
+++ b/weipa/src/FinleyElements.cpp
@@ -14,33 +14,27 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <weipa/FinleyElements.h>
 #include <weipa/NodeData.h>
 
 #ifndef VISIT_PLUGIN
 
 #ifdef USE_DUDLEY
-#include <dudley/CppAdapter/MeshAdapter.h>
+#include <dudley/DudleyDomain.h>
 #endif
 #ifdef USE_FINLEY
-#include <finley/CppAdapter/MeshAdapter.h>
+#include <finley/FinleyDomain.h>
 #endif
 
-#elif !defined(ABS)
-#define ABS(X) ((X)>0?(X):-(X))
 #endif
 
 #include <iostream>
 
-#if USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -110,14 +104,14 @@ static const size_t hex27indices[8*8] = {
 };
 
 namespace weipa {
-    
+
 //
 // Constructor
 //
 FinleyElements::FinleyElements(const string& elementName, FinleyNodes_ptr nodeData)
     :
 #ifdef USE_FINLEY
-      finleyTypeId(finley::NoRef), 
+      finleyTypeId(finley::NoRef),
 #endif
       originalMesh(nodeData), name(elementName), numElements(0),
       numGhostElements(0), nodesPerElement(0),
@@ -160,7 +154,7 @@ FinleyElements::FinleyElements(const FinleyElements& e)
 //
 //
 //
-bool FinleyElements::initFromDudley(const Dudley_ElementFile* dudleyFile)
+bool FinleyElements::initFromDudley(const dudley::ElementFile* dudleyFile)
 {
 #if !defined VISIT_PLUGIN && defined USE_DUDLEY
     numElements = dudleyFile->numElements;
@@ -168,32 +162,12 @@ bool FinleyElements::initFromDudley(const Dudley_ElementFile* dudleyFile)
     if (numElements > 0) {
         nodesPerElement = dudleyFile->numNodes;
 
-        int* iPtr;
-   
-        iPtr = dudleyFile->Nodes;
-        nodes.clear();
-        nodes.insert(nodes.end(), numElements*nodesPerElement, 0);
-        copy(iPtr, iPtr+numElements*nodesPerElement, nodes.begin());
-
-        iPtr = dudleyFile->Color;
-        color.clear();
-        color.insert(color.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, color.begin());
-
-        iPtr = dudleyFile->Id;
-        ID.clear();
-        ID.insert(ID.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, ID.begin());
-
-        iPtr = dudleyFile->Owner;
-        owner.clear();
-        owner.insert(owner.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, owner.begin());
-
-        iPtr = dudleyFile->Tag;
-        tag.clear();
-        tag.insert(tag.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, tag.begin());
+        nodes.assign(dudleyFile->Nodes,
+                     dudleyFile->Nodes+numElements*nodesPerElement);
+        color.assign(dudleyFile->Color, dudleyFile->Color+numElements);
+        ID.assign(dudleyFile->Id, dudleyFile->Id+numElements);
+        owner.assign(dudleyFile->Owner, dudleyFile->Owner+numElements);
+        tag.assign(dudleyFile->Tag, dudleyFile->Tag+numElements);
 
         FinleyElementInfo f = getDudleyTypeInfo(dudleyFile->etype);
         type = f.elementType;
@@ -221,30 +195,12 @@ bool FinleyElements::initFromFinley(const finley::ElementFile* finleyFile)
     if (numElements > 0) {
         nodesPerElement = finleyFile->numNodes;
 
-        index_t* idxPtr = finleyFile->Nodes;
-        nodes.clear();
-        nodes.insert(nodes.end(), numElements*nodesPerElement, 0);
-        copy(idxPtr, idxPtr+numElements*nodesPerElement, nodes.begin());
-
-        int* iPtr = finleyFile->Color;
-        color.clear();
-        color.insert(color.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, color.begin());
-
-        idxPtr = finleyFile->Id;
-        ID.clear();
-        ID.insert(ID.end(), numElements, 0);
-        copy(idxPtr, idxPtr+numElements, ID.begin());
-
-        iPtr = finleyFile->Owner;
-        owner.clear();
-        owner.insert(owner.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, owner.begin());
-
-        iPtr = finleyFile->Tag;
-        tag.clear();
-        tag.insert(tag.end(), numElements, 0);
-        copy(iPtr, iPtr+numElements, tag.begin());
+        nodes.assign(finleyFile->Nodes,
+                     finleyFile->Nodes+numElements*nodesPerElement);
+        color.assign(finleyFile->Color, finleyFile->Color+numElements);
+        ID.assign(finleyFile->Id, finleyFile->Id+numElements);
+        owner.assign(finleyFile->Owner, finleyFile->Owner+numElements);
+        tag.assign(finleyFile->Tag, finleyFile->Tag+numElements);
 
         finleyTypeId = finleyFile->referenceElementSet->referenceElement
             ->Type->TypeId;
@@ -306,7 +262,7 @@ bool FinleyElements::initFromFinley(const finley::ElementFile* finleyFile)
 //
 bool FinleyElements::readFromNc(NcFile* ncfile)
 {
-#if USE_NETCDF
+#if ESYS_HAVE_NETCDF
     string num_str("num_");
     num_str += name;
 
@@ -398,8 +354,8 @@ bool FinleyElements::readFromNc(NcFile* ncfile)
         buildMeshes();
     }
 
-    return true; 
-#else // !USE_NETCDF
+    return true;
+#else // !ESYS_HAVE_NETCDF
     return false;
 #endif
 }
@@ -570,7 +526,7 @@ IntVec FinleyElements::prepareGhostIndices(int ownIndex)
 {
     IntVec indexArray;
     numGhostElements = 0;
-    
+
     // move indices of "ghost zones" to the end to be able to reorder
     // data accordingly
     for (int i=0; i<numElements; i++) {
@@ -670,7 +626,7 @@ void FinleyElements::writeConnectivityVTK(ostream& os)
     }
 }
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 //
 //
 //
@@ -695,7 +651,7 @@ bool FinleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
                                  const StringVec& labels,
                                  const StringVec& units, bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (numElements == 0)
         return true;
 
@@ -745,7 +701,7 @@ bool FinleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
             DBFreeOptlist(optList);
         }
     }
-    
+
     // Point mesh is useful for debugging
     if (0) {
         CoordArray& coordbase = const_cast<CoordArray&>(nodeMesh->getCoords());
@@ -793,7 +749,7 @@ bool FinleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
 
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
@@ -802,7 +758,7 @@ bool FinleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
 //
 //
 #ifdef USE_DUDLEY
-FinleyElementInfo FinleyElements::getDudleyTypeInfo(Dudley_ElementTypeId typeId)
+FinleyElementInfo FinleyElements::getDudleyTypeInfo(dudley::ElementTypeId typeId)
 {
     FinleyElementInfo ret;
     ret.multiCellIndices = NULL;
@@ -811,28 +767,28 @@ FinleyElementInfo FinleyElements::getDudleyTypeInfo(Dudley_ElementTypeId typeId)
     ret.quadDim = 0;
 
     switch (typeId) {
-        case Dudley_Line2Face://untested
-        case Dudley_Point1://untested
+        case dudley::Dudley_Line2Face://untested
+        case dudley::Dudley_Point1://untested
             cerr << "WARNING: Dudley type " <<typeId<< " is untested!" << endl;
             ret.elementSize = 1;
             ret.elementType = ZONETYPE_POLYGON;
             break;
 
-        case Dudley_Tri3Face://untested
+        case dudley::Dudley_Tri3Face://untested
             cerr << "WARNING: Dudley type " <<typeId<< " is untested!" << endl;
-        case Dudley_Line2:
+        case dudley::Dudley_Line2:
             ret.elementSize = ret.reducedElementSize = 2;
             ret.elementType = ret.reducedElementType = ZONETYPE_BEAM;
             break;
 
-        case Dudley_Tet4Face://untested
+        case dudley::Dudley_Tet4Face://untested
             cerr << "WARNING: Dudley type " <<typeId<< " is untested!" << endl;
-        case Dudley_Tri3:
+        case dudley::Dudley_Tri3:
             ret.elementSize = ret.reducedElementSize = 3;
             ret.elementType = ret.reducedElementType = ZONETYPE_TRIANGLE;
             break;
 
-        case Dudley_Tet4:
+        case dudley::Dudley_Tet4:
             ret.elementSize = ret.reducedElementSize = 4;
             ret.elementType = ret.reducedElementType = ZONETYPE_TET;
             break;
@@ -956,7 +912,7 @@ FinleyElementInfo FinleyElements::getFinleyTypeInfo(finley::ElementTypeId typeId
         case finley::Hex20Face_Contact:
             //VTK_QUADRATIC_QUAD
             ret.elementSize = 3;
-            ret.elementType = ZONETYPE_TRIANGLE; 
+            ret.elementType = ZONETYPE_TRIANGLE;
             ret.reducedElementSize = 4;
             ret.reducedElementType = ZONETYPE_QUAD;
             break;
@@ -1009,7 +965,7 @@ FinleyElementInfo FinleyElements::getFinleyTypeInfo(finley::ElementTypeId typeId
 // returns true if |x-c| <= r, false otherwise
 inline bool inside1D(float x, float c, float r)
 {
-    return (ABS(x-c) <= r);
+    return (std::abs(x-c) <= r);
 }
 
 // returns true if |x-cx| <= r and |y-cy| <= r, false otherwise
@@ -1030,7 +986,7 @@ inline bool inside3D(float x, float y, float z,
 inline bool sameSide(float d1, float d2)
 {
     const float TOL = 1.e-8f;
-    return (ABS(d1) < TOL || ABS(d2) < TOL || d1*d2>=0.);
+    return (std::abs(d1) < TOL || std::abs(d2) < TOL || d1*d2>=0.);
 }
 
 // computes the determinant of the 4x4 matrix given by its elements m_ij
@@ -1103,7 +1059,7 @@ static bool pointInTri(float x, float y,
     float dot11 = v1[0]*v1[0]+v1[1]*v1[1];
     float dot12 = v1[0]*v2[0]+v1[1]*v2[1];
     float invDenom = dot00*dot11 - dot01*dot01;
-    if (ABS(invDenom) < TOL) invDenom = TOL;
+    if (std::abs(invDenom) < TOL) invDenom = TOL;
     invDenom = 1.f/invDenom;
     float u = (dot11*dot02 - dot01*dot12) * invDenom;
     float v = (dot00*dot12 - dot01*dot02) * invDenom;
diff --git a/weipa/src/FinleyElements.h b/weipa/src/FinleyElements.h
index 1f797b5..1dbc135 100644
--- a/weipa/src/FinleyElements.h
+++ b/weipa/src/FinleyElements.h
@@ -21,7 +21,8 @@
 #include <weipa/FinleyNodes.h>
 
 #ifdef USE_DUDLEY
-#include <dudley/ElementType.h> // for Dudley_ElementTypeId
+#include <dudley/Dudley.h> // for DUDLEY_...
+#include <dudley/ElementType.h> // for dudley::ElementTypeId
 #endif
 #ifdef USE_FINLEY
 #include <finley/ReferenceElements.h> // for finley::ElementTypeId
@@ -30,7 +31,9 @@
 class DBfile;
 class NcFile;
 
-struct Dudley_ElementFile;
+namespace dudley {
+    class ElementFile;
+}
 
 namespace finley {
     class ElementFile;
@@ -75,10 +78,10 @@ public:
     /// \brief Destructor
     virtual ~FinleyElements() {}
 
-    /// \brief Initialises with data from a Dudley_ElementFile instance.
-    bool initFromDudley(const Dudley_ElementFile* dudleyFile);
+    /// \brief Initialises with data from a Dudley ElementFile instance.
+    bool initFromDudley(const dudley::ElementFile* dudleyFile);
 
-    /// \brief Initialises with data from a Finley_ElementFile instance.
+    /// \brief Initialises with data from a Finley ElementFile instance.
     bool initFromFinley(const finley::ElementFile* finleyFile);
 
     /// \brief Reads element data from escript/finley NetCDF file.
@@ -154,7 +157,7 @@ public:
 private:
     FinleyElements() {}
 #ifdef USE_DUDLEY
-    FinleyElementInfo getDudleyTypeInfo(Dudley_ElementTypeId typeId);
+    FinleyElementInfo getDudleyTypeInfo(dudley::ElementTypeId typeId);
 #endif
 #ifdef USE_FINLEY
     FinleyElementInfo getFinleyTypeInfo(finley::ElementTypeId typeId);
diff --git a/weipa/src/FinleyNodes.cpp b/weipa/src/FinleyNodes.cpp
index 242b8e2..e5eebf7 100644
--- a/weipa/src/FinleyNodes.cpp
+++ b/weipa/src/FinleyNodes.cpp
@@ -14,27 +14,23 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/FinleyNodes.h>
 
 #ifndef VISIT_PLUGIN
 #ifdef USE_DUDLEY
-#include <dudley/Mesh.h>
 #include <dudley/NodeFile.h>
 #endif
 #ifdef USE_FINLEY
-#include <finley/Mesh.h>
 #include <finley/NodeFile.h>
 #endif
+using escript::DataTypes::index_t;
 #endif // VISIT_PLUGIN
 
-#if USE_NETCDF
+#ifdef ESYS_HAVE_NETCDF
 #include <netcdfcpp.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -132,17 +128,13 @@ FinleyNodes::~FinleyNodes()
 //
 //
 //
-bool FinleyNodes::initFromDudley(const Dudley_NodeFile* dudleyFile)
+bool FinleyNodes::initFromDudley(const dudley::NodeFile* dudleyFile)
 {
 #if !defined VISIT_PLUGIN && defined USE_DUDLEY
     numDims = dudleyFile->numDim;
-    numNodes = dudleyFile->numNodes;
-
-    int mpisize = dudleyFile->MPIInfo->size;
-    int* iPtr = dudleyFile->nodesDistribution->first_component;
-    nodeDist.clear();
-    nodeDist.insert(nodeDist.end(), mpisize+1, 0);
-    copy(iPtr, iPtr+mpisize+1, nodeDist.begin());
+    numNodes = dudleyFile->getNumNodes();
+    nodeDist.assign(dudleyFile->nodesDistribution->first_component.begin(),
+                    dudleyFile->nodesDistribution->first_component.end());
 
     CoordArray::iterator it;
     for (it = coords.begin(); it != coords.end(); it++)
@@ -165,30 +157,14 @@ bool FinleyNodes::initFromDudley(const Dudley_NodeFile* dudleyFile)
             }
         }
 
-        iPtr = dudleyFile->Id;
-        nodeID.insert(nodeID.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeID.begin());
-
-        iPtr = dudleyFile->Tag;
-        nodeTag.insert(nodeTag.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeTag.begin());
-
-        iPtr = dudleyFile->globalDegreesOfFreedom;
-        nodeGDOF.insert(nodeGDOF.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeGDOF.begin());
-
-        iPtr = dudleyFile->globalNodesIndex;
-        nodeGNI.insert(nodeGNI.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeGNI.begin());
-
-        iPtr = dudleyFile->globalReducedDOFIndex;
-        nodeGRDFI.insert(nodeGRDFI.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeGRDFI.begin());
-
-        iPtr = dudleyFile->globalReducedNodesIndex;
-        nodeGRNI.insert(nodeGRNI.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeGRNI.begin());
-
+        nodeID.assign(dudleyFile->Id, dudleyFile->Id+numNodes);
+        nodeTag.assign(dudleyFile->Tag, dudleyFile->Tag+numNodes);
+        nodeGDOF.assign(dudleyFile->globalDegreesOfFreedom,
+                        dudleyFile->globalDegreesOfFreedom+numNodes);
+        nodeGRDFI.assign(numNodes, 0);
+        nodeGNI.assign(dudleyFile->globalNodesIndex,
+                       dudleyFile->globalNodesIndex+numNodes);
+        nodeGRNI.assign(numNodes, 0);
     }
     return true;
 #else // VISIT_PLUGIN,USE_DUDLEY
@@ -203,13 +179,9 @@ bool FinleyNodes::initFromFinley(const finley::NodeFile* finleyFile)
 {
 #if !defined VISIT_PLUGIN && defined USE_FINLEY
     numDims = finleyFile->numDim;
-    numNodes = finleyFile->numNodes;
-
-    int mpisize = finleyFile->MPIInfo->size;
-    index_t* idxPtr = finleyFile->nodesDistribution->first_component;
-    nodeDist.clear();
-    nodeDist.insert(nodeDist.end(), mpisize+1, 0);
-    copy(idxPtr, idxPtr+mpisize+1, nodeDist.begin());
+    numNodes = finleyFile->getNumNodes();
+    nodeDist.assign(finleyFile->nodesDistribution->first_component.begin(),
+                    finleyFile->nodesDistribution->first_component.end());
 
     CoordArray::iterator it;
     for (it = coords.begin(); it != coords.end(); it++)
@@ -227,35 +199,21 @@ bool FinleyNodes::initFromFinley(const finley::NodeFile* finleyFile)
             double* srcPtr = finleyFile->Coordinates + i;
             float* c = new float[numNodes];
             coords.push_back(c);
-            for (index_t j=0; j<numNodes; j++, srcPtr+=numDims) {
+            for (escript::DataTypes::index_t j=0; j<numNodes; j++, srcPtr+=numDims) {
                 *c++ = (float) *srcPtr;
             }
         }
 
-        idxPtr = finleyFile->Id;
-        nodeID.insert(nodeID.end(), numNodes, 0);
-        copy(idxPtr, idxPtr+numNodes, nodeID.begin());
-
-        int* iPtr = finleyFile->Tag;
-        nodeTag.insert(nodeTag.end(), numNodes, 0);
-        copy(iPtr, iPtr+numNodes, nodeTag.begin());
-
-        idxPtr = finleyFile->globalDegreesOfFreedom;
-        nodeGDOF.insert(nodeGDOF.end(), numNodes, 0);
-        copy(idxPtr, idxPtr+numNodes, nodeGDOF.begin());
-
-        idxPtr = finleyFile->globalNodesIndex;
-        nodeGNI.insert(nodeGNI.end(), numNodes, 0);
-        copy(idxPtr, idxPtr+numNodes, nodeGNI.begin());
-
-        idxPtr = finleyFile->globalReducedDOFIndex;
-        nodeGRDFI.insert(nodeGRDFI.end(), numNodes, 0);
-        copy(idxPtr, idxPtr+numNodes, nodeGRDFI.begin());
-
-        idxPtr = finleyFile->globalReducedNodesIndex;
-        nodeGRNI.insert(nodeGRNI.end(), numNodes, 0);
-        copy(idxPtr, idxPtr+numNodes, nodeGRNI.begin());
-
+        nodeID.assign(finleyFile->Id, finleyFile->Id+numNodes);
+        nodeTag.assign(finleyFile->Tag, finleyFile->Tag+numNodes);
+        nodeGDOF.assign(finleyFile->globalDegreesOfFreedom,
+                        finleyFile->globalDegreesOfFreedom+numNodes);
+        nodeGNI.assign(finleyFile->globalNodesIndex,
+                       finleyFile->globalNodesIndex+numNodes);
+        nodeGRDFI.assign(finleyFile->globalReducedDOFIndex,
+                        finleyFile->globalReducedDOFIndex+numNodes);
+        nodeGRNI.assign(finleyFile->globalReducedNodesIndex,
+                        finleyFile->globalReducedNodesIndex+numNodes);
     }
     return true;
 #else // VISIT_PLUGIN,USE_FINLEY
@@ -268,7 +226,7 @@ bool FinleyNodes::initFromFinley(const finley::NodeFile* finleyFile)
 //
 bool FinleyNodes::readFromNc(NcFile* ncFile)
 {
-#if USE_NETCDF
+#if ESYS_HAVE_NETCDF
     NcAtt* att;
     NcVar* var;
  
@@ -334,7 +292,7 @@ bool FinleyNodes::readFromNc(NcFile* ncFile)
     }
 
     return true;
-#else // !USE_NETCDF
+#else // !ESYS_HAVE_NETCDF
     return false;
 #endif
 }
@@ -412,7 +370,7 @@ void FinleyNodes::writeCoordinatesVTK(ostream& os, int ownIndex)
 //
 bool FinleyNodes::writeToSilo(DBfile* dbfile)
 {
-#if USE_SILO
+#if ESYS_HAVE_SILO
     if (numNodes == 0)
         return true;
 
@@ -453,7 +411,7 @@ bool FinleyNodes::writeToSilo(DBfile* dbfile)
     DBSetDir(dbfile, "/");
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/FinleyNodes.h b/weipa/src/FinleyNodes.h
index a111eed..471429a 100644
--- a/weipa/src/FinleyNodes.h
+++ b/weipa/src/FinleyNodes.h
@@ -21,10 +21,13 @@
 
 class DBfile;
 class NcFile;
-struct Dudley_NodeFile;
+
 namespace finley {
     class NodeFile;
 }
+namespace dudley {
+    class NodeFile;
+}
 
 namespace weipa {
 
@@ -52,7 +55,7 @@ public:
     virtual ~FinleyNodes();
 
     /// \brief Initialises with dudley node file.
-    bool initFromDudley(const Dudley_NodeFile* dudleyFile);
+    bool initFromDudley(const dudley::NodeFile* dudleyFile);
 
     /// \brief Initialises with finley node file.
     bool initFromFinley(const finley::NodeFile* finleyFile);
diff --git a/weipa/src/RipleyDomain.cpp b/weipa/src/RipleyDomain.cpp
index 0a839bf..bd427b4 100644
--- a/weipa/src/RipleyDomain.cpp
+++ b/weipa/src/RipleyDomain.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/RipleyDomain.h>
 #include <weipa/RipleyNodes.h>
 #include <weipa/DataVar.h>
@@ -27,7 +24,7 @@
 
 #include <iostream>
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -246,7 +243,7 @@ bool RipleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
                                const StringVec& labels, const StringVec& units,
                                bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     // Write nodes, elements and mesh variables
     if (!initialized
             || !cells->writeToSilo(dbfile, pathInSilo, labels, units, writeMeshData)
@@ -256,7 +253,7 @@ bool RipleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
     siloPath = pathInSilo;
     return true;
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/RipleyElements.cpp b/weipa/src/RipleyElements.cpp
index 2fa1d2b..30ef475 100644
--- a/weipa/src/RipleyElements.cpp
+++ b/weipa/src/RipleyElements.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/RipleyElements.h>
 #include <weipa/NodeData.h>
 
@@ -26,10 +23,13 @@
 
 #include <iostream>
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
+#ifndef VISIT_PLUGIN
+using escript::DataTypes::dim_t;
+#endif
 
 using namespace std;
 
@@ -371,7 +371,7 @@ void RipleyElements::writeConnectivityVTK(ostream& os)
     }
 }
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 inline int toSiloElementType(int type)
 {
     switch (type) {
@@ -388,7 +388,7 @@ bool RipleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
                                  const StringVec& labels,
                                  const StringVec& units, bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (numElements == 0)
         return true;
 
@@ -464,7 +464,7 @@ bool RipleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
 
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/RipleyNodes.cpp b/weipa/src/RipleyNodes.cpp
index a0086ae..bf81813 100644
--- a/weipa/src/RipleyNodes.cpp
+++ b/weipa/src/RipleyNodes.cpp
@@ -14,19 +14,20 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/RipleyNodes.h>
 
 #ifndef VISIT_PLUGIN
 #include <ripley/RipleyDomain.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
+#ifndef VISIT_PLUGIN
+using escript::DataTypes::dim_t;
+#endif
+
 using namespace std;
 
 namespace weipa {
@@ -223,7 +224,7 @@ void RipleyNodes::writeCoordinatesVTK(ostream& os, int ownIndex)
 //
 bool RipleyNodes::writeToSilo(DBfile* dbfile)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (numNodes == 0)
         return true;
 
@@ -248,7 +249,7 @@ bool RipleyNodes::writeToSilo(DBfile* dbfile)
     DBSetDir(dbfile, "/");
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/SConscript b/weipa/src/SConscript
index 0e26ed0..8f63010 100644
--- a/weipa/src/SConscript
+++ b/weipa/src/SConscript
@@ -13,20 +13,13 @@
 #
 ##############################################################################
 
-import os
 Import('*')
 
-local_env = env.Clone()
-py_wrapper_local_env = env.Clone()
-
-# Remove the shared library prefix on all platforms - we don't want 'lib'
-# mucking with our python modules
-del py_wrapper_local_env['SHLIBPREFIX']
+module_name = 'weipa'
 
 sources = """
     DataVar.cpp
     EscriptDataset.cpp
-    VisItControl.cpp
 """.split()
 
 headers = """
@@ -40,120 +33,85 @@ headers = """
     weipa.h
 """.split()
 
+local_env = env.Clone()
+
+# collect dependencies for other modules
+weipalibs = []
+weipalibs += env['escript_libs']
+
 if 'dudley' in env['domains'] or 'finley' in env['domains']:
     sources += ['FinleyDomain.cpp','FinleyElements.cpp','FinleyNodes.cpp']
     headers += ['FinleyDomain.h','FinleyElements.h','FinleyNodes.h']
     if 'dudley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_DUDLEY'])
-        local_env.Prepend(LIBS = ['dudley'])
+        weipalibs += env['dudley_libs']
     if 'finley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_FINLEY'])
-        local_env.Prepend(LIBS = ['finley'])
+        weipalibs += env['finley_libs']
+
+if local_env['silo']:
+    weipalibs += env['silo_libs']
+
+pluginlibs = [] + weipalibs
+pluginsources = [] + sources
+# clone here to use same CPPDEFINES
+visitplugin_env = local_env.Clone()
 
 if 'ripley' in env['domains']:
     sources += ['RipleyDomain.cpp','RipleyElements.cpp','RipleyNodes.cpp']
     headers += ['RipleyDomain.h','RipleyElements.h','RipleyNodes.h']
     local_env.Append(CPPDEFINES = ['USE_RIPLEY'])
-    local_env.Prepend(LIBS = ['ripley'])
+    weipalibs += env['ripley_libs']
 
 if 'speckley' in env['domains']:
     sources += ['SpeckleyDomain.cpp','SpeckleyElements.cpp','SpeckleyNodes.cpp']
     headers += ['SpeckleyDomain.h','SpeckleyElements.h','SpeckleyNodes.h']
     local_env.Append(CPPDEFINES = ['USE_SPECKLEY'])
-    local_env.Prepend(LIBS = ['speckley'])
+    weipalibs += env['speckley_libs']
+
+sources.append(['VisItControl.cpp'])
 
 if local_env['visit']:
     sources.append(['VisItData.cpp'])
     headers.append(['VisItData.h'])
     local_env.Append(CPPDEFINES = ['USE_VISIT'])
-    local_env.AppendUnique(LIBS = ['simV2'])
+    weipalibs += ['simV2']
 
-if not env['build_shared']:
-    local_env.Prepend(LIBS = ['escript'])
-else:
-    local_env.Prepend(LIBS = ['escript', 'esysUtils'])
-
-if local_env['silo']:
-    local_env.Append(CPPDEFINES = ['USE_SILO'])
-    local_env.AppendUnique(LIBS = env['silo_libs'])
+local_env.PrependUnique(LIBS = weipalibs)
+env['weipa_libs'] = [module_name] + weipalibs
 
 if IS_WINDOWS:
     local_env.Append(CPPDEFINES = ['WEIPA_EXPORTS'])
 
-module_name = 'weipa'
-
-lib = local_env.SharedLibrary(module_name, sources)
-env.Alias('build_weipa_lib', lib)
-
-include_path = Dir('weipa', local_env['incinstall'])
-
+include_path = Dir(module_name, local_env['incinstall'])
 hdr_inst = local_env.Install(include_path, headers)
-env.Alias('install_weipa_headers', hdr_inst)
 
+lib = local_env.SharedLibrary(module_name, sources)
 lib_inst = local_env.Install(local_env['libinstall'], lib)
-env.Alias('install_weipa_lib', lib_inst)
 
 ### Python wrapper ###
-py_wrapper_local_env.Prepend(LIBS = ['weipa'])
-if 'dudley' in env['domains']:
-    py_wrapper_local_env.Prepend(LIBS = ['dudley'])
-if 'finley' in env['domains']:
-    py_wrapper_local_env.Prepend(LIBS = ['finley'])
-if 'ripley' in env['domains']:
-    py_wrapper_local_env.Prepend(LIBS = ['ripley'])
-if 'speckley' in env['domains']:
-    py_wrapper_local_env.Prepend(LIBS = ['speckley'])
-
-if not env['build_shared']:
-    py_wrapper_local_env.Prepend(LIBS = ['escript'])
-else:
-    py_wrapper_local_env.Prepend(LIBS = ['escript', 'esysUtils'])
+py_env = local_env.Clone()
+py_env.PrependUnique(LIBS = [module_name])
+py_lib_name = module_name + 'cpp'
+py_lib = py_env.PythonModule(py_lib_name, 'weipacpp.cpp')
 
-py_wrapper_name = module_name + 'cpp'
-py_wrapper_lib = py_wrapper_local_env.SharedLibrary(py_wrapper_name, 'weipacpp.cpp')
-env.Alias('build_weipacpp_lib', py_wrapper_lib)
-
-tmp_inst = os.path.join(local_env['pyinstall'], module_name)
-if IS_WINDOWS:
-    wrapper_ext = '.pyd'
-else:
-    wrapper_ext = '.so'
+mod_path = Dir(module_name, local_env['pyinstall'])
+mod_inst = py_env.Install(mod_path, py_lib)
 
-share_name = os.path.join(tmp_inst, py_wrapper_name+wrapper_ext)
-mod_inst = py_wrapper_local_env.InstallAs(target=share_name,
-                                          source=py_wrapper_lib[0])
-env.Alias('install_weipacpp_lib', mod_inst)
+build = env.Alias('build_weipa', [hdr_inst, lib, py_lib])
+env.Alias('install_weipa', [build, lib_inst, mod_inst])
 
 ######################
 ### Plugin library ###
 ######################
-visitplugin_env = env.Clone()
-plugin_sources = """
-    DataVar.cpp
-    EscriptDataset.cpp
-    FinleyDomain.cpp
-    FinleyElements.cpp
-    FinleyNodes.cpp
-""".split()
-
-visitplugin_env.Prepend(LIBS = ['esysUtils'])
-visitplugin_env.Append(CPPDEFINES = ['VISIT_PLUGIN', 'USE_FINLEY'])
-visitplugin_env['OBJPREFIX']='reader_'
-
-if env['build_shared']:
-    plugin_lib = visitplugin_env.SharedLibrary('escriptreader', plugin_sources)
-else:
-    if IS_WINDOWS:
-        visitplugin_env.Append(CPPDEFINES = ['WEIPA_STATIC_LIB'])
-    plugin_lib = visitplugin_env.StaticLibrary('escriptreader', plugin_sources)
-env.Alias('build_escriptreader_lib', plugin_lib)
-
-tmp = local_env.Install(local_env['libinstall'], plugin_lib)
-env.Alias('install_escriptreader_lib', tmp)
+if 'dudley' in env['domains'] or 'finley' in env['domains']:
+    visitplugin_env.PrependUnique(LIBS = pluginlibs)
+    visitplugin_env.Append(CPPDEFINES = ['VISIT_PLUGIN'])
+    visitplugin_env['OBJPREFIX']='reader_'
 
-# configure python module
-local_env.SConscript(dirs = ['#/weipa/py_src'], variant_dir='py', duplicate=0, exports=['py_wrapper_lib'])
+    plugin_lib = visitplugin_env.SharedLibrary('escriptreader', pluginsources)
+    plugin_inst = local_env.Install(local_env['libinstall'], plugin_lib)
 
-# configure unit tests
-local_env.SConscript(dirs = ['#/weipa/test'], variant_dir='test', duplicate=0)
+    build = env.Alias('build_escriptreader', plugin_lib)
+    env.Alias('install_escriptreader', [build, plugin_inst])
 
diff --git a/weipa/src/SpeckleyDomain.cpp b/weipa/src/SpeckleyDomain.cpp
index 565dea6..bc2c7fc 100644
--- a/weipa/src/SpeckleyDomain.cpp
+++ b/weipa/src/SpeckleyDomain.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/SpeckleyDomain.h>
 #include <weipa/SpeckleyNodes.h>
 #include <weipa/DataVar.h>
@@ -27,7 +24,7 @@
 
 #include <iostream>
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
@@ -225,7 +222,7 @@ bool SpeckleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
                                const StringVec& labels, const StringVec& units,
                                bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     // Write nodes, elements and mesh variables
     if (!initialized
             || !cells->writeToSilo(dbfile, pathInSilo, labels, units, writeMeshData)
@@ -235,8 +232,8 @@ bool SpeckleyDomain::writeToSilo(DBfile* dbfile, const string& pathInSilo,
     siloPath = pathInSilo;
     return true;
 
-#else // !USE_SILO
-    std::cerr << "skipping writeToSilo, USE_SILO undefined\n";
+#else // !ESYS_HAVE_SILO
+    std::cerr << "skipping writeToSilo, not built with Silo support\n";
     return false;
 #endif
 }
diff --git a/weipa/src/SpeckleyElements.cpp b/weipa/src/SpeckleyElements.cpp
index 600c005..201a772 100644
--- a/weipa/src/SpeckleyElements.cpp
+++ b/weipa/src/SpeckleyElements.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/SpeckleyElements.h>
 #include <weipa/NodeData.h>
 
@@ -26,10 +23,13 @@
 
 #include <iostream>
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
+#ifndef VISIT_PLUGIN
+using escript::DataTypes::dim_t;
+#endif
 
 using namespace std;
 
@@ -300,7 +300,7 @@ void SpeckleyElements::writeConnectivityVTK(ostream& os)
     }
 }
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 inline int toSiloElementType(int type)
 {
     switch (type) {
@@ -317,7 +317,7 @@ bool SpeckleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
                                  const StringVec& labels,
                                  const StringVec& units, bool writeMeshData)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (numElements == 0)
         return true;
 
@@ -393,7 +393,7 @@ bool SpeckleyElements::writeToSilo(DBfile* dbfile, const string& siloPath,
 
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/SpeckleyNodes.cpp b/weipa/src/SpeckleyNodes.cpp
index 28f154a..e3bc644 100644
--- a/weipa/src/SpeckleyNodes.cpp
+++ b/weipa/src/SpeckleyNodes.cpp
@@ -14,19 +14,21 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/SpeckleyNodes.h>
 
 #ifndef VISIT_PLUGIN
 #include <speckley/SpeckleyDomain.h>
 #endif
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
 #include <silo.h>
 #endif
 
+#ifndef VISIT_PLUGIN
+using escript::DataTypes::dim_t;
+using escript::DataTypes::index_t;
+#endif
+
 using namespace std;
 
 namespace weipa {
@@ -223,7 +225,7 @@ void SpeckleyNodes::writeCoordinatesVTK(ostream& os, int ownIndex)
 //
 bool SpeckleyNodes::writeToSilo(DBfile* dbfile)
 {
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     if (numNodes == 0)
         return true;
 
@@ -248,7 +250,7 @@ bool SpeckleyNodes::writeToSilo(DBfile* dbfile)
     DBSetDir(dbfile, "/");
     return (ret == 0);
 
-#else // !USE_SILO
+#else // !ESYS_HAVE_SILO
     return false;
 #endif
 }
diff --git a/weipa/src/VisItControl.cpp b/weipa/src/VisItControl.cpp
index 93a2cd7..a87021e 100644
--- a/weipa/src/VisItControl.cpp
+++ b/weipa/src/VisItControl.cpp
@@ -14,9 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include <weipa/VisItControl.h>
 #include <weipa/EscriptDataset.h>
 
@@ -50,7 +47,7 @@ bool connected = false;
 // Helper function for processVisItCommand()
 static void broadcastSlaveCommand(int* command)
 {
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
     MPI_Bcast(command, 1, MPI_INT, 0, MPI_COMM_WORLD);
 #endif
 }
@@ -97,7 +94,7 @@ static void slaveProcessCallback()
     broadcastSlaveCommand(&command);
 }
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
 static int broadcastIntCallback(int* value, int sender)
 {
     return MPI_Bcast(value, 1, MPI_INT, sender, MPI_COMM_WORLD);
@@ -193,7 +190,7 @@ bool initialize(const std::string& simFile, const std::string& comment)
             return false;
         }
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
         MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
 
@@ -247,7 +244,7 @@ bool publishData(EscriptDataset_ptr dataset)
             visitState = VisItDetectInput(blocking, -1);
         }
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
         MPI_Bcast(&visitState, 1, MPI_INT, 0, dataset->getMPIComm());
 #endif
 
diff --git a/weipa/src/VisItData.cpp b/weipa/src/VisItData.cpp
index bcf10ca..780f69d 100644
--- a/weipa/src/VisItData.cpp
+++ b/weipa/src/VisItData.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <weipa/VisItData.h>
 #include <weipa/DataVar.h>
 #include <weipa/ElementData.h>
@@ -110,7 +106,7 @@ visit_handle VisItData::getSimMetaData()
 
     // add all meshes
     int mpiSize=1;
-#ifdef HAVE_MPI
+#ifdef WEIPA_HAVE_MPI
     MPI_Comm comm = dataset->getMPIComm();
     MPI_Comm_size(comm, &mpiSize);
 #endif
@@ -132,7 +128,7 @@ visit_handle VisItData::getDomainList()
     visit_handle domainList = VISIT_INVALID_HANDLE;
     if (VisIt_DomainList_alloc(&domainList) == VISIT_OKAY) {
         int mpiRank=0, mpiSize=1;
-#ifdef HAVE_MPI
+#ifdef WEIPA_HAVE_MPI
         MPI_Comm comm = dataset->getMPIComm();
         MPI_Comm_rank(comm, &mpiRank);
         MPI_Comm_size(comm, &mpiSize);
diff --git a/weipa/src/weipa.h b/weipa/src/weipa.h
index 2c86552..5bee4c9 100644
--- a/weipa/src/weipa.h
+++ b/weipa/src/weipa.h
@@ -17,16 +17,20 @@
 #ifndef __WEIPA_H__
 #define __WEIPA_H__
 
+#ifndef VISIT_PLUGIN
+#include <escript/DataTypes.h>
+#endif
+
 #include <string>
 #include <vector>
 #include <map>
 #include <boost/shared_ptr.hpp>
 
 #ifdef ESYS_MPI
-#define HAVE_MPI 1
+#define WEIPA_HAVE_MPI 1
 #endif
 
-#if HAVE_MPI
+#if WEIPA_HAVE_MPI
 #include <mpi.h>
 #endif
 
diff --git a/weipa/src/weipacpp.cpp b/weipa/src/weipacpp.cpp
index 852c6ca..80ff3a3 100644
--- a/weipa/src/weipacpp.cpp
+++ b/weipa/src/weipacpp.cpp
@@ -14,10 +14,6 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
 #include <escript/Data.h>
 
 #include <weipa/EscriptDataset.h>
diff --git a/weipa/test/EscriptDatasetTestCase.cpp b/weipa/test/EscriptDatasetTestCase.cpp
index b60d4b0..ea72276 100644
--- a/weipa/test/EscriptDatasetTestCase.cpp
+++ b/weipa/test/EscriptDatasetTestCase.cpp
@@ -14,21 +14,18 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
 #include "EscriptDatasetTestCase.h"
+
+#include <weipa/EscriptDataset.h>
+
 #include <escript/DataFactory.h>
 #include <escript/FunctionSpaceFactory.h>
-#include <weipa/EscriptDataset.h>
-#include <esysUtils/Esys_MPI.h>
-#include <cppunit/TestCaller.h>
 
 #if USE_DUDLEY
-#include <dudley/CppAdapter/MeshAdapterFactory.h>
+#include <dudley/DomainFactory.h>
 #endif
 #if USE_FINLEY
-#include <finley/CppAdapter/MeshAdapterFactory.h>
+#include <finley/DomainFactory.h>
 #endif
 #if USE_RIPLEY
 #include <ripley/Brick.h>
@@ -37,6 +34,8 @@
 #include <speckley/Brick.h>
 #endif
 
+#include <cppunit/TestCaller.h>
+
 using namespace CppUnit;
 using namespace escript;
 using namespace weipa;
@@ -91,7 +90,7 @@ void EscriptDatasetTestCase::testBase()
 #if USE_DUDLEY
 void EscriptDatasetTestCase::testDudley()
 {
-    esysUtils::JMPI info=esysUtils::makeInfo(MPI_COMM_WORLD);
+    JMPI info=makeInfo(MPI_COMM_WORLD);
     Domain_ptr dom(dudley::brick(info));
     cout << "Running Dudley tests..." << endl;
     runDomainTests(dom);
@@ -101,7 +100,7 @@ void EscriptDatasetTestCase::testDudley()
 #if USE_FINLEY
 void EscriptDatasetTestCase::testFinley()
 {
-    esysUtils::JMPI info=esysUtils::makeInfo(MPI_COMM_WORLD);
+    JMPI info=makeInfo(MPI_COMM_WORLD);
     Domain_ptr dom(finley::brick(info));
     cout << "Running Finley tests..." << endl;
     runDomainTests(dom);
@@ -131,7 +130,7 @@ void EscriptDatasetTestCase::testSpeckley()
 void EscriptDatasetTestCase::runDomainTests(Domain_ptr dom)
 {
     EscriptDataset_ptr dataset(new EscriptDataset());
-    escript::Data data = Scalar(0.0, continuousFunction(*dom), true);
+    Data data = Scalar(0.0, continuousFunction(*dom), true);
 
     cout << "\tTest addData with NULL domain." << endl;
     CPPUNIT_ASSERT(dataset->addData(data, "foo", "bar") == false);
@@ -171,7 +170,7 @@ void EscriptDatasetTestCase::runDomainTests(Domain_ptr dom)
     dataset->setMeshUnits("km", "cm", "mm");
     dataset->setSaveMeshData(true);
 
-#if USE_SILO
+#ifdef ESYS_HAVE_SILO
     cout << "\tTest saveSilo." << endl;
     CPPUNIT_ASSERT(dataset->saveSilo("domaintest.silo") == true);
     ifstream f("domaintest.silo");
diff --git a/weipa/test/SConscript b/weipa/test/SConscript
index 25573c8..b91ccf1 100644
--- a/weipa/test/SConscript
+++ b/weipa/test/SConscript
@@ -20,26 +20,19 @@ local_env = env.Clone()
 
 if local_env['cppunit']:
     # get the test source file names
-    sources = Glob('*.cpp')+Glob('*.c')
+    sources = Glob('*.cpp')
     testname='weipa_UnitTest'
 
     # build the executable
-    local_env.Append(LIBS=['weipa', 'escript', 'esysUtils']+env['cppunit_libs'])
+    local_env.AppendUnique(LIBS=env['weipa_libs']+env['cppunit_libs'])
     if 'dudley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_DUDLEY'])
-        local_env.Prepend(LIBS = ['dudley'])
     if 'finley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_FINLEY'])
-        local_env.Prepend(LIBS = ['finley'])
     if 'ripley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_RIPLEY'])
-        local_env.Prepend(LIBS = ['ripley'])
     if 'speckley' in env['domains']:
         local_env.Append(CPPDEFINES = ['USE_SPECKLEY'])
-        local_env.Prepend(LIBS = ['speckley'])
-    if local_env['silo']:
-        local_env.Append(CPPDEFINES = ['USE_SILO'])
-        local_env.AppendUnique(LIBS = [env['silo_libs']])
     program = local_env.Program(testname, sources)
 
     # run the tests - but only if test_targets are stale
@@ -50,10 +43,10 @@ if local_env['cppunit']:
     Alias("run_tests", testname+'.passed')
 
     # add a group of tests
-    from grouptest import *
-    tgroup=GroupTest("$BINRUNNER ", (), "", "$BUILD_DIR/weipa/test", ('./'+testname,))
+    from grouptest import GroupTest
+    tgroup=GroupTest("weipacpp", "$BINRUNNER ", (), "", "$BUILD_DIR/weipa/test", ('./'+testname,))
     TestGroups.append(tgroup)
 
 # configure python unit tests
-local_env.SConscript(dirs = ['#/weipa/test/python'], variant_dir='python', duplicate=0)
+local_env.SConscript('python/SConscript', duplicate=0)
 
diff --git a/weipa/test/python/SConscript b/weipa/test/python/SConscript
index 11453f3..bb457d8 100644
--- a/weipa/test/python/SConscript
+++ b/weipa/test/python/SConscript
@@ -50,8 +50,8 @@ if env['usempi']:
     Depends(program, env['prefix']+"/lib/pythonMPI")
 
 # add a group of tests
-from grouptest import *
-tgroup=GroupTest("$PYTHONRUNNER ",(("WEIPA_TEST_DATA","$BATCH_ROOT/weipa/test/python"),('WEIPA_WORKDIR','$BUILD_DIR/weipa/test/python')),"$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/weipa/test/python","$BATCH_ROOT/weipa/test/python",testruns)
+from grouptest import GroupTest
+tgroup=GroupTest("weipa", "$PYTHONRUNNER ", (("WEIPA_TEST_DATA","$BATCH_ROOT/weipa/test/python"),('WEIPA_WORKDIR','$BUILD_DIR/weipa/test/python')), "$BATCH_ROOT/escriptcore/test/python:$BATCH_ROOT/weipa/test/python", "$BATCH_ROOT/weipa/test/python", testruns)
 tgroup.makeDir("$BUILD_DIR/weipa/test/python")
 TestGroups.append(tgroup)
 
diff --git a/weipa/test/python/meshes/hex_2D_macro.msh b/weipa/test/python/meshes/hex_2D_macro.msh
index 0d1f53e..af05fc0 100644
--- a/weipa/test/python/meshes/hex_2D_macro.msh
+++ b/weipa/test/python/meshes/hex_2D_macro.msh
@@ -23,3 +23,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/weipa/test/python/meshes/hex_2D_order2.msh b/weipa/test/python/meshes/hex_2D_order2.msh
index f90e615..03d91f7 100644
--- a/weipa/test/python/meshes/hex_2D_order2.msh
+++ b/weipa/test/python/meshes/hex_2D_order2.msh
@@ -22,3 +22,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/weipa/test/python/meshes/hex_2D_order2p.msh b/weipa/test/python/meshes/hex_2D_order2p.msh
index 3ec746d..894dc6f 100644
--- a/weipa/test/python/meshes/hex_2D_order2p.msh
+++ b/weipa/test/python/meshes/hex_2D_order2p.msh
@@ -23,3 +23,4 @@ top 20
 bottom 10
 left 1
 right 2
+
diff --git a/weipa/test/python/meshes/hex_3D_macro.msh b/weipa/test/python/meshes/hex_3D_macro.msh
index 2015e8f..e6b2120 100644
--- a/weipa/test/python/meshes/hex_3D_macro.msh
+++ b/weipa/test/python/meshes/hex_3D_macro.msh
@@ -45,3 +45,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/weipa/test/python/meshes/hex_3D_order2p.msh b/weipa/test/python/meshes/hex_3D_order2p.msh
index 468bb98..15a5c9c 100644
--- a/weipa/test/python/meshes/hex_3D_order2p.msh
+++ b/weipa/test/python/meshes/hex_3D_order2p.msh
@@ -45,3 +45,4 @@ left 1
 right 2
 front 10
 back 20
+
diff --git a/weipa/test/python/meshes/hex_contact_2D_order1.msh b/weipa/test/python/meshes/hex_contact_2D_order1.msh
index 0521486..846bf99 100644
--- a/weipa/test/python/meshes/hex_contact_2D_order1.msh
+++ b/weipa/test/python/meshes/hex_contact_2D_order1.msh
@@ -21,3 +21,4 @@ Line2 6
 Line2_Contact 1
 4 10 3 2 5 4
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_2D_order1_onFace.msh b/weipa/test/python/meshes/hex_contact_2D_order1_onFace.msh
index dddb25a..e959c05 100644
--- a/weipa/test/python/meshes/hex_contact_2D_order1_onFace.msh
+++ b/weipa/test/python/meshes/hex_contact_2D_order1_onFace.msh
@@ -21,3 +21,4 @@ Rec4Face 6
 Rec4Face_Contact 1
 4 10 3 2 0 1 5 4 6 7
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_2D_order2.msh b/weipa/test/python/meshes/hex_contact_2D_order2.msh
index 827035b..aabb44f 100644
--- a/weipa/test/python/meshes/hex_contact_2D_order2.msh
+++ b/weipa/test/python/meshes/hex_contact_2D_order2.msh
@@ -29,3 +29,4 @@ Line3 6
 Line3_Contact 1
 4 10 8 6 7 11 9 10
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_2D_order2_onFace.msh b/weipa/test/python/meshes/hex_contact_2D_order2_onFace.msh
index 6f39b9d..517c3e0 100644
--- a/weipa/test/python/meshes/hex_contact_2D_order2_onFace.msh
+++ b/weipa/test/python/meshes/hex_contact_2D_order2_onFace.msh
@@ -29,3 +29,4 @@ Rec8Face 6
 Rec8Face_Contact 1
 4 10 8 6 0 2 7 3 1 5 11 9 15 17 10 12 16 14
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_3D_order1.msh b/weipa/test/python/meshes/hex_contact_3D_order1.msh
index 505f352..a437b56 100644
--- a/weipa/test/python/meshes/hex_contact_3D_order1.msh
+++ b/weipa/test/python/meshes/hex_contact_3D_order1.msh
@@ -33,3 +33,4 @@ Rec4 10
 Rec4_Contact 1
 2 100 4 5 7 6 8 9 11 10
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_3D_order1_onFace.msh b/weipa/test/python/meshes/hex_contact_3D_order1_onFace.msh
index d6947c8..fbb3661 100644
--- a/weipa/test/python/meshes/hex_contact_3D_order1_onFace.msh
+++ b/weipa/test/python/meshes/hex_contact_3D_order1_onFace.msh
@@ -33,3 +33,4 @@ Hex8Face 10
 Hex8Face_Contact 1
 2 100 4 5 7 6 0 1 3 2 8 9 11 10 12 13 15 14
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_3D_order2.msh b/weipa/test/python/meshes/hex_contact_3D_order2.msh
index a6b371b..f4666a7 100644
--- a/weipa/test/python/meshes/hex_contact_3D_order2.msh
+++ b/weipa/test/python/meshes/hex_contact_3D_order2.msh
@@ -57,3 +57,4 @@ Rec8 10
 Rec8_Contact 1
 2 100 18 20 26 24 19 23 25 21 27 29 35 33 28 32 34 30
 Point1 0
+
diff --git a/weipa/test/python/meshes/hex_contact_3D_order2_onFace.msh b/weipa/test/python/meshes/hex_contact_3D_order2_onFace.msh
index f259980..5e2c2c2 100644
--- a/weipa/test/python/meshes/hex_contact_3D_order2_onFace.msh
+++ b/weipa/test/python/meshes/hex_contact_3D_order2_onFace.msh
@@ -57,3 +57,4 @@ Hex20Face 10
 Hex20Face_Contact 1
 2 100 18 20 26 24 0 2 8 6 19 23 25 21 9 11 17 15 1 5 7 3 27 29 35 33 45 47 53 51 28 32 34 30 36 38 44 42 46 50 52 48
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_2D_dudley.fly b/weipa/test/python/meshes/tet_2D_dudley.fly
index d519039..ab4e279 100644
--- a/weipa/test/python/meshes/tet_2D_dudley.fly
+++ b/weipa/test/python/meshes/tet_2D_dudley.fly
@@ -52,3 +52,4 @@ Line2 12
 30 20 11 10
 31 20 0 11
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_2D_macro.fly b/weipa/test/python/meshes/tet_2D_macro.fly
index 4c48736..38c4756 100644
--- a/weipa/test/python/meshes/tet_2D_macro.fly
+++ b/weipa/test/python/meshes/tet_2D_macro.fly
@@ -89,3 +89,4 @@ Line3Macro 12
 31 20 0 21 23 
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_2D_order1.fly b/weipa/test/python/meshes/tet_2D_order1.fly
index 33e66d5..25c901d 100644
--- a/weipa/test/python/meshes/tet_2D_order1.fly
+++ b/weipa/test/python/meshes/tet_2D_order1.fly
@@ -53,3 +53,4 @@ Line2 12
 31 20 0 11
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_2D_order2.fly b/weipa/test/python/meshes/tet_2D_order2.fly
index 0e71694..0568007 100644
--- a/weipa/test/python/meshes/tet_2D_order2.fly
+++ b/weipa/test/python/meshes/tet_2D_order2.fly
@@ -89,3 +89,4 @@ Line3 12
 31 20 0 21 23 
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_3D_dudley.fly b/weipa/test/python/meshes/tet_3D_dudley.fly
index 54c2b53..3c83a40 100644
--- a/weipa/test/python/meshes/tet_3D_dudley.fly
+++ b/weipa/test/python/meshes/tet_3D_dudley.fly
@@ -82,4 +82,5 @@ Tri3 36
 44 1 17 16 18 
 58 100 19 18 16
 53 10 17 18 19 
-Point1 0
\ No newline at end of file
+Point1 0
+
diff --git a/weipa/test/python/meshes/tet_3D_macro.fly b/weipa/test/python/meshes/tet_3D_macro.fly
index 523ebe6..78aa10a 100644
--- a/weipa/test/python/meshes/tet_3D_macro.fly
+++ b/weipa/test/python/meshes/tet_3D_macro.fly
@@ -145,3 +145,4 @@ Tri6Macro 36
 53 10 61 53 56 63 57 68
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_3D_order1.fly b/weipa/test/python/meshes/tet_3D_order1.fly
index 17a4983..fd1bd14 100644
--- a/weipa/test/python/meshes/tet_3D_order1.fly
+++ b/weipa/test/python/meshes/tet_3D_order1.fly
@@ -84,3 +84,4 @@ Tri3 36
 53 10 17 18 19 
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/python/meshes/tet_3D_order2.fly b/weipa/test/python/meshes/tet_3D_order2.fly
index a062a92..c883404 100644
--- a/weipa/test/python/meshes/tet_3D_order2.fly
+++ b/weipa/test/python/meshes/tet_3D_order2.fly
@@ -145,3 +145,4 @@ Tri6 36
 53 10 61 53 56 63 57 68
 Point1_Contact 0
 Point1 0
+
diff --git a/weipa/test/weipa_UnitTest.cpp b/weipa/test/weipa_UnitTest.cpp
index 7d7520a..2f5ca9b 100644
--- a/weipa/test/weipa_UnitTest.cpp
+++ b/weipa/test/weipa_UnitTest.cpp
@@ -14,21 +14,18 @@
 *
 *****************************************************************************/
 
-#define ESNEEDPYTHON
-#include "esysUtils/first.h"
-
-
-#include <iostream>
+#include <escript/EsysMPI.h>
 
 #include "EscriptDatasetTestCase.h"
+
 #include <cppunit/CompilerOutputter.h>
 #include <cppunit/TestResult.h>
 #include <cppunit/TestResultCollector.h>
 #include <cppunit/TestRunner.h>
 
-using namespace CppUnit;
+#include <iostream>
 
-#include "esysUtils/Esys_MPI.h"
+using namespace CppUnit;
 
 int main(int argc, char* argv[])
 {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-escript.git
